# humanizer/text_detector.py
import math
import re
from collections import Counter

import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
)


class AITextDetector:
"""
AI Text Detector
- Transformer classifier for AI vs Human
- Metrics: perplexity, burstiness, repetition, semantic smoothness
- Returns AI-vs-Human probability + category distribution
"""
def __init__(self, model_name="roberta-base-openai-detector", device=None):
# Device setup
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
# Classifier model & tokenizer
self.classifier_tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
self.model.eval()
        # Language model used for the perplexity metric (GPT-2 small; a lighter LM can be swapped in if needed)
self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(self.device)
self.lm_model.eval()
# ------------------ Metrics ------------------
def _compute_perplexity(self, text: str, max_length: int = 512):
"""Compute perplexity using GPT-2 LM."""
encodings = self.lm_tokenizer(
text,
return_tensors="pt",
truncation=True,
max_length=max_length,
).to(self.device)
with torch.no_grad():
outputs = self.lm_model(**encodings, labels=encodings.input_ids)
loss = outputs.loss.item()
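        # Perplexity = exp(mean token cross-entropy); lower values mean GPT-2 finds
        # the text more predictable, which is often cited as a sign of machine generation.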
# Clamp to avoid overflow
return float(min(math.exp(loss), 1e4))
def _compute_burstiness(self, text: str):
"""Variance of sentence lengths (burstiness)."""
sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
if len(sentences) < 2:
return 0.0
lengths = [len(s.split()) for s in sentences]
return float(np.var(lengths))
def _compute_repetition_score(self, text: str):
"""Repetition = proportion of duplicate words."""
words = [w.lower() for w in re.findall(r"\b\w+\b", text)]
if not words:
return 0.0
counts = Counter(words)
repeated = sum(c - 1 for c in counts.values() if c > 1)
return repeated / len(words)
def _compute_semantic_smoothness(self, text: str):
"""
Semantic smoothness = avg cosine similarity between consecutive sentence embeddings.
Uses last hidden states instead of raw embeddings.
"""
sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
if len(sentences) < 2:
return 1.0
embeddings = []
for s in sentences:
encodings = self.classifier_tokenizer(
s,
return_tensors="pt",
truncation=True,
padding=True,
max_length=128,
).to(self.device)
with torch.no_grad():
outputs = self.model(
**encodings,
output_hidden_states=True,
)
            hidden_states = outputs.hidden_states[-1]  # last layer
            # Mean-pool token states into a single (hidden_size,) sentence vector
            sent_emb = hidden_states.mean(dim=1).squeeze(0).cpu().numpy()
            embeddings.append(sent_emb)
similarities = []
for i in range(len(embeddings) - 1):
a, b = embeddings[i], embeddings[i + 1]
            num = float(np.dot(a, b))
denom = np.linalg.norm(a) * np.linalg.norm(b)
if denom > 0:
similarities.append(num / denom)
return float(np.mean(similarities)) if similarities else 1.0
# ------------------ Main detection ------------------
def detect(self, text: str):
"""Run detection pipeline and return results."""
# Empty text case
        if not text.strip():
            return {
                "overall_ai_probability": 0.0,
                "metrics": {},
                "category_distribution": {},
                "final_label": "empty",
                "label": "empty",
            }
# Classifier prediction
inputs = self.classifier_tokenizer(
text,
return_tensors="pt",
truncation=True,
padding=True,
max_length=512,
).to(self.device)
with torch.no_grad():
logits = self.model(**inputs).logits
probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
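        # NOTE: the index-to-label order below assumes label 0 = human, label 1 = AI;
        # verify against the checkpoint's id2label config before relying on it.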
human_prob, ai_prob = float(probs[0]), float(probs[1])
# Extra metrics
perplexity = self._compute_perplexity(text)
burstiness = self._compute_burstiness(text)
repetition = self._compute_repetition_score(text)
smoothness = self._compute_semantic_smoothness(text)
        # Split the AI probability across categories using the auxiliary metrics
distribution = {
"Human-written": round(human_prob * 100, 2),
"AI-generated": round(ai_prob * 100 * (1 - repetition), 2),
"AI-generated & AI-refined": round(ai_prob * 100 * repetition, 2),
"Mixed": round(ai_prob * 100 * (1 - smoothness), 2),
}
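        # Re-normalize so the four category percentages sum to 100.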
total = sum(distribution.values())
if total > 0:
for k in distribution:
distribution[k] = round(distribution[k] / total * 100, 2)
        # Final label and overall AI probability
        overall_ai_probability = round(ai_prob, 3)
        final_label = max(distribution, key=distribution.get)
        return {
            "summary": f"{distribution['AI-generated']}% of text is likely AI",
            "overall_ai_probability": overall_ai_probability,
            "category_distribution": distribution,
            "metrics": {
                "perplexity": round(perplexity, 2),
                "burstiness": round(burstiness, 3),
                "repetition_score": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
                "ai_probability": overall_ai_probability,
            },
            "interpretation": (
                "This detector uses structural patterns (perplexity, burstiness, repetition, semantic smoothness) "
                "to estimate the likelihood of AI authorship. Results are probabilistic, not definitive. "
                "Always apply judgment."
            ),
            "final_label": final_label,
            "label": "AI-generated" if overall_ai_probability > 0.5 else "Human-written",
        }
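

# ------------------ Example usage ------------------
# A minimal sketch: both checkpoints are downloaded from the Hugging Face Hub on
# first run, and the sample text below is illustrative only.
if __name__ == "__main__":
    detector = AITextDetector()
    sample = (
        "Artificial intelligence has transformed many industries. "
        "It enables automation, improves efficiency, and creates new opportunities "
        "for innovation across sectors."
    )
    result = detector.detect(sample)
    print(result["summary"])
    print(result["category_distribution"])
    print(result["metrics"])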