# humanizer/text_detector.py
import math
import re
from collections import Counter

import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
)


class AITextDetector:
"""
AI Text Detector
- Transformer classifier for AI vs Human
- Metrics: perplexity, burstiness, repetition, semantic smoothness
- Returns AI-vs-Human probability + category distribution
"""
def __init__(self, model_name="roberta-base-openai-detector", device=None):
# Device setup
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
# Classifier model & tokenizer
self.classifier_tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
self.model.eval()
        # Language model used for the perplexity metric (GPT-2 small; a lighter LM can be swapped in if needed)
self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(self.device)
self.lm_model.eval()
# ------------------ Metrics ------------------
def _compute_perplexity(self, text: str, max_length: int = 512):
"""Compute perplexity using GPT-2 LM."""
encodings = self.lm_tokenizer(
text,
return_tensors="pt",
truncation=True,
max_length=max_length,
).to(self.device)
with torch.no_grad():
outputs = self.lm_model(**encodings, labels=encodings.input_ids)
loss = outputs.loss.item()
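        # Perplexity = exp(mean token cross-entropy); lower values mean GPT-2 finds
        # the text more predictable, which is often cited as a sign of machine generation.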
# Clamp to avoid overflow
return float(min(math.exp(loss), 1e4))
def _compute_burstiness(self, text: str):
"""Variance of sentence lengths (burstiness)."""
sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
if len(sentences) < 2:
return 0.0
lengths = [len(s.split()) for s in sentences]
return float(np.var(lengths))
def _compute_repetition_score(self, text: str):
"""Repetition = proportion of duplicate words."""
words = [w.lower() for w in re.findall(r"\b\w+\b", text)]
if not words:
return 0.0
counts = Counter(words)
repeated = sum(c - 1 for c in counts.values() if c > 1)
return repeated / len(words)
def _compute_semantic_smoothness(self, text: str):
"""
Semantic smoothness = avg cosine similarity between consecutive sentence embeddings.
Uses last hidden states instead of raw embeddings.
"""
sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
if len(sentences) < 2:
return 1.0
embeddings = []
for s in sentences:
encodings = self.classifier_tokenizer(
s,
return_tensors="pt",
truncation=True,
padding=True,
max_length=128,
).to(self.device)
with torch.no_grad():
outputs = self.model(
**encodings,
output_hidden_states=True,
)
            hidden_states = outputs.hidden_states[-1]  # last layer
            # Mean-pool token states into a single (hidden_size,) sentence vector
            sent_emb = hidden_states.mean(dim=1).squeeze(0).cpu().numpy()
            embeddings.append(sent_emb)
similarities = []
for i in range(len(embeddings) - 1):
a, b = embeddings[i], embeddings[i + 1]
            num = float(np.dot(a, b))
denom = np.linalg.norm(a) * np.linalg.norm(b)
if denom > 0:
similarities.append(num / denom)
return float(np.mean(similarities)) if similarities else 1.0
# ------------------ Main detection ------------------
def detect(self, text: str):
"""Run detection pipeline and return results."""
# Empty text case
        if not text.strip():
            return {
                "overall_ai_probability": 0.0,
                "metrics": {},
                "category_distribution": {},
                "final_label": "empty",
                "label": "empty",
            }
# Classifier prediction
inputs = self.classifier_tokenizer(
text,
return_tensors="pt",
truncation=True,
padding=True,
max_length=512,
).to(self.device)
with torch.no_grad():
logits = self.model(**inputs).logits
probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
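        # NOTE: the index-to-label order below assumes label 0 = human, label 1 = AI;
        # verify against the checkpoint's id2label config before relying on it.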
human_prob, ai_prob = float(probs[0]), float(probs[1])
# Extra metrics
perplexity = self._compute_perplexity(text)
burstiness = self._compute_burstiness(text)
repetition = self._compute_repetition_score(text)
smoothness = self._compute_semantic_smoothness(text)
        # Split the AI probability across categories using the auxiliary metrics
distribution = {
"Human-written": round(human_prob * 100, 2),
"AI-generated": round(ai_prob * 100 * (1 - repetition), 2),
"AI-generated & AI-refined": round(ai_prob * 100 * repetition, 2),
"Mixed": round(ai_prob * 100 * (1 - smoothness), 2),
}
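        # Re-normalize so the four category percentages sum to 100.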
total = sum(distribution.values())
if total > 0:
for k in distribution:
distribution[k] = round(distribution[k] / total * 100, 2)
        # Final label and overall AI probability
        overall_ai_probability = round(ai_prob, 3)
        final_label = max(distribution, key=distribution.get)
        return {
            "summary": f"{distribution['AI-generated']}% of text is likely AI",
            "overall_ai_probability": overall_ai_probability,
            "category_distribution": distribution,
            "metrics": {
                "perplexity": round(perplexity, 2),
                "burstiness": round(burstiness, 3),
                "repetition_score": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
                "ai_probability": overall_ai_probability,
            },
            "interpretation": (
                "This detector uses structural patterns (perplexity, burstiness, repetition, semantic smoothness) "
                "to estimate the likelihood of AI authorship. Results are probabilistic, not definitive. "
                "Always apply judgment."
            ),
            "final_label": final_label,
            "label": "AI-generated" if overall_ai_probability > 0.5 else "Human-written",
        }
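

# ------------------ Example usage ------------------
# A minimal sketch: both checkpoints are downloaded from the Hugging Face Hub on
# first run, and the sample text below is illustrative only.
if __name__ == "__main__":
    detector = AITextDetector()
    sample = (
        "Artificial intelligence has transformed many industries. "
        "It enables automation, improves efficiency, and creates new opportunities "
        "for innovation across sectors."
    )
    result = detector.detect(sample)
    print(result["summary"])
    print(result["category_distribution"])
    print(result["metrics"])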