import math
import re
from collections import Counter

import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
)
class AITextDetector:
    """
    AI Text Detector

    - Transformer classifier for AI vs Human
    - Metrics: perplexity, burstiness, repetition, semantic smoothness
    - Returns AI-vs-Human probability + category distribution
    """

    def __init__(self, model_name="roberta-base-openai-detector", device=None):
        # Device setup
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # Classifier model & tokenizer
        self.classifier_tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
        self.model.eval()

        # Language model for perplexity (GPT-2 small; swap in a lighter LM if needed)
        self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(self.device)
        self.lm_model.eval()
    # ------------------ Metrics ------------------

    def _compute_perplexity(self, text: str, max_length: int = 512):
        """Compute perplexity using GPT-2 LM."""
        encodings = self.lm_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
        ).to(self.device)
        with torch.no_grad():
            outputs = self.lm_model(**encodings, labels=encodings.input_ids)
        loss = outputs.loss.item()
        # Clamp to avoid overflow
        return float(min(math.exp(loss), 1e4))
    def _compute_burstiness(self, text: str):
        """Variance of sentence lengths (burstiness)."""
        sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
        if len(sentences) < 2:
            return 0.0
        lengths = [len(s.split()) for s in sentences]
        return float(np.var(lengths))
    def _compute_repetition_score(self, text: str):
        """Repetition = proportion of duplicate words."""
        words = [w.lower() for w in re.findall(r"\b\w+\b", text)]
        if not words:
            return 0.0
        counts = Counter(words)
        repeated = sum(c - 1 for c in counts.values() if c > 1)
        return repeated / len(words)
    def _compute_semantic_smoothness(self, text: str):
        """
        Semantic smoothness = avg cosine similarity between consecutive sentence embeddings.
        Uses the classifier's last hidden states (mean-pooled) instead of raw token embeddings.
        """
        sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
        if len(sentences) < 2:
            return 1.0
        embeddings = []
        for s in sentences:
            encodings = self.classifier_tokenizer(
                s,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=128,
            ).to(self.device)
            with torch.no_grad():
                outputs = self.model(
                    **encodings,
                    output_hidden_states=True,
                )
            hidden_states = outputs.hidden_states[-1]  # last layer
            sent_emb = hidden_states.mean(dim=1).cpu().numpy()  # shape: (1, hidden_size)
            embeddings.append(sent_emb)
        similarities = []
        for i in range(len(embeddings) - 1):
            a, b = embeddings[i], embeddings[i + 1]
            num = float(np.dot(a, b.T))
            denom = np.linalg.norm(a) * np.linalg.norm(b)
            if denom > 0:
                similarities.append(num / denom)
        return float(np.mean(similarities)) if similarities else 1.0
    # ------------------ Main detection ------------------

    def detect(self, text: str):
        """Run the detection pipeline and return results."""
        # Empty text case (keys mirror the non-empty return so callers can rely on them)
        if not text.strip():
            return {
                "overall_ai_probability": 0.0,
                "metrics": {},
                "category_distribution": {},
                "label": "empty",
            }

        # Classifier prediction
        inputs = self.classifier_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        ).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        # Assumes index 0 = human, index 1 = AI; label order differs between
        # detector checkpoints, so verify against self.model.config.id2label.
        human_prob, ai_prob = float(probs[0]), float(probs[1])
        # Extra metrics
        perplexity = self._compute_perplexity(text)
        burstiness = self._compute_burstiness(text)
        repetition = self._compute_repetition_score(text)
        smoothness = self._compute_semantic_smoothness(text)

        # Heuristic category weights, normalized to percentages below
        distribution = {
            "Human-written": round(human_prob * 100, 2),
            "AI-generated": round(ai_prob * 100 * (1 - repetition), 2),
            "AI-generated & AI-refined": round(ai_prob * 100 * repetition, 2),
            "Mixed": round(ai_prob * 100 * (1 - smoothness), 2),
        }
        total = sum(distribution.values())
        if total > 0:
            for k in distribution:
                distribution[k] = round(distribution[k] / total * 100, 2)
        # Final label = dominant category; overall AI probability = raw classifier score
        final_label = max(distribution, key=distribution.get)
        overall_ai_probability = round(ai_prob, 3)
        return {
            "summary": f"{distribution['AI-generated']}% of text is likely AI",
            "overall_ai_probability": overall_ai_probability,
            "category_distribution": distribution,
            "final_label": final_label,
            "metrics": {
                "perplexity": round(perplexity, 2),
                "burstiness": round(burstiness, 3),
                "repetition_score": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
                "ai_probability": overall_ai_probability,
            },
            "interpretation": (
                "This detector uses structural patterns (perplexity, burstiness, repetition, semantic smoothness) "
                "to estimate the likelihood of AI authorship. Results are probabilistic, not definitive. "
                "Always apply judgment."
            ),
            "label": "AI-generated" if overall_ai_probability > 0.5 else "Human-written",
        }
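

# Minimal usage sketch, assuming the module is run as a script with cached or
# downloadable weights for the Hugging Face checkpoints above. The sample text
# and printed fields are illustrative only, not part of the detector itself.
if __name__ == "__main__":
    detector = AITextDetector()  # defaults to roberta-base-openai-detector
    sample = (
        "Artificial intelligence systems can now produce fluent prose. "
        "Whether a given passage was written by a person is harder to tell."
    )
    result = detector.detect(sample)
    print(result["summary"])
    print("overall_ai_probability:", result["overall_ai_probability"], "| label:", result["label"])
    for name, value in result["metrics"].items():
        print(f"  {name}: {value}")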