Commit c14e307 (parent: 35534f7): handle both ar and eng

app.py CHANGED
@@ -102,6 +102,14 @@ def get_perplexity(text: str, tokenizer, model) -> float:
     ppl = torch.exp(torch.stack(nlls).sum() / end_loc if nlls else torch.tensor(0)).item()
     return ppl

+def calculate_weighted_score(clf_score: float, ppl: float, burstiness: float, ttr: float, detected_lang: str) -> float:
+    """Calculate a weighted score combining classifier and features."""
+    ppl_norm = min(ppl / 200, 1.0)  # Normalize perplexity (cap at 200)
+    burstiness_norm = min(burstiness / (2.0 if detected_lang == 'en' else 1.5), 1.0)  # Normalize burstiness
+    ttr_norm = max(0.1 / max(ttr, 0.01), 1.0)  # Normalize TTR (inverse, cap at 0.1)
+    feature_score = (ppl_norm + burstiness_norm + ttr_norm) / 3  # Average feature score
+    return 0.6 * clf_score + 0.4 * feature_score  # Weight classifier higher
+
 def split_text(text: str, max_chars: int = 5000) -> list:
     """Split text into chunks of max_chars, preserving sentence boundaries."""
     sentences = re.split(r'(?<=[.!?])\s+', text)

@@ -147,9 +155,9 @@ def detect(input_text: TextInput):
     is_ensemble = detected_lang == 'en'

     # Thresholds for human classification
-    ppl_threshold = 100
-    burstiness_threshold = 1.5 if detected_lang == 'en' else 1.0
-    ttr_threshold = 0.10
+    ppl_threshold = 150  # Increased from 100
+    burstiness_threshold = 1.7 if detected_lang == 'en' else 1.2  # Increased from 1.5/1.0
+    ttr_threshold = 0.08  # Decreased from 0.10

     if len(cleaned_text) > 10000:
         chunks = split_text(cleaned_text, max_chars=5000)

@@ -162,7 +170,7 @@ def detect(input_text: TextInput):
             chunk_clf_scores = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(chunk, detector)
-                label = "AI" if clf_score >= 0.
+                label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
                 chunk_labels.append(label)
                 chunk_clf_scores.append(clf_score)
                 logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")

@@ -170,7 +178,7 @@ def detect(input_text: TextInput):
             chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
             avg_clf_score = np.mean(chunk_clf_scores)

-            # Count
+            # Count human-like features
             human_features = sum([
                 chunk_ppl > ppl_threshold,
                 burstiness > burstiness_threshold,

@@ -178,28 +186,34 @@ def detect(input_text: TextInput):
             ])
             feature_note = f"Human-like features: {human_features}/3 (PPL={chunk_ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+            # Calculate weighted score
+            weighted_score = calculate_weighted_score(avg_clf_score, chunk_ppl, burstiness, ttr, detected_lang)
+            chunk_final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+            # Require all 3 features to override to Human
+            if chunk_final_label == "Uncertain" or any(l == "Human" for l in chunk_labels):
+                if human_features == 3:
                     chunk_final_label = "Human"
-            elif chunk_final_label == "AI" and avg_clf_score < 0.
+            elif chunk_final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
                 chunk_final_label = "Human"

             labels.append(chunk_final_label)
             clf_scores.append(avg_clf_score)
             ppls.append(chunk_ppl)
-            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")
+            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Weighted Score={weighted_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")

         label_counts = Counter(labels)
         final_label = label_counts.most_common(1)[0][0]
+        avg_weighted_score = sum(calculate_weighted_score(clf_scores[i], ppls[i], burstiness, ttr, detected_lang) for i in range(len(clf_scores))) / len(clf_scores) if clf_scores else 0.0
+        final_label = "AI" if avg_weighted_score >= 0.7 else "Human" if avg_weighted_score < 0.4 else "Uncertain"
+        if final_label == "Uncertain" or any(l == "Human" for l in labels):
             human_features = sum([
                 any(ppl > ppl_threshold for ppl in ppls),
                 burstiness > burstiness_threshold,
                 ttr < ttr_threshold
             ])
-            if human_features
+            if human_features == 3:
                 final_label = "Human"
+
         avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
         avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
         logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Avg Perplexity: {avg_ppl:.2f} | {note_features}")

@@ -207,7 +221,7 @@ def detect(input_text: TextInput):
             "prediction": final_label,
             "classifier_score": round(avg_clf_score, 4),
             "perplexity": round(avg_ppl, 2),
-            "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}.",
+            "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}. Weighted Score={avg_weighted_score:.4f}.",
             "chunk_results": [
                 {"chunk": chunk[:50] + "...", "label": labels[i], "classifier_score": clf_scores[i], "perplexity": ppls[i], "burstiness": burstiness, "ttr": ttr}
                 for i, chunk in enumerate(chunks)

@@ -219,7 +233,7 @@ def detect(input_text: TextInput):
         labels = []
         for det_idx, detector in enumerate(detectors):
             clf_score = get_classifier_score(cleaned_text, detector)
-            label = "AI" if clf_score >= 0.
+            label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
             labels.append(label)
             clf_scores.append(clf_score)
             logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")

@@ -235,20 +249,23 @@ def detect(input_text: TextInput):
         ])
         feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+        # Calculate weighted score
+        weighted_score = calculate_weighted_score(avg_clf_score, ppl, burstiness, ttr, detected_lang)
+        final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+        # Require all 3 features to override to Human
+        if final_label == "Uncertain" or any(l == "Human" for l in labels):
+            if human_features == 3:
                 final_label = "Human"
-        elif final_label == "AI" and avg_clf_score < 0.
+        elif final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
             final_label = "Human"

-        note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}."
+        note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
-        if 0.60 <= avg_clf_score < 0.
+        if 0.60 <= avg_clf_score < 0.90:
             note += " Warning: Close to threshold, result may be uncertain."
         logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
     else:
         clf_score = get_classifier_score(cleaned_text, arabic_detector)
-        final_label = "AI" if clf_score >= 0.
+        final_label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
         # Count human-like features
         human_features = sum([
             ppl > ppl_threshold,

@@ -257,17 +274,20 @@ def detect(input_text: TextInput):
         ])
         feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+        # Calculate weighted score
+        weighted_score = calculate_weighted_score(clf_score, ppl, burstiness, ttr, detected_lang)
+        final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+        # Require all 3 features to override to Human
         if final_label == "Uncertain" or final_label == "Human":
-            if human_features
+            if human_features == 3:
                 final_label = "Human"
-        elif final_label == "AI" and clf_score < 0.
+        elif final_label == "AI" and clf_score < 0.90 and human_features == 3:
             final_label = "Human"
+
-        note = f"{note_lang}. {note_features}. {feature_note}."
+        note = f"{note_lang}. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
-        if 0.60 <= clf_score < 0.
+        if 0.60 <= clf_score < 0.90:
             note += " Warning: Close to threshold, result may be uncertain."
-        logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {
+        logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
     return {
         "prediction": final_label,
         "classifier_score": round(avg_clf_score, 4),
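The main addition in this commit is calculate_weighted_score() plus the 0.7 / 0.4 cutoffs that detect() now applies to its output. Below is a minimal standalone sketch for readers who want to see the numbers: it copies the function as committed and runs it on two made-up inputs. The sample clf_score/ppl/burstiness/ttr values and the label_from_weighted_score() helper are illustrative only and are not part of app.py.

def calculate_weighted_score(clf_score: float, ppl: float, burstiness: float, ttr: float, detected_lang: str) -> float:
    """Calculate a weighted score combining classifier and features (copied from the commit)."""
    ppl_norm = min(ppl / 200, 1.0)  # Normalize perplexity (cap at 200)
    burstiness_norm = min(burstiness / (2.0 if detected_lang == 'en' else 1.5), 1.0)  # Normalize burstiness
    ttr_norm = max(0.1 / max(ttr, 0.01), 1.0)  # Normalize TTR (inverse, cap at 0.1)
    feature_score = (ppl_norm + burstiness_norm + ttr_norm) / 3  # Average feature score
    return 0.6 * clf_score + 0.4 * feature_score  # Weight classifier higher

def label_from_weighted_score(score: float) -> str:
    # Mirrors the inline conditional detect() now applies to the weighted score.
    return "AI" if score >= 0.7 else "Human" if score < 0.4 else "Uncertain"

# Hypothetical English text: confident classifier, low perplexity and burstiness, high TTR.
ws = calculate_weighted_score(clf_score=0.92, ppl=40.0, burstiness=1.0, ttr=0.30, detected_lang='en')
print(round(ws, 3), label_from_weighted_score(ws))   # 0.779 AI

# Hypothetical Arabic text: weak classifier, high perplexity and burstiness.
ws = calculate_weighted_score(clf_score=0.30, ppl=180.0, burstiness=1.4, ttr=0.20, detected_lang='ar')
print(round(ws, 3), label_from_weighted_score(ws))   # 0.558 Uncertain

Two things the sketch makes visible: the 0.6/0.4 split keeps the classifier dominant, and because ttr_norm is floored at 1.0 by the max() call (rising above 1.0 once TTR drops below 0.1), the feature average tends to pull borderline results toward "Uncertain" or "AI" rather than "Human".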
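The other behavioral change is the override rule: a result the weighted score leaves "Uncertain" (or where a detector voted "Human") is flipped to "Human" only when all three feature checks pass against the raised thresholds. The short sketch below isolates just that counting step; human_feature_count() is an illustrative helper, not a function in app.py, and the sample values are hypothetical.

def human_feature_count(ppl: float, burstiness: float, ttr: float, detected_lang: str) -> int:
    # New thresholds from this commit.
    ppl_threshold = 150
    burstiness_threshold = 1.7 if detected_lang == 'en' else 1.2
    ttr_threshold = 0.08
    # Each condition is counted as a human-like signal in app.py.
    return sum([
        ppl > ppl_threshold,
        burstiness > burstiness_threshold,
        ttr < ttr_threshold,
    ])

print(human_feature_count(ppl=170.0, burstiness=1.9, ttr=0.05, detected_lang='en') == 3)  # True -> override to "Human" can fire
print(human_feature_count(ppl=170.0, burstiness=1.5, ttr=0.05, detected_lang='en') == 3)  # False -> label stands

Requiring 3/3 signals, on top of the stricter thresholds (150 vs 100 for perplexity, 1.7/1.2 vs 1.5/1.0 for burstiness, 0.08 vs 0.10 for TTR), makes the human override noticeably harder to trigger than before.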