iyadalagha committed on
Commit
c14e307
·
1 Parent(s): 35534f7

handle both ar and eng

Browse files
Files changed (1) hide show
  1. app.py +48 -28
app.py CHANGED
@@ -102,6 +102,14 @@ def get_perplexity(text: str, tokenizer, model) -> float:
102
  ppl = torch.exp(torch.stack(nlls).sum() / end_loc if nlls else torch.tensor(0)).item()
103
  return ppl
104
 
 
 
 
 
 
 
 
 
105
  def split_text(text: str, max_chars: int = 5000) -> list:
106
  """Split text into chunks of max_chars, preserving sentence boundaries."""
107
  sentences = re.split(r'(?<=[.!?])\s+', text)
@@ -147,9 +155,9 @@ def detect(input_text: TextInput):
147
  is_ensemble = detected_lang == 'en'
148
 
149
  # Thresholds for human classification
150
- ppl_threshold = 100 # Increased from 60
151
- burstiness_threshold = 1.5 if detected_lang == 'en' else 1.0 # Increased from 1.2/0.8
152
- ttr_threshold = 0.10 # Decreased from 0.12
153
 
154
  if len(cleaned_text) > 10000:
155
  chunks = split_text(cleaned_text, max_chars=5000)
@@ -162,7 +170,7 @@ def detect(input_text: TextInput):
162
  chunk_clf_scores = []
163
  for det_idx, detector in enumerate(detectors):
164
  clf_score = get_classifier_score(chunk, detector)
165
- label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted AI threshold from 0.99
166
  chunk_labels.append(label)
167
  chunk_clf_scores.append(clf_score)
168
  logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
@@ -170,7 +178,7 @@ def detect(input_text: TextInput):
170
  chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
171
  avg_clf_score = np.mean(chunk_clf_scores)
172
 
173
- # Count how many human-like features are present
174
  human_features = sum([
175
  chunk_ppl > ppl_threshold,
176
  burstiness > burstiness_threshold,
@@ -178,28 +186,34 @@ def detect(input_text: TextInput):
178
  ])
179
  feature_note = f"Human-like features: {human_features}/3 (PPL={chunk_ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
180
 
181
- # Require at least 2 features to override to Human
182
- if chunk_final_label == "Uncertain" or len(set(chunk_labels)) == len(detectors) or any(l == "Human" for l in chunk_labels):
183
- if human_features >= 2:
 
 
 
184
  chunk_final_label = "Human"
185
- elif chunk_final_label == "AI" and avg_clf_score < 0.95 and human_features >= 2:
186
  chunk_final_label = "Human"
187
 
188
  labels.append(chunk_final_label)
189
  clf_scores.append(avg_clf_score)
190
  ppls.append(chunk_ppl)
191
- logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")
192
 
193
  label_counts = Counter(labels)
194
  final_label = label_counts.most_common(1)[0][0]
195
- if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
 
 
196
  human_features = sum([
197
  any(ppl > ppl_threshold for ppl in ppls),
198
  burstiness > burstiness_threshold,
199
  ttr < ttr_threshold
200
  ])
201
- if human_features >= 2:
202
  final_label = "Human"
 
203
  avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
204
  avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
205
  logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Avg Perplexity: {avg_ppl:.2f} | {note_features}")
@@ -207,7 +221,7 @@ def detect(input_text: TextInput):
207
  "prediction": final_label,
208
  "classifier_score": round(avg_clf_score, 4),
209
  "perplexity": round(avg_ppl, 2),
210
- "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}.",
211
  "chunk_results": [
212
  {"chunk": chunk[:50] + "...", "label": labels[i], "classifier_score": clf_scores[i], "perplexity": ppls[i], "burstiness": burstiness, "ttr": ttr}
213
  for i, chunk in enumerate(chunks)
@@ -219,7 +233,7 @@ def detect(input_text: TextInput):
219
  labels = []
220
  for det_idx, detector in enumerate(detectors):
221
  clf_score = get_classifier_score(cleaned_text, detector)
222
- label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted AI threshold from 0.99
223
  labels.append(label)
224
  clf_scores.append(clf_score)
225
  logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
@@ -235,20 +249,23 @@ def detect(input_text: TextInput):
235
  ])
236
  feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
237
 
238
- # Require at least 2 features to override to Human
239
- if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
240
- if human_features >= 2:
 
 
 
241
  final_label = "Human"
242
- elif final_label == "AI" and avg_clf_score < 0.95 and human_features >= 2:
243
  final_label = "Human"
244
 
245
- note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}."
246
- if 0.60 <= avg_clf_score < 0.95:
247
  note += " Warning: Close to threshold, result may be uncertain."
248
  logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
249
  else:
250
  clf_score = get_classifier_score(cleaned_text, arabic_detector)
251
- final_label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted AI threshold from 0.97
252
  # Count human-like features
253
  human_features = sum([
254
  ppl > ppl_threshold,
@@ -257,17 +274,20 @@ def detect(input_text: TextInput):
257
  ])
258
  feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
259
 
260
- # Require at least 2 features to override to Human
 
 
 
261
  if final_label == "Uncertain" or final_label == "Human":
262
- if human_features >= 2:
263
  final_label = "Human"
264
- elif final_label == "AI" and clf_score < 0.95 and human_features >= 2:
265
  final_label = "Human"
266
- avg_clf_score = clf_score
267
- note = f"{note_lang}. {note_features}. {feature_note}."
268
- if 0.60 <= clf_score < 0.95:
269
  note += " Warning: Close to threshold, result may be uncertain."
270
- logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
271
  return {
272
  "prediction": final_label,
273
  "classifier_score": round(avg_clf_score, 4),
 
102
  ppl = torch.exp(torch.stack(nlls).sum() / end_loc if nlls else torch.tensor(0)).item()
103
  return ppl
104
 
105
def calculate_weighted_score(clf_score: float, ppl: float, burstiness: float, ttr: float, detected_lang: str) -> float:
    """Combine the classifier score with three normalised text features.

    Each feature is mapped into a bounded range and the three are averaged;
    the result is blended with the classifier score at a 60/40 weighting.

    Args:
        clf_score: Classifier score for the text (used as-is, weight 0.6).
        ppl: Perplexity of the text; scaled by 200 and capped at 1.0.
        burstiness: Burstiness of the text; scaled by 2.0 for ``'en'`` and
            by 1.5 for any other language, then capped at 1.0.
        ttr: Type-token ratio; mapped inversely (``0.1 / ttr``) and capped
            at 1.0. Values below 0.01 are clamped to avoid division by zero.
        detected_lang: Language code; only ``'en'`` is treated specially.

    Returns:
        ``0.6 * clf_score + 0.4 * feature_score`` where ``feature_score`` is
        the mean of the three normalised features (each at most 1.0).

    NOTE(review): higher perplexity/burstiness and lower TTR *raise* this
    score, while the callers in this file label high scores as "AI" and
    count those same feature directions as human-like. Confirm the intended
    polarity of the feature term.
    """
    clf_weight = 0.6
    feature_weight = 0.4
    ppl_cap = 200
    burstiness_cap = 2.0 if detected_lang == 'en' else 1.5

    ppl_norm = min(ppl / ppl_cap, 1.0)
    burstiness_norm = min(burstiness / burstiness_cap, 1.0)
    # Inverse mapping: a TTR of 0.1 or lower saturates at 1.0. The previous
    # code used max(..., 1.0), which pinned this term to >= 1.0 (up to 10)
    # and let the combined score exceed 1.0.
    ttr_norm = min(0.1 / max(ttr, 0.01), 1.0)

    feature_score = (ppl_norm + burstiness_norm + ttr_norm) / 3
    return clf_weight * clf_score + feature_weight * feature_score
112
+
113
  def split_text(text: str, max_chars: int = 5000) -> list:
114
  """Split text into chunks of max_chars, preserving sentence boundaries."""
115
  sentences = re.split(r'(?<=[.!?])\s+', text)
 
155
  is_ensemble = detected_lang == 'en'
156
 
157
  # Thresholds for human classification
158
+ ppl_threshold = 150 # Increased from 100
159
+ burstiness_threshold = 1.7 if detected_lang == 'en' else 1.2 # Increased from 1.5/1.0
160
+ ttr_threshold = 0.08 # Decreased from 0.10
161
 
162
  if len(cleaned_text) > 10000:
163
  chunks = split_text(cleaned_text, max_chars=5000)
 
170
  chunk_clf_scores = []
171
  for det_idx, detector in enumerate(detectors):
172
  clf_score = get_classifier_score(chunk, detector)
173
+ label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted from 0.95
174
  chunk_labels.append(label)
175
  chunk_clf_scores.append(clf_score)
176
  logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
 
178
  chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
179
  avg_clf_score = np.mean(chunk_clf_scores)
180
 
181
+ # Count human-like features
182
  human_features = sum([
183
  chunk_ppl > ppl_threshold,
184
  burstiness > burstiness_threshold,
 
186
  ])
187
  feature_note = f"Human-like features: {human_features}/3 (PPL={chunk_ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
188
 
189
+ # Calculate weighted score
190
+ weighted_score = calculate_weighted_score(avg_clf_score, chunk_ppl, burstiness, ttr, detected_lang)
191
+ chunk_final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
192
+ # Require all 3 features to override to Human
193
+ if chunk_final_label == "Uncertain" or any(l == "Human" for l in chunk_labels):
194
+ if human_features == 3:
195
  chunk_final_label = "Human"
196
+ elif chunk_final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
197
  chunk_final_label = "Human"
198
 
199
  labels.append(chunk_final_label)
200
  clf_scores.append(avg_clf_score)
201
  ppls.append(chunk_ppl)
202
+ logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Weighted Score={weighted_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")
203
 
204
  label_counts = Counter(labels)
205
  final_label = label_counts.most_common(1)[0][0]
206
+ avg_weighted_score = sum(calculate_weighted_score(clf_scores[i], ppls[i], burstiness, ttr, detected_lang) for i in range(len(clf_scores))) / len(clf_scores) if clf_scores else 0.0
207
+ final_label = "AI" if avg_weighted_score >= 0.7 else "Human" if avg_weighted_score < 0.4 else "Uncertain"
208
+ if final_label == "Uncertain" or any(l == "Human" for l in labels):
209
  human_features = sum([
210
  any(ppl > ppl_threshold for ppl in ppls),
211
  burstiness > burstiness_threshold,
212
  ttr < ttr_threshold
213
  ])
214
+ if human_features == 3:
215
  final_label = "Human"
216
+
217
  avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
218
  avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
219
  logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Avg Perplexity: {avg_ppl:.2f} | {note_features}")
 
221
  "prediction": final_label,
222
  "classifier_score": round(avg_clf_score, 4),
223
  "perplexity": round(avg_ppl, 2),
224
+ "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}. Weighted Score={avg_weighted_score:.4f}.",
225
  "chunk_results": [
226
  {"chunk": chunk[:50] + "...", "label": labels[i], "classifier_score": clf_scores[i], "perplexity": ppls[i], "burstiness": burstiness, "ttr": ttr}
227
  for i, chunk in enumerate(chunks)
 
233
  labels = []
234
  for det_idx, detector in enumerate(detectors):
235
  clf_score = get_classifier_score(cleaned_text, detector)
236
+ label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted from 0.95
237
  labels.append(label)
238
  clf_scores.append(clf_score)
239
  logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
 
249
  ])
250
  feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
251
 
252
+ # Calculate weighted score
253
+ weighted_score = calculate_weighted_score(avg_clf_score, ppl, burstiness, ttr, detected_lang)
254
+ final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
255
+ # Require all 3 features to override to Human
256
+ if final_label == "Uncertain" or any(l == "Human" for l in labels):
257
+ if human_features == 3:
258
  final_label = "Human"
259
+ elif final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
260
  final_label = "Human"
261
 
262
+ note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
263
+ if 0.60 <= avg_clf_score < 0.90:
264
  note += " Warning: Close to threshold, result may be uncertain."
265
  logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
266
  else:
267
  clf_score = get_classifier_score(cleaned_text, arabic_detector)
268
+ final_label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain" # Adjusted from 0.95
269
  # Count human-like features
270
  human_features = sum([
271
  ppl > ppl_threshold,
 
274
  ])
275
  feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
276
 
277
+ # Calculate weighted score
278
+ weighted_score = calculate_weighted_score(clf_score, ppl, burstiness, ttr, detected_lang)
279
+ final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
280
+ # Require all 3 features to override to Human
281
  if final_label == "Uncertain" or final_label == "Human":
282
+ if human_features == 3:
283
  final_label = "Human"
284
+ elif final_label == "AI" and clf_score < 0.90 and human_features == 3:
285
  final_label = "Human"
286
+
287
+ note = f"{note_lang}. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
288
+ if 0.60 <= clf_score < 0.90:
289
  note += " Warning: Close to threshold, result may be uncertain."
290
+ logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
291
  return {
292
  "prediction": final_label,
293
  "classifier_score": round(avg_clf_score, 4),