Spaces:

iyadalagha
/

ai-text-detector-app

Sleeping

App Files Files Community

iyadalagha committed on Aug 27

Commit

35534f7

1 Parent(s): 97c4648

Handle both Arabic and English

Browse files

Files changed (1) hide show

app.py +64 -24

app.py CHANGED Viewed

@@ -138,13 +138,19 @@ def detect(input_text: TextInput):
     cleaned_text = clean_text(input_text.text, detected_lang)
     burstiness = calculate_burstiness(cleaned_text)
     ttr = calculate_ttr(cleaned_text)
-    note_features = f"Burstiness: {burstiness:.2f} (high suggests human), TTR: {ttr:.2f} (low suggests human)"
     # Select appropriate models
     detectors = english_detectors if detected_lang == 'en' else [arabic_detector]
-    ppl_model = ppl_english if detected_lang == 'en' else ppl_arabic
     is_ensemble = detected_lang == 'en'
     if len(cleaned_text) > 10000:
         chunks = split_text(cleaned_text, max_chars=5000)
         labels = []
@@ -156,29 +162,43 @@ def detect(input_text: TextInput):
             chunk_clf_scores = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(chunk, detector)
-                label = "AI" if clf_score >= 0.99 else "Human" if clf_score < 0.60 else "Uncertain"
                 chunk_labels.append(label)
                 chunk_clf_scores.append(clf_score)
                 logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
-            ppl = get_perplexity(chunk, ppl_model["tokenizer"], ppl_model["model"])
             chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
             avg_clf_score = np.mean(chunk_clf_scores)
-            # Combine classifier, perplexity, burstiness, and TTR
             if chunk_final_label == "Uncertain" or len(set(chunk_labels)) == len(detectors) or any(l == "Human" for l in chunk_labels):
-                if ppl > 60 or burstiness > 1.2 or ttr < 0.12:
                     chunk_final_label = "Human"
-            elif chunk_final_label == "AI" and (ppl > 60 or burstiness > 1.2 or ttr < 0.12):
                 chunk_final_label = "Human"
             labels.append(chunk_final_label)
             clf_scores.append(avg_clf_score)
-            ppls.append(ppl)
-            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Perplexity={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f}")
         label_counts = Counter(labels)
         final_label = label_counts.most_common(1)[0][0]
         if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
-            if any(ppl > 60 for ppl in ppls) or burstiness > 1.2 or ttr < 0.12:
                 final_label = "Human"
         avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
         avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
@@ -199,35 +219,55 @@ def detect(input_text: TextInput):
             labels = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(cleaned_text, detector)
-                label = "AI" if clf_score >= 0.99 else "Human" if clf_score < 0.60 else "Uncertain"
                 labels.append(label)
                 clf_scores.append(clf_score)
                 logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
-            ppl = get_perplexity(cleaned_text, ppl_model["tokenizer"], ppl_model["model"])
             label_counts = Counter(labels)
             final_label = label_counts.most_common(1)[0][0]
             if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
-                if ppl > 60 or burstiness > 1.2 or ttr < 0.12:
                     final_label = "Human"
-            elif final_label == "AI" and (ppl > 60 or burstiness > 1.2 or ttr < 0.12):
                 final_label = "Human"
-            avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
-            note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. Perplexity: {ppl:.2f}."
-            if 0.60 <= avg_clf_score < 0.99:
                 note += " Warning: Close to threshold, result may be uncertain."
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | Model Scores: {clf_scores} | {note_features}")
         else:
             clf_score = get_classifier_score(cleaned_text, arabic_detector)
-            ppl = get_perplexity(cleaned_text, ppl_model["tokenizer"], ppl_model["model"])
-            final_label = "AI" if clf_score >= 0.97 else "Human" if clf_score < 0.60 else "Uncertain"
             if final_label == "Uncertain" or final_label == "Human":
-                if ppl > 60 or burstiness > 0.8 or ttr < 0.12:
                     final_label = "Human"
             avg_clf_score = clf_score
-            note = f"{note_lang}. {note_features}. Perplexity: {ppl:.2f}."
-            if 0.60 <= clf_score < 0.97:
                 note += " Warning: Close to threshold, result may be uncertain."
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features}")
         return {
             "prediction": final_label,
             "classifier_score": round(avg_clf_score, 4),

     cleaned_text = clean_text(input_text.text, detected_lang)
     burstiness = calculate_burstiness(cleaned_text)
     ttr = calculate_ttr(cleaned_text)
+    ppl_model = ppl_english if detected_lang == 'en' else ppl_arabic
+    ppl = get_perplexity(cleaned_text, ppl_model["tokenizer"], ppl_model["model"])
+    note_features = f"Burstiness: {burstiness:.2f} (high suggests human), TTR: {ttr:.2f} (low suggests human), Perplexity: {ppl:.2f} (high suggests human)"
     # Select appropriate models
     detectors = english_detectors if detected_lang == 'en' else [arabic_detector]
     is_ensemble = detected_lang == 'en'
+    # Thresholds for human classification
+    ppl_threshold = 100  # Increased from 60
+    burstiness_threshold = 1.5 if detected_lang == 'en' else 1.0  # Increased from 1.2/0.8
+    ttr_threshold = 0.10  # Decreased from 0.12
     if len(cleaned_text) > 10000:
         chunks = split_text(cleaned_text, max_chars=5000)
         labels = []
             chunk_clf_scores = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(chunk, detector)
+                label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted AI threshold from 0.99
                 chunk_labels.append(label)
                 chunk_clf_scores.append(clf_score)
                 logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
+            chunk_ppl = get_perplexity(chunk, ppl_model["tokenizer"], ppl_model["model"])
             chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
             avg_clf_score = np.mean(chunk_clf_scores)
+            # Count how many human-like features are present
+            human_features = sum([
+                chunk_ppl > ppl_threshold,
+                burstiness > burstiness_threshold,
+                ttr < ttr_threshold
+            ])
+            feature_note = f"Human-like features: {human_features}/3 (PPL={chunk_ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
+            # Require at least 2 features to override to Human
             if chunk_final_label == "Uncertain" or len(set(chunk_labels)) == len(detectors) or any(l == "Human" for l in chunk_labels):
+                if human_features >= 2:
                     chunk_final_label = "Human"
+            elif chunk_final_label == "AI" and avg_clf_score < 0.95 and human_features >= 2:
                 chunk_final_label = "Human"
             labels.append(chunk_final_label)
             clf_scores.append(avg_clf_score)
+            ppls.append(chunk_ppl)
+            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")
         label_counts = Counter(labels)
         final_label = label_counts.most_common(1)[0][0]
         if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
+            human_features = sum([
+                any(ppl > ppl_threshold for ppl in ppls),
+                burstiness > burstiness_threshold,
+                ttr < ttr_threshold
+            ])
+            if human_features >= 2:
                 final_label = "Human"
         avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
         avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
             labels = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(cleaned_text, detector)
+                label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted AI threshold from 0.99
                 labels.append(label)
                 clf_scores.append(clf_score)
                 logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")
             label_counts = Counter(labels)
             final_label = label_counts.most_common(1)[0][0]
+            avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
+            # Count human-like features
+            human_features = sum([
+                ppl > ppl_threshold,
+                burstiness > burstiness_threshold,
+                ttr < ttr_threshold
+            ])
+            feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
+            # Require at least 2 features to override to Human
             if final_label == "Uncertain" or len(set(labels)) == len(detectors) or any(l == "Human" for l in labels):
+                if human_features >= 2:
                     final_label = "Human"
+            elif final_label == "AI" and avg_clf_score < 0.95 and human_features >= 2:
                 final_label = "Human"
+            note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}."
+            if 0.60 <= avg_clf_score < 0.95:
                 note += " Warning: Close to threshold, result may be uncertain."
+            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
         else:
             clf_score = get_classifier_score(cleaned_text, arabic_detector)
+            final_label = "AI" if clf_score >= 0.95 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted AI threshold from 0.97
+            # Count human-like features
+            human_features = sum([
+                ppl > ppl_threshold,
+                burstiness > burstiness_threshold,
+                ttr < ttr_threshold
+            ])
+            feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"
+            # Require at least 2 features to override to Human
             if final_label == "Uncertain" or final_label == "Human":
+                if human_features >= 2:
                     final_label = "Human"
+            elif final_label == "AI" and clf_score < 0.95 and human_features >= 2:
+                final_label = "Human"
             avg_clf_score = clf_score
+            note = f"{note_lang}. {note_features}. {feature_note}."
+            if 0.60 <= clf_score < 0.95:
                 note += " Warning: Close to threshold, result may be uncertain."
+            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
         return {
             "prediction": final_label,
             "classifier_score": round(avg_clf_score, 4),