Commit c14e307 (parent: 35534f7): handle both ar and eng

app.py CHANGED
@@ -102,6 +102,14 @@ def get_perplexity(text: str, tokenizer, model) -> float:
     ppl = torch.exp(torch.stack(nlls).sum() / end_loc if nlls else torch.tensor(0)).item()
     return ppl

+def calculate_weighted_score(clf_score: float, ppl: float, burstiness: float, ttr: float, detected_lang: str) -> float:
+    """Calculate a weighted score combining classifier and features."""
+    ppl_norm = min(ppl / 200, 1.0)  # Normalize perplexity (cap at 200)
+    burstiness_norm = min(burstiness / (2.0 if detected_lang == 'en' else 1.5), 1.0)  # Normalize burstiness
+    ttr_norm = max(0.1 / max(ttr, 0.01), 1.0)  # Normalize TTR (inverse, cap at 0.1)
+    feature_score = (ppl_norm + burstiness_norm + ttr_norm) / 3  # Average feature score
+    return 0.6 * clf_score + 0.4 * feature_score  # Weight classifier higher
+
 def split_text(text: str, max_chars: int = 5000) -> list:
     """Split text into chunks of max_chars, preserving sentence boundaries."""
     sentences = re.split(r'(?<=[.!?])\s+', text)

@@ -147,9 +155,9 @@ def detect(input_text: TextInput):
     is_ensemble = detected_lang == 'en'

     # Thresholds for human classification
-    ppl_threshold = 100
-    burstiness_threshold = 1.5 if detected_lang == 'en' else 1.0
-    ttr_threshold = 0.10
+    ppl_threshold = 150  # Increased from 100
+    burstiness_threshold = 1.7 if detected_lang == 'en' else 1.2  # Increased from 1.5/1.0
+    ttr_threshold = 0.08  # Decreased from 0.10

     if len(cleaned_text) > 10000:
         chunks = split_text(cleaned_text, max_chars=5000)

@@ -162,7 +170,7 @@ def detect(input_text: TextInput):
             chunk_clf_scores = []
             for det_idx, detector in enumerate(detectors):
                 clf_score = get_classifier_score(chunk, detector)
-                label = "AI" if clf_score >= 0.
+                label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
                 chunk_labels.append(label)
                 chunk_clf_scores.append(clf_score)
                 logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")

@@ -170,7 +178,7 @@ def detect(input_text: TextInput):
             chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
             avg_clf_score = np.mean(chunk_clf_scores)

-            # Count
+            # Count human-like features
             human_features = sum([
                 chunk_ppl > ppl_threshold,
                 burstiness > burstiness_threshold,

@@ -178,28 +186,34 @@ def detect(input_text: TextInput):
             ])
             feature_note = f"Human-like features: {human_features}/3 (PPL={chunk_ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+            # Calculate weighted score
+            weighted_score = calculate_weighted_score(avg_clf_score, chunk_ppl, burstiness, ttr, detected_lang)
+            chunk_final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+            # Require all 3 features to override to Human
+            if chunk_final_label == "Uncertain" or any(l == "Human" for l in chunk_labels):
+                if human_features == 3:
                     chunk_final_label = "Human"
-            elif chunk_final_label == "AI" and avg_clf_score < 0.
+            elif chunk_final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
                 chunk_final_label = "Human"

             labels.append(chunk_final_label)
             clf_scores.append(avg_clf_score)
             ppls.append(chunk_ppl)
-            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")
+            logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Classifier Score={avg_clf_score:.4f}, Weighted Score={weighted_score:.4f}, Perplexity={chunk_ppl:.2f}, {feature_note}")

         label_counts = Counter(labels)
         final_label = label_counts.most_common(1)[0][0]
+        avg_weighted_score = sum(calculate_weighted_score(clf_scores[i], ppls[i], burstiness, ttr, detected_lang) for i in range(len(clf_scores))) / len(clf_scores) if clf_scores else 0.0
+        final_label = "AI" if avg_weighted_score >= 0.7 else "Human" if avg_weighted_score < 0.4 else "Uncertain"
+        if final_label == "Uncertain" or any(l == "Human" for l in labels):
             human_features = sum([
                 any(ppl > ppl_threshold for ppl in ppls),
                 burstiness > burstiness_threshold,
                 ttr < ttr_threshold
             ])
-            if human_features
+            if human_features == 3:
                 final_label = "Human"
+
         avg_clf_score = sum(clf_scores) / len(clf_scores) if clf_scores else 0.0
         avg_ppl = sum(ppls) / len(ppls) if ppls else 0.0
         logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Avg Perplexity: {avg_ppl:.2f} | {note_features}")

@@ -207,7 +221,7 @@ def detect(input_text: TextInput):
             "prediction": final_label,
             "classifier_score": round(avg_clf_score, 4),
             "perplexity": round(avg_ppl, 2),
-            "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}.",
+            "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_features}. Weighted Score={avg_weighted_score:.4f}.",
             "chunk_results": [
                 {"chunk": chunk[:50] + "...", "label": labels[i], "classifier_score": clf_scores[i], "perplexity": ppls[i], "burstiness": burstiness, "ttr": ttr}
                 for i, chunk in enumerate(chunks)

@@ -219,7 +233,7 @@ def detect(input_text: TextInput):
         labels = []
         for det_idx, detector in enumerate(detectors):
             clf_score = get_classifier_score(cleaned_text, detector)
-            label = "AI" if clf_score >= 0.
+            label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
             labels.append(label)
             clf_scores.append(clf_score)
             logging.debug(f"Model {det_idx}: Label={label}, Classifier Score={clf_score:.4f}")

@@ -235,20 +249,23 @@ def detect(input_text: TextInput):
         ])
         feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+        # Calculate weighted score
+        weighted_score = calculate_weighted_score(avg_clf_score, ppl, burstiness, ttr, detected_lang)
+        final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+        # Require all 3 features to override to Human
+        if final_label == "Uncertain" or any(l == "Human" for l in labels):
+            if human_features == 3:
                 final_label = "Human"
-        elif final_label == "AI" and avg_clf_score < 0.
+        elif final_label == "AI" and avg_clf_score < 0.90 and human_features == 3:
             final_label = "Human"

-        note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}."
+        note = f"{note_lang}. Ensemble used: {len(detectors)} models. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
-        if 0.60 <= avg_clf_score < 0.
+        if 0.60 <= avg_clf_score < 0.90:
             note += " Warning: Close to threshold, result may be uncertain."
         logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Avg Classifier Score: {avg_clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
     else:
         clf_score = get_classifier_score(cleaned_text, arabic_detector)
-        final_label = "AI" if clf_score >= 0.
+        final_label = "AI" if clf_score >= 0.90 else "Human" if clf_score < 0.60 else "Uncertain"  # Adjusted from 0.95
         # Count human-like features
         human_features = sum([
             ppl > ppl_threshold,

@@ -257,17 +274,20 @@ def detect(input_text: TextInput):
         ])
         feature_note = f"Human-like features: {human_features}/3 (PPL={ppl:.2f}, Burstiness={burstiness:.2f}, TTR={ttr:.2f})"

+        # Calculate weighted score
+        weighted_score = calculate_weighted_score(clf_score, ppl, burstiness, ttr, detected_lang)
+        final_label = "AI" if weighted_score >= 0.7 else "Human" if weighted_score < 0.4 else "Uncertain"
+        # Require all 3 features to override to Human
         if final_label == "Uncertain" or final_label == "Human":
-            if human_features
+            if human_features == 3:
                 final_label = "Human"
-        elif final_label == "AI" and clf_score < 0.
+        elif final_label == "AI" and clf_score < 0.90 and human_features == 3:
             final_label = "Human"
+
-        note = f"{note_lang}. {note_features}. {feature_note}."
+        note = f"{note_lang}. {note_features}. {feature_note}. Weighted Score={weighted_score:.4f}."
-        if 0.60 <= clf_score < 0.
+        if 0.60 <= clf_score < 0.90:
             note += " Warning: Close to threshold, result may be uncertain."
-        logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {
+        logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Classifier Score: {clf_score:.4f} | Perplexity: {ppl:.2f} | {note_features} | {feature_note}")
     return {
         "prediction": final_label,
         "classifier_score": round(avg_clf_score, 4),
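The main addition in this commit is calculate_weighted_score() plus the 0.7 / 0.4 cutoffs that detect() now applies to its output. Below is a minimal standalone sketch for readers who want to see the numbers: it copies the function as committed and runs it on two made-up inputs. The sample clf_score/ppl/burstiness/ttr values and the label_from_weighted_score() helper are illustrative only and are not part of app.py.

def calculate_weighted_score(clf_score: float, ppl: float, burstiness: float, ttr: float, detected_lang: str) -> float:
    """Calculate a weighted score combining classifier and features (copied from the commit)."""
    ppl_norm = min(ppl / 200, 1.0)  # Normalize perplexity (cap at 200)
    burstiness_norm = min(burstiness / (2.0 if detected_lang == 'en' else 1.5), 1.0)  # Normalize burstiness
    ttr_norm = max(0.1 / max(ttr, 0.01), 1.0)  # Normalize TTR (inverse, cap at 0.1)
    feature_score = (ppl_norm + burstiness_norm + ttr_norm) / 3  # Average feature score
    return 0.6 * clf_score + 0.4 * feature_score  # Weight classifier higher

def label_from_weighted_score(score: float) -> str:
    # Mirrors the inline conditional detect() now applies to the weighted score.
    return "AI" if score >= 0.7 else "Human" if score < 0.4 else "Uncertain"

# Hypothetical English text: confident classifier, low perplexity and burstiness, high TTR.
ws = calculate_weighted_score(clf_score=0.92, ppl=40.0, burstiness=1.0, ttr=0.30, detected_lang='en')
print(round(ws, 3), label_from_weighted_score(ws))   # 0.779 AI

# Hypothetical Arabic text: weak classifier, high perplexity and burstiness.
ws = calculate_weighted_score(clf_score=0.30, ppl=180.0, burstiness=1.4, ttr=0.20, detected_lang='ar')
print(round(ws, 3), label_from_weighted_score(ws))   # 0.558 Uncertain

Two things the sketch makes visible: the 0.6/0.4 split keeps the classifier dominant, and because ttr_norm is floored at 1.0 by the max() call (rising above 1.0 once TTR drops below 0.1), the feature average tends to pull borderline results toward "Uncertain" or "AI" rather than "Human".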
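The other behavioral change is the override rule: a result the weighted score leaves "Uncertain" (or where a detector voted "Human") is flipped to "Human" only when all three feature checks pass against the raised thresholds. The short sketch below isolates just that counting step; human_feature_count() is an illustrative helper, not a function in app.py, and the sample values are hypothetical.

def human_feature_count(ppl: float, burstiness: float, ttr: float, detected_lang: str) -> int:
    # New thresholds from this commit.
    ppl_threshold = 150
    burstiness_threshold = 1.7 if detected_lang == 'en' else 1.2
    ttr_threshold = 0.08
    # Each condition is counted as a human-like signal in app.py.
    return sum([
        ppl > ppl_threshold,
        burstiness > burstiness_threshold,
        ttr < ttr_threshold,
    ])

print(human_feature_count(ppl=170.0, burstiness=1.9, ttr=0.05, detected_lang='en') == 3)  # True -> override to "Human" can fire
print(human_feature_count(ppl=170.0, burstiness=1.5, ttr=0.05, detected_lang='en') == 3)  # False -> label stands

Requiring 3/3 signals, on top of the stricter thresholds (150 vs 100 for perplexity, 1.7/1.2 vs 1.5/1.0 for burstiness, 0.08 vs 0.10 for TTR), makes the human override noticeably harder to trigger than before.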