Update app.py

app.py CHANGED
@@ -6,6 +6,7 @@ Swin/CAFormer/DINOv2 AI detection
 • Swin-V7 / V8 / V9 : 4-class (photo / anime × AI / Non-AI)
 • CAFormer-V10 : 4-class (photo / anime × AI / Non-AI)
 • DINOv2-4class : 4-class (photo / anime × AI / Non-AI)
+• DINOv2-MeanPool-Contrastive : 4-class (photo / anime × AI / Non-AI)
 -------------------------------------------------------------------
 Author: telecomadm1145
 """
@@ -35,7 +36,9 @@ HF_FILENAMES = {
     "V1-CAFormer": "caformer_b36_4class.safetensors",
     "V2-CAFormer": "caformer_b36_4class_95.safetensors",
     "V2.5-CAFormer": "caformer_b36_4class_96.safetensors",
-    "DINOv2-4class": "dinov2_4class.safetensors",
+    "DINOv2-4class": "dinov2_4class.safetensors",
+    # Added new DINOv2 checkpoint filename
+    "DINOv2-MeanPool-Contrastive": "dinov2-base-4class-contrastive_epoch4.safetensors",
 }
 
 CKPT_META = {
@@ -57,13 +60,20 @@ CKPT_META = {
                        "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
     "V2.5-CAFormer": { "n_cls": 4, "head": "v7", "backbone": "caformer_b36.sail_in22k_ft_in1k_384",
                        "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
-    #
+    # Updated original DINOv2 metadata with a specific model_type
     "DINOv2-4class": {
-        "model_type": "dinov2",
+        "model_type": "dinov2_weighted_pool",
         "backbone": 'facebook/dinov2-base',
        "n_cls": 4,
         "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]
     },
+    # Added new DINOv2 model metadata
+    "DINOv2-MeanPool-Contrastive": {
+        "model_type": "dinov2_mean_pool",
+        "backbone": 'facebook/dinov2-base',
+        "n_cls": 4,
+        "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]
+    }
 }
 
 DEFAULT_CKPT = "V1-CAFormer"
@@ -79,8 +89,8 @@ print(f"Using device: {device}")
 model, current_ckpt = None, None
 current_meta = None
 
-# ---
-class DINOv2Classifier(nn.Module):
+# --- Original DINOv2 Classifier (Weighted Attention Pooling) ---
+class DINOv2Classifier_WeightedPool(nn.Module):
     def __init__(self, model_name, num_classes):
         super().__init__()
         self.backbone = AutoModel.from_pretrained(model_name)
@@ -125,10 +135,36 @@ class DINOv2Classifier(nn.Module):
         pooling_weights = torch.softmax(raw_weights, dim=-1)
         pooled_output = torch.sum(outputs.last_hidden_state * pooling_weights.unsqueeze(-1), dim=1)
         return self.classifier(pooled_output)
-
+
+# --- New DINOv2 Classifier (Mean Pooling) ---
+class DINOv2Classifier_MeanPool(nn.Module):
+    def __init__(self, model_name, num_classes):
+        super().__init__()
+        self.backbone = AutoModel.from_pretrained(model_name)
+        self.classifier = nn.Sequential(
+            nn.Dropout(DROPOUT_RATE),
+            nn.Linear(self.backbone.config.hidden_size, self.backbone.config.hidden_size),
+            nn.LayerNorm(self.backbone.config.hidden_size),
+            nn.GELU(),
+            nn.Dropout(DROPOUT_RATE),
+            nn.Linear(self.backbone.config.hidden_size, num_classes)
+        )
+
+        for module in self.classifier:
+            if isinstance(module, nn.Linear):
+                nn.init.xavier_uniform_(module.weight)
+                nn.init.constant_(module.bias, 0)
+
+    def forward(self, x, return_features=False):
+        outputs = self.backbone(x)
+        pooled_output = outputs.last_hidden_state.mean(dim=1)
+
+        if return_features:
+            return pooled_output
+
+        return self.classifier(pooled_output)
 
 
-# Renamed to ImageClassifier for clarity, but keeping original name to avoid breaking changes if subclassed elsewhere.
 class SwinClassifier(nn.Module):
     def __init__(self, model_name, num_classes, pretrained=True,
                  head_version="v4"):
@@ -195,8 +231,6 @@ def load_model(ckpt_name: str):
     meta = CKPT_META[ckpt_name]
     ckpt_filename = HF_FILENAMES[ckpt_name]
 
-    # Check if the checkpoint is DINOv2 and handle its local path
-    # Download other models from HF Hub
     ckpt_file = hf_hub_download(
         repo_id=REPO_ID,
         filename=ckpt_filename,
@@ -205,8 +239,14 @@
     print(f"Checkpoint: {ckpt_file}")
 
     # Build model structure based on model_type
-    if meta.get("model_type") == "dinov2":
-        model = DINOv2Classifier(
+    model_type = meta.get("model_type")
+    if model_type == "dinov2_weighted_pool":
+        model = DINOv2Classifier_WeightedPool(
+            model_name=meta["backbone"],
+            num_classes=meta["n_cls"]
+        ).to(device)
+    elif model_type == "dinov2_mean_pool":
+        model = DINOv2Classifier_MeanPool(
             model_name=meta["backbone"],
             num_classes=meta["n_cls"]
         ).to(device)
@@ -253,8 +293,8 @@ def predict(image: Image.Image,
     load_model(ckpt_name)
 
     # Select transform based on the current model type
-    if current_meta.get("model_type") == "dinov2":
-        # DINOv2 specific transform
+    if "dinov2" in current_meta.get("model_type", ""):
+        # DINOv2 specific transform
         tfm = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.ToTensor(),
@@ -283,7 +323,7 @@ def launch():
         gr.Markdown("# AI Detector")
         gr.Markdown(
             "Choose a model checkpoint on the left, upload an image, "
-            "and click **Run** to see predictions. Checkpoint V7+ and DINOv2 output 4 classes."
+            "and click **Run** to see predictions. Checkpoint V7+ and all DINOv2 models output 4 classes."
        )
 
         with gr.Row():
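A note for review: the only functional difference between the two DINOv2 heads is the pooling step. Below is a toy comparison; the token shape is assumed from facebook/dinov2-base at 224x224 input (256 patch tokens plus one CLS token, hidden size 768), and the Linear scorer is a stand-in, since this diff does not show how raw_weights is computed in the weighted-pool class.

import torch
import torch.nn as nn

tokens = torch.randn(1, 257, 768)  # [batch, tokens, hidden], as produced by dinov2-base

# Mean pooling (DINOv2Classifier_MeanPool): every token weighted equally.
mean_pooled = tokens.mean(dim=1)                                     # [1, 768]

# Weighted attention pooling (DINOv2Classifier_WeightedPool, sketched):
# a learned per-token score, softmax-normalized into pooling weights.
scorer = nn.Linear(768, 1)                                           # stand-in for the real scorer
pooling_weights = torch.softmax(scorer(tokens).squeeze(-1), dim=-1)  # [1, 257]
weighted_pooled = torch.sum(tokens * pooling_weights.unsqueeze(-1), dim=1)  # [1, 768]

The return_features=True branch of the new class returns the pooled embedding before the classifier head; judging by the checkpoint name (...contrastive_epoch4), that is presumably the feature the contrastive objective was trained on.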
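And a minimal sketch of the load-and-predict path this diff wires up, for trying the new checkpoint outside the Space. Assumptions: REPO_ID and DROPOUT_RATE are defined elsewhere in app.py and are placeholders here, the normalization statistics are the usual ImageNet values implied by the truncated transform, and DINOv2Classifier_MeanPool is the class added above (together with its AutoModel import).

import torch
from PIL import Image
from torchvision import transforms
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

DROPOUT_RATE = 0.1                        # placeholder; the real value is set elsewhere in app.py
REPO_ID = "user/ai-detector-checkpoints"  # placeholder; the real repo id is set elsewhere in app.py

# Fetch the new mean-pool checkpoint, as load_model() does.
ckpt_file = hf_hub_download(
    repo_id=REPO_ID,
    filename="dinov2-base-4class-contrastive_epoch4.safetensors",
)

# Build the model from its CKPT_META entry and load the weights.
model = DINOv2Classifier_MeanPool(model_name="facebook/dinov2-base", num_classes=4)
model.load_state_dict(load_file(ckpt_file))
model.eval()

# The DINOv2 branch of predict(): 224x224 resize, assumed ImageNet normalization.
tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

x = tfm(Image.open("sample.jpg").convert("RGB")).unsqueeze(0)
with torch.no_grad():
    probs = torch.softmax(model(x), dim=-1)[0]
for name, p in zip(["non_ai", "ai", "ani_non_ai", "ani_ai"], probs.tolist()):
    print(f"{name}: {p:.3f}")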