Update src/comparison.py

src/comparison.py  (+177 -104)  CHANGED

@@ -1,55 +1,95 @@
 import numpy as np
-from typing import List, Dict, Optional
+from typing import List, Dict, Tuple, Optional
 from scipy.optimize import linear_sum_assignment
 import yaml
 import cv2
 import hashlib
 import torch
+import torch.nn as nn
 from PIL import Image
-from transformers import SiglipModel, SiglipImageProcessor
 
+# DINOv2 availability check
+DINOV2_AVAILABLE = False
 try:
-            model_name = 'google/siglip-base-patch16-224'
-            _GLOBAL_SIGLIP_PROCESSOR = SiglipImageProcessor.from_pretrained(model_name)
-            _GLOBAL_SIGLIP_MODEL = SiglipModel.from_pretrained(model_name).to(device)
-            _GLOBAL_SIGLIP_MODEL.eval()
-                param.requires_grad = False
+    # Try loading DINOv2 from torch.hub first
+    torch.hub._validate_not_a_forked_repo = lambda a, b, c: True  # Allow loading from hub
+    DINOV2_AVAILABLE = True
+    print("✓ DINOv2 will be loaded when needed")
+except Exception as e:
+    print(f"⚠ DINOv2 preparation failed: {e}")
+
+    # Fallback: Try transformers library
+    try:
+        from transformers import AutoImageProcessor, AutoModel
+
+        DINOV2_AVAILABLE = True
+        print("✓ DINOv2 available via transformers library")
+    except ImportError:
+        print("DINOv2 not available. Using traditional features only.")
+        print("  Install with: pip install transformers torch")
+
+# Global DINOv2 model and processor
+_GLOBAL_DINOV2_MODEL = None
+_GLOBAL_DINOV2_PROCESSOR = None
+_GLOBAL_DINOV2_TRANSFORM = None
+
+
+def get_dinov2_model():
+    """Get or initialize global DINOv2 model"""
+    global _GLOBAL_DINOV2_MODEL, _GLOBAL_DINOV2_PROCESSOR, _GLOBAL_DINOV2_TRANSFORM
+
+    if _GLOBAL_DINOV2_MODEL is None and DINOV2_AVAILABLE:
+        try:
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+            # Try torch.hub first (preferred method)
+            try:
+                # Load DINOv2 small model (you can change to vitb14, vitl14, or vitg14 for larger models)
+                # vits14 = small (85M params), vitb14 = base (307M), vitl14 = large (1B), vitg14 = giant (1.8B)
+                model_name = 'dinov2_vits14'  # Using small model for speed
+                _GLOBAL_DINOV2_MODEL = torch.hub.load('facebookresearch/dinov2', model_name)
+                _GLOBAL_DINOV2_MODEL.to(device)
+                _GLOBAL_DINOV2_MODEL.eval()
+
+                # Disable gradient computation for inference
+                for param in _GLOBAL_DINOV2_MODEL.parameters():
+                    param.requires_grad = False
+
+                # Create transform for DINOv2
+                import torchvision.transforms as T
+                _GLOBAL_DINOV2_TRANSFORM = T.Compose([
+                    T.Resize(256, interpolation=T.InterpolationMode.BICUBIC),
+                    T.CenterCrop(224),
+                    T.ToTensor(),
+                    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+                ])
+
+                print(f"✓ DINOv2 model loaded: {model_name} via torch.hub")
+
+            except Exception as hub_error:
+                print(f"Torch.hub failed, trying transformers: {hub_error}")
+
+                # Fallback to transformers library
+                from transformers import AutoImageProcessor, AutoModel
+
+                model_name = "facebook/dinov2-small"  # Or dinov2-base, dinov2-large, dinov2-giant
+                _GLOBAL_DINOV2_PROCESSOR = AutoImageProcessor.from_pretrained(model_name)
+                _GLOBAL_DINOV2_MODEL = AutoModel.from_pretrained(model_name).to(device)
+                _GLOBAL_DINOV2_MODEL.eval()
+
+                for param in _GLOBAL_DINOV2_MODEL.parameters():
+                    param.requires_grad = False
+
+                print(f"✓ DINOv2 model loaded: {model_name} via transformers")
+
         except Exception as e:
-    return _GLOBAL_SIGLIP_MODEL, _GLOBAL_SIGLIP_PROCESSOR
+            print(f"⚠ DINOv2 loading failed: {e}. Using fallback features.")
+            _GLOBAL_DINOV2_MODEL = None
+            _GLOBAL_DINOV2_PROCESSOR = None
+            _GLOBAL_DINOV2_TRANSFORM = None
+
+    return _GLOBAL_DINOV2_MODEL, _GLOBAL_DINOV2_PROCESSOR, _GLOBAL_DINOV2_TRANSFORM
 
 
 class DamageComparator:
     """Enhanced damage comparator with DINOv2-based view-invariant re-identification"""
@@ -70,14 +110,14 @@ class DamageComparator:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # Get global DINOv2 model
+        self.dinov2_model, self.dinov2_processor, self.dinov2_transform = get_dinov2_model()
 
+        # ReID thresholds (DINOv2 typically needs different thresholds than CLIP)
+        self.reid_similarity_threshold = 0.7  # Slightly higher for DINOv2
         self.feature_cache = {}
 
+        # DINOv2 feature dimension (depends on model size)
+        self.dinov2_feature_dim = 384 if 'vits' in str(self.dinov2_model.__class__) else 768
 
     def calculate_iou(self, box1: List[int], box2: List[int]) -> float:
         """Calculate Intersection over Union between two boxes"""
@@ -99,48 +139,9 @@ class DamageComparator:
 
         return intersection / union
 
-    def _extract_siglip_features(self, roi: np.ndarray) -> np.ndarray:
-        """Extract SigLIP 2 vision features - faster & more accurate than DINOv2 for ReID"""
-        try:
-            model, processor = self.siglip_model, self.siglip_processor
-
-            # Convert BGR to RGB and resize (SigLIP uses 224x224)
-            roi_rgb = cv2.cvtColor(cv2.resize(roi, (224, 224)), cv2.COLOR_BGR2RGB)
-            roi_pil = Image.fromarray(roi_rgb)
-
-            # CRITICAL: no_grad + autocast for speed/memory
-            with torch.no_grad(), torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
-                # Preprocess
-                inputs = processor(roi_pil, return_tensors="pt")
-                inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
-                # Forward pass: Use vision_model to get CLS token (fix for get_image_features error)
-                # SigLIPModel.vision_model returns last_hidden_state [1, seq_len, 768]
-                outputs = model.vision_model(**inputs)
-                features = outputs.last_hidden_state[:, 0, :]  # CLS token as global feature
-
-                # Move to CPU immediately to free VRAM
-                features = features.cpu().numpy().flatten()
-
-                # Clear CUDA cache
-                if self.device.type == 'cuda':
-                    torch.cuda.empty_cache()
-
-            # Optional: Combine with edge density (for texture robustness)
-            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
-            edges = cv2.Canny(gray, 50, 150)
-            edge_density = np.sum(edges > 0) / edges.size
-            features = np.concatenate([features[:120], [edge_density * 100] * 8])  # Match DINO dim
-
-            return features[:128]  # Trim for consistency
-
-        except Exception as e:
-            print(f"SigLIP 2 feature extraction error: {e}")
-            return np.zeros(128)
-
     def extract_damage_features(self, image: np.ndarray, bbox: List[int]) -> np.ndarray:
         """
+        Extract view-invariant features for damage ReID using DINOv2
         Args:
             image: Full image (BGR format from OpenCV)
             bbox: [x1, y1, x2, y2] bounding box
@@ -160,10 +161,10 @@ class DamageComparator:
 
         features_list = []
 
+        # 1. DINOv2 features (if available) - Most powerful for ReID
+        if self.dinov2_model is not None:
+            dinov2_features = self._extract_dinov2_features(damage_roi)
+            features_list.append(dinov2_features)
 
         # 2. Geometric invariant features (always available)
         geometric_features = self._extract_geometric_features(damage_roi)
@@ -187,6 +188,71 @@ class DamageComparator:
 
         return combined_features
 
+    def _extract_dinov2_features(self, roi: np.ndarray) -> np.ndarray:
+        """Extract DINOv2 vision features - superior to CLIP for visual tasks"""
+        try:
+            # Convert BGR to RGB
+            roi_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
+
+            # Convert to PIL Image
+            roi_pil = Image.fromarray(roi_rgb)
+
+            # CRITICAL: Always use no_grad context to save memory
+            with torch.no_grad():
+                if self.dinov2_transform is not None:
+                    # Using torch.hub version
+                    # Apply transforms
+                    roi_tensor = self.dinov2_transform(roi_pil).unsqueeze(0).to(self.device)
+
+                    # Extract features
+                    features = self.dinov2_model(roi_tensor)
+
+                    # DINOv2 returns [batch_size, num_patches+1, feature_dim]
+                    # We use the [CLS] token (first token) as the global feature
+                    if len(features.shape) == 3:
+                        features = features[:, 0, :]  # Get CLS token
+
+                    # Move to CPU immediately to free VRAM
+                    features = features.cpu().numpy().flatten()
+
+                    # Clear CUDA cache if using GPU
+                    if self.device.type == 'cuda':
+                        torch.cuda.empty_cache()
+
+                elif self.dinov2_processor is not None:
+                    # Using transformers version
+                    inputs = self.dinov2_processor(images=roi_pil, return_tensors="pt")
+                    inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+                    outputs = self.dinov2_model(**inputs)
+                    # Use pooler_output if available, otherwise use last_hidden_state
+                    if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
+                        features = outputs.pooler_output
+                    else:
+                        # Use CLS token from last hidden state
+                        features = outputs.last_hidden_state[:, 0, :]
+
+                    # Move to CPU immediately to free VRAM
+                    features = features.cpu().numpy().flatten()
+
+                    # Clear CUDA cache if using GPU
+                    if self.device.type == 'cuda':
+                        torch.cuda.empty_cache()
+                else:
+                    return np.zeros(128)
+
+            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+            edges = cv2.Canny(gray, 50, 150)
+            edge_density = np.sum(edges > 0) / edges.size
+
+            # Combine with DINOv2 features
+            features = np.concatenate([features[:120], [edge_density * 100] * 8])
+
+            return features[:128]  # Take first 128 dimensions for consistency
+
+        except Exception as e:
+            print(f"DINOv2 feature extraction error: {e}")
+            return np.zeros(128)
 
     def _extract_geometric_features(self, roi: np.ndarray) -> np.ndarray:
         """Extract geometric invariant features (Hu moments)"""
@@ -277,24 +343,24 @@ class DamageComparator:
         ]
 
         return np.array(features)
 
     def compute_match_score(self, box1, box2, reid_sim,
+                        alpha=0.6, beta=0.3, gamma=0.1):
         """
         Weighted score combining ReID, IoU, and position
        alpha, beta, gamma = weights
         """
         # IoU
         iou = self.calculate_iou(box1, box2)
 
         # Calculate the distance between box centers
         cx1, cy1 = (box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2
         cx2, cy2 = (box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2
         dist = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
 
         # Convert distance → score (0 → 1)
         pos_score = max(0, 1 - dist / self.position_tolerance)
 
         # Weighted score
         return alpha * reid_sim + beta * iou + gamma * pos_score
 
@@ -314,8 +380,10 @@ class DamageComparator:
 
         boxes1 = detections1['boxes']
         boxes2 = detections2['boxes']
+        print(f"\n🔍 DEBUG match_damages_with_reid (DINOv2):")
         print(f"   Boxes1: {len(boxes1)}, Boxes2: {len(boxes2)}")
         print(f"   Images provided: {image1 is not None and image2 is not None}")
+        print(f"   DINOv2 available: {self.dinov2_model is not None}")
 
         if len(boxes1) == 0 and len(boxes2) == 0:
             return {
@@ -356,7 +424,7 @@ class DamageComparator:
         if image1 is not None and image2 is not None:
             reid_matrix = np.zeros((len(boxes1), len(boxes2)))
 
+            print("   Extracting DINOv2 features for damage matching...")
 
             # Extract features for all boxes
             features1 = [self.extract_damage_features(image1, box) for box in boxes1]
@@ -367,6 +435,7 @@ class DamageComparator:
                 for j, feat2 in enumerate(features2):
                     reid_matrix[i, j] = np.dot(feat1, feat2)  # Already normalized
 
+            print(f"   DINOv2 features extracted successfully")
 
         if reid_matrix is not None:
             combined_matrix = np.zeros_like(reid_matrix)
@@ -379,6 +448,7 @@ class DamageComparator:
         else:
             combined_matrix = iou_matrix
 
+
         # Hungarian algorithm for optimal matching
         cost_matrix = 1 - combined_matrix
         row_indices, col_indices = linear_sum_assignment(cost_matrix)
@@ -393,25 +463,27 @@ class DamageComparator:
 
         # Around line ~560 in comparison.py
         # In match_damages_with_reid, around line ~560
+        # In match_damages_with_reid, lines 556-571
         for i, j in zip(row_indices, col_indices):
             score = combined_matrix[i, j]
             iou_score = iou_matrix[i, j]
 
+            # New logic - consistent with the actual config
+            if iou_score >= self.iou_threshold:  # >= 0.35 from config
+                # Good IoU - use a low threshold
+                threshold_to_use = self.combined_score_threshold  # 0.3
+            elif iou_score > 0.1:
+                # Medium IoU - moderate threshold
+                threshold_to_use = 0.45
+            elif iou_score > 0.05:
+                # Low IoU - requires a higher score
+                threshold_to_use = 0.55
             else:
+                # Very low IoU - requires very strong ReID
+                threshold_to_use = 0.65
 
             print(f"   Pair ({i},{j}): IoU={iou_score:.3f}, Score={score:.3f}, Threshold={threshold_to_use:.3f}")
-            print(
-                f"   Will match: {score >= threshold_to_use and detections1['classes'][i] == detections2['classes'][j]}")
 
             if score >= threshold_to_use:
                 if detections1['classes'][i] == detections2['classes'][j]:
                     matched_pairs.append((i, j, score))
@@ -499,6 +571,7 @@ class DamageComparator:
                 'repaired': len(existing_damages),
                 'new': len(new_damages),
                 'using_reid': bool(before_image is not None and after_image is not None),
+                'reid_model': 'DINOv2' if self.dinov2_model is not None else 'Traditional'
             }
         }
 
@@ -542,7 +615,7 @@ class DamageComparator:
                                     detections_list: List[Dict],
                                     images_list: List[np.ndarray]) -> Dict:
         """
+        Deduplicate damages across multiple views using DINOv2 features
         Args:
             detections_list: List of detections from different views
             images_list: List of corresponding images
@@ -551,7 +624,7 @@ class DamageComparator:
         """
         all_damages = []
 
-        print(f"Deduplicating damages across {len(images_list)} ...")
+        print(f"Deduplicating damages across {len(images_list)} views using DINOv2...")
 
         # Collect all damages with their features
        for view_idx, (detections, image) in enumerate(zip(detections_list, images_list)):
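For reference, a minimal usage sketch of the matching path this commit introduces. It assumes src/comparison.py is importable as src.comparison and that DamageComparator can be constructed without arguments; the image paths, box coordinates, and similarity handling below are illustrative placeholders, not values from the repository.

import cv2
import numpy as np

from src.comparison import DamageComparator  # assumed import path for src/comparison.py

# Hypothetical inputs: two inspection photos in BGR, as loaded by OpenCV
before = cv2.imread("before.jpg")   # placeholder path
after = cv2.imread("after.jpg")     # placeholder path

comparator = DamageComparator()     # assumes a no-argument constructor

# Per-box ReID features: DINOv2 CLS token combined with edge density, trimmed to 128-D
box_before = [100, 120, 220, 260]   # [x1, y1, x2, y2], illustrative values
box_after = [110, 125, 230, 270]
feat_before = comparator.extract_damage_features(before, box_before)
feat_after = comparator.extract_damage_features(after, box_after)

# The diff treats the combined features as normalized, so a dot product serves as cosine similarity
reid_sim = float(np.dot(feat_before, feat_after))

# Weighted match score: alpha * ReID similarity + beta * IoU + gamma * position score
score = comparator.compute_match_score(box_before, box_after, reid_sim)
print(f"ReID similarity: {reid_sim:.3f}, combined match score: {score:.3f}")

In the matching loop shown above, this score is then compared against an IoU-dependent threshold (the config-driven combined_score_threshold for well-overlapping boxes, or 0.45 / 0.55 / 0.65 as the overlap drops), and a pair is only accepted when the detected classes also agree.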