import numpy as np from typing import List, Dict, Tuple import cv2 from pathlib import Path import yaml class YOLOv11Detector: """YOLOv11 detector for car damage detection""" def __init__(self, config_path: str = "config.yaml"): """Initialize YOLOv11 detector with configuration""" with open(config_path, 'r') as f: self.config = yaml.safe_load(f) model_path = self.config['model']['path'] # Check which model file exists if not Path(model_path).exists(): # Try to find available model files model_dir = Path("models") if (model_dir / "best.pt").exists(): model_path = str(model_dir / "best.pt") print(f"Using best.pt model from training") elif (model_dir / "last.pt").exists(): model_path = str(model_dir / "last.pt") print(f"Using last.pt checkpoint model") elif (model_dir / "best.onnx").exists(): model_path = str(model_dir / "best.onnx") print(f"Using best.onnx model") else: raise FileNotFoundError(f"No model files found in models/ directory!") self.model_path = model_path self.device = self.config['model']['device'] self.confidence = self.config['model']['confidence'] self.iou_threshold = self.config['model']['iou_threshold'] self.classes = self.config['detection']['classes'] # Load model based on format if model_path.endswith('.onnx'): self._load_onnx_model() else: # .pt format self._load_pytorch_model() def _load_pytorch_model(self): """Load PyTorch model using Ultralytics""" from ultralytics import YOLO self.model = YOLO(self.model_path) # Set model to appropriate device if self.device == 'cuda:0': self.model.to('cuda') print(f"Loaded PyTorch model: {self.model_path}") def _load_onnx_model(self): """Load ONNX model using OpenCV DNN""" self.net = cv2.dnn.readNet(self.model_path) # Set backend based on device if self.device == 'cuda:0': self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) else: self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) print(f"Loaded ONNX model: {self.model_path}") def detect(self, image: np.ndarray) -> Dict: """ Perform detection on image Args: image: Input image as numpy array (BGR format) Returns: Dictionary containing detection results """ if self.model_path.endswith('.onnx'): return self._detect_onnx(image) else: return self._detect_pytorch(image) def _detect_pytorch(self, image: np.ndarray) -> Dict: """Detection using PyTorch model""" # Run YOLO inference results = self.model( image, conf=self.confidence, iou=self.iou_threshold, device=self.device, verbose=False ) # Parse results detections = { 'boxes': [], 'confidences': [], 'classes': [], 'class_ids': [] } if len(results) > 0 and results[0].boxes is not None: boxes = results[0].boxes for box in boxes: # Get box coordinates (xyxy format) x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() # Get confidence and class conf = float(box.conf[0].cpu().numpy()) cls_id = int(box.cls[0].cpu().numpy()) # Map class ID to class name if cls_id < len(self.classes): cls_name = self.classes[cls_id] else: cls_name = f"class_{cls_id}" detections['boxes'].append([int(x1), int(y1), int(x2), int(y2)]) detections['confidences'].append(conf) detections['classes'].append(cls_name) detections['class_ids'].append(cls_id) return detections def _detect_onnx(self, image: np.ndarray) -> Dict: """Detection using ONNX model (compatible with original code)""" height, width = image.shape[:2] # Preprocess image for ONNX blob = cv2.dnn.blobFromImage( image, 1/255.0, (640, 640), swapRB=True, crop=False ) self.net.setInput(blob) preds = self.net.forward() preds = preds.transpose((0, 2, 1)) # Extract outputs detections = self._extract_onnx_output( preds=preds, image_shape=(height, width), input_shape=(640, 640) ) return detections def _extract_onnx_output(self, preds: np.ndarray, image_shape: Tuple[int, int], input_shape: Tuple[int, int]) -> Dict: """Extract detection results from ONNX model output""" class_ids, confs, boxes = [], [], [] image_height, image_width = image_shape input_height, input_width = input_shape x_factor = image_width / input_width y_factor = image_height / input_height rows = preds[0].shape[0] for i in range(rows): row = preds[0][i] conf = row[4] classes_score = row[4:] _, _, _, max_idx = cv2.minMaxLoc(classes_score) class_id = max_idx[1] if classes_score[class_id] > self.confidence: confs.append(float(conf)) label = self.classes[int(class_id)] if int(class_id) < len(self.classes) else f"class_{class_id}" class_ids.append(label) # Extract boxes x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item() left = int((x - 0.5 * w) * x_factor) top = int((y - 0.5 * h) * y_factor) width = int(w * x_factor) height = int(h * y_factor) box = [left, top, left + width, top + height] boxes.append(box) # Apply NMS if len(boxes) > 0: indices = cv2.dnn.NMSBoxes( [[b[0], b[1], b[2]-b[0], b[3]-b[1]] for b in boxes], confs, self.confidence, self.iou_threshold ) if len(indices) > 0: indices = indices.flatten() return { 'boxes': [boxes[i] for i in indices], 'confidences': [confs[i] for i in indices], 'classes': [class_ids[i] for i in indices], 'class_ids': list(range(len(indices))) } return {'boxes': [], 'confidences': [], 'classes': [], 'class_ids': []} def detect_batch(self, images: List[np.ndarray]) -> List[Dict]: """Detect on multiple images""" return [self.detect(img) for img in images]