from pathlib import Path
from typing import List, Dict, Tuple

import cv2
import numpy as np
import yaml

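# The keys read below imply a config.yaml of roughly this shape. This is an
# inferred sketch, not the project's authoritative config; the class names
# listed are placeholders.
#
#   model:
#     path: models/best.pt
#     device: cuda:0        # or "cpu"
#     confidence: 0.25
#     iou_threshold: 0.45
#   detection:
#     classes: [dent, scratch, crack]   # hypothetical damage classes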
class YOLOv11Detector:
    """YOLOv11 detector for car damage detection."""

    def __init__(self, config_path: str = "config.yaml"):
        """Initialize YOLOv11 detector with configuration."""
        with open(config_path, 'r') as f:
            self.config = yaml.safe_load(f)

        model_path = self.config['model']['path']

        # Fall back to whichever trained weights are available
        if not Path(model_path).exists():
            model_dir = Path("models")
            if (model_dir / "best.pt").exists():
                model_path = str(model_dir / "best.pt")
                print("Using best.pt model from training")
            elif (model_dir / "last.pt").exists():
                model_path = str(model_dir / "last.pt")
                print("Using last.pt checkpoint model")
            elif (model_dir / "best.onnx").exists():
                model_path = str(model_dir / "best.onnx")
                print("Using best.onnx model")
            else:
                raise FileNotFoundError("No model files found in models/ directory!")

        self.model_path = model_path
        self.device = self.config['model']['device']
        self.confidence = self.config['model']['confidence']
        self.iou_threshold = self.config['model']['iou_threshold']
        self.classes = self.config['detection']['classes']

        # Load model based on file format
        if model_path.endswith('.onnx'):
            self._load_onnx_model()
        else:  # .pt format
            self._load_pytorch_model()

    def _load_pytorch_model(self):
        """Load PyTorch weights via the Ultralytics API."""
        from ultralytics import YOLO

        self.model = YOLO(self.model_path)
        # Move the model onto the configured CUDA device (handles any
        # "cuda:N", not just "cuda:0")
        if self.device.startswith('cuda'):
            self.model.to(self.device)
        print(f"Loaded PyTorch model: {self.model_path}")

    def _load_onnx_model(self):
        """Load ONNX weights via the OpenCV DNN module."""
        self.net = cv2.dnn.readNet(self.model_path)
        # Select backend/target based on the configured device
        if self.device.startswith('cuda'):
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        else:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
        print(f"Loaded ONNX model: {self.model_path}")

    def detect(self, image: np.ndarray) -> Dict:
        """
        Perform detection on a single image.

        Args:
            image: Input image as a numpy array (BGR format)

        Returns:
            Dictionary with 'boxes' (xyxy, pixel coords), 'confidences',
            'classes' (names) and 'class_ids' lists
        """
        if self.model_path.endswith('.onnx'):
            return self._detect_onnx(image)
        else:
            return self._detect_pytorch(image)

    def _detect_pytorch(self, image: np.ndarray) -> Dict:
        """Detection using the PyTorch model."""
        # Run YOLO inference
        results = self.model(
            image,
            conf=self.confidence,
            iou=self.iou_threshold,
            device=self.device,
            verbose=False
        )

        # Parse results
        detections = {
            'boxes': [],
            'confidences': [],
            'classes': [],
            'class_ids': []
        }

        if len(results) > 0 and results[0].boxes is not None:
            for box in results[0].boxes:
                # Box coordinates in xyxy format
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                # Confidence and class
                conf = float(box.conf[0].cpu().numpy())
                cls_id = int(box.cls[0].cpu().numpy())
                # Map class ID to class name
                if cls_id < len(self.classes):
                    cls_name = self.classes[cls_id]
                else:
                    cls_name = f"class_{cls_id}"

                detections['boxes'].append([int(x1), int(y1), int(x2), int(y2)])
                detections['confidences'].append(conf)
                detections['classes'].append(cls_name)
                detections['class_ids'].append(cls_id)

        return detections
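
    # Illustrative shape of the returned dict (values are made up):
    #   {'boxes': [[34, 120, 410, 388]],
    #    'confidences': [0.87],
    #    'classes': ['dent'],
    #    'class_ids': [0]}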

    def _detect_onnx(self, image: np.ndarray) -> Dict:
        """Detection using the ONNX model (compatible with the original code)."""
        height, width = image.shape[:2]

        # Preprocess: scale to [0, 1], resize to the 640x640 network input,
        # and swap BGR -> RGB
        blob = cv2.dnn.blobFromImage(
            image, 1 / 255.0, (640, 640),
            swapRB=True, crop=False
        )
        self.net.setInput(blob)
        preds = self.net.forward()
        # YOLOv8/v11 ONNX output is (1, 4 + num_classes, num_anchors);
        # transpose to (1, num_anchors, 4 + num_classes) so each row is
        # one candidate detection
        preds = preds.transpose((0, 2, 1))

        # Extract outputs
        detections = self._extract_onnx_output(
            preds=preds,
            image_shape=(height, width),
            input_shape=(640, 640)
        )
        return detections

    def _extract_onnx_output(self, preds: np.ndarray, image_shape: Tuple[int, int],
                             input_shape: Tuple[int, int]) -> Dict:
        """Extract detection results from the ONNX model output."""
        class_ids, class_names, confs, boxes = [], [], [], []

        image_height, image_width = image_shape
        input_height, input_width = input_shape
        # Scale factors to map boxes from network-input space back to the image
        x_factor = image_width / input_width
        y_factor = image_height / input_height

        for row in preds[0]:
            # Each row is [cx, cy, w, h, score_0, ..., score_{nc-1}]. YOLOv8/v11
            # has no separate objectness score, so the confidence is the best
            # class score.
            classes_score = row[4:]
            class_id = int(np.argmax(classes_score))
            conf = float(classes_score[class_id])

            if conf > self.confidence:
                confs.append(conf)
                class_ids.append(class_id)
                class_names.append(
                    self.classes[class_id] if class_id < len(self.classes)
                    else f"class_{class_id}"
                )
                # Convert center/size box to corner coordinates in image space
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                left = int((cx - 0.5 * w) * x_factor)
                top = int((cy - 0.5 * h) * y_factor)
                boxes.append([left, top, left + int(w * x_factor), top + int(h * y_factor)])

        # Non-maximum suppression (NMSBoxes expects [x, y, w, h] boxes)
        if len(boxes) > 0:
            indices = cv2.dnn.NMSBoxes(
                [[b[0], b[1], b[2] - b[0], b[3] - b[1]] for b in boxes],
                confs, self.confidence, self.iou_threshold
            )
            if len(indices) > 0:
                indices = np.array(indices).flatten()
                return {
                    'boxes': [boxes[i] for i in indices],
                    'confidences': [confs[i] for i in indices],
                    'classes': [class_names[i] for i in indices],
                    'class_ids': [class_ids[i] for i in indices]
                }
        return {'boxes': [], 'confidences': [], 'classes': [], 'class_ids': []}

    def detect_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Run detection on multiple images, one at a time."""
        return [self.detect(img) for img in images]
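
# Minimal usage sketch, assuming config.yaml sits next to this file and that
# "sample.jpg" exists; both names are placeholders, not part of the project.
if __name__ == "__main__":
    detector = YOLOv11Detector("config.yaml")
    img = cv2.imread("sample.jpg")
    if img is None:
        raise SystemExit("Could not read sample.jpg")
    result = detector.detect(img)
    for box, conf, name in zip(result['boxes'], result['confidences'], result['classes']):
        print(f"{name}: {conf:.2f} at {box}")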