yolocar / src /detection.py
minh9972t12's picture
Upload 12 files
c8a046c
raw
history blame
7.38 kB
import numpy as np
from typing import List, Dict, Tuple
import cv2
from pathlib import Path
import yaml
class YOLOv11Detector:
    """YOLOv11 detector for car damage detection.

    Loads either a PyTorch checkpoint (through Ultralytics) or an ONNX export
    (through OpenCV DNN), depending on the model file extension, and exposes
    one ``detect`` API for both. Every detection result is a dict with four
    parallel lists:

    * ``boxes``       -- ``[x1, y1, x2, y2]`` integer pixel coordinates
    * ``confidences`` -- score of the winning class as ``float``
    * ``classes``     -- class name, or ``"class_<id>"`` when the id has no
      entry in the configured class list
    * ``class_ids``   -- integer class id
    """

    # (height, width) the ONNX network is fed with.
    ONNX_INPUT_SHAPE = (640, 640)

    # Files probed (in this order) inside ``models/`` when the configured
    # model path does not exist, with the message printed when one is used.
    _FALLBACK_MODELS = (
        ("best.pt", "Using best.pt model from training"),
        ("last.pt", "Using last.pt checkpoint model"),
        ("best.onnx", "Using best.onnx model"),
    )

    def __init__(self, config_path: str = "config.yaml"):
        """Initialize YOLOv11 detector with configuration.

        Args:
            config_path: Path to a YAML file providing ``model.path``,
                ``model.device``, ``model.confidence``,
                ``model.iou_threshold`` and ``detection.classes``.

        Raises:
            FileNotFoundError: If neither the configured model file nor any
                fallback file in ``models/`` exists.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)

        model_path = self.config['model']['path']

        # Fall back to a well-known file name when the configured one is missing.
        if not Path(model_path).exists():
            model_dir = Path("models")
            for file_name, message in self._FALLBACK_MODELS:
                candidate = model_dir / file_name
                if candidate.exists():
                    model_path = str(candidate)
                    print(message)
                    break
            else:
                raise FileNotFoundError("No model files found in models/ directory!")

        self.model_path = model_path
        self.device = self.config['model']['device']
        self.confidence = self.config['model']['confidence']
        self.iou_threshold = self.config['model']['iou_threshold']
        self.classes = self.config['detection']['classes']

        # Load model based on format; anything that is not .onnx is handed
        # to the PyTorch loader.
        if model_path.endswith('.onnx'):
            self._load_onnx_model()
        else:
            self._load_pytorch_model()

    @staticmethod
    def _empty_detections() -> Dict:
        """Return a fresh, empty detection result."""
        return {'boxes': [], 'confidences': [], 'classes': [], 'class_ids': []}

    def _uses_cuda(self) -> bool:
        """Return True when the configured device names a CUDA device."""
        # str() so a non-string device value from the YAML file cannot raise.
        return str(self.device).startswith('cuda')

    def _label_for(self, cls_id: int) -> str:
        """Map a class id to its configured name, or ``class_<id>`` if unknown."""
        if 0 <= cls_id < len(self.classes):
            return self.classes[cls_id]
        return f"class_{cls_id}"

    def _load_pytorch_model(self):
        """Load PyTorch model using Ultralytics."""
        # Imported here so ultralytics is only needed when this loader runs.
        from ultralytics import YOLO

        self.model = YOLO(self.model_path)

        # Move the model to the configured CUDA device (any "cuda*" string,
        # not only the literal "cuda:0").
        if self._uses_cuda():
            self.model.to(self.device)

        print(f"Loaded PyTorch model: {self.model_path}")

    def _load_onnx_model(self):
        """Load ONNX model using OpenCV DNN."""
        self.net = cv2.dnn.readNet(self.model_path)

        # Set backend/target based on the configured device.
        if self._uses_cuda():
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        else:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

        print(f"Loaded ONNX model: {self.model_path}")

    def detect(self, image: np.ndarray) -> Dict:
        """Perform detection on image.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Dictionary containing detection results (see the class docstring
            for the keys).
        """
        if self.model_path.endswith('.onnx'):
            return self._detect_onnx(image)
        return self._detect_pytorch(image)

    def _detect_pytorch(self, image: np.ndarray) -> Dict:
        """Detection using PyTorch model.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Dictionary containing detection results for the first (only)
            result returned by the model.
        """
        # Run YOLO inference
        results = self.model(
            image,
            conf=self.confidence,
            iou=self.iou_threshold,
            device=self.device,
            verbose=False
        )

        detections = self._empty_detections()
        if len(results) == 0 or results[0].boxes is None:
            return detections

        boxes = results[0].boxes
        # Transfer each tensor to host memory once instead of once per box.
        xyxy = boxes.xyxy.cpu().numpy()
        confs = boxes.conf.cpu().numpy()
        cls_ids = boxes.cls.cpu().numpy()

        for (x1, y1, x2, y2), conf, cls_id in zip(
                xyxy.tolist(), confs.tolist(), cls_ids.tolist()):
            cls_id = int(cls_id)
            # int() truncates the float coordinates toward zero.
            detections['boxes'].append([int(x1), int(y1), int(x2), int(y2)])
            detections['confidences'].append(float(conf))
            detections['classes'].append(self._label_for(cls_id))
            detections['class_ids'].append(cls_id)

        return detections

    def _detect_onnx(self, image: np.ndarray) -> Dict:
        """Detection using ONNX model (compatible with original code).

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Dictionary containing detection results.
        """
        height, width = image.shape[:2]
        input_height, input_width = self.ONNX_INPUT_SHAPE

        # Preprocess: scale to [0, 1], resize without cropping and swap the
        # R and B channels. blobFromImage takes the size as (width, height).
        blob = cv2.dnn.blobFromImage(
            image, 1 / 255.0, (input_width, input_height),
            swapRB=True, crop=False
        )
        self.net.setInput(blob)
        preds = self.net.forward()
        # Swap the last two axes so that each row is one candidate, which is
        # the layout _extract_onnx_output reads.
        # NOTE(review): assumes a 3-D output of shape
        # (1, 4 + num_classes, num_candidates) -- confirm against the export.
        preds = preds.transpose((0, 2, 1))

        return self._extract_onnx_output(
            preds=preds,
            image_shape=(height, width),
            input_shape=(input_height, input_width)
        )

    def _extract_onnx_output(self, preds: np.ndarray, image_shape: Tuple[int, int],
                             input_shape: Tuple[int, int]) -> Dict:
        """Extract detection results from ONNX model output.

        Each row of ``preds[0]`` is read as ``[cx, cy, w, h, score_0, ...]``
        in network-input pixels.
        NOTE(review): this assumes the export has no separate objectness
        column -- confirm against the exported model.

        Args:
            preds: Network output after transposition; ``preds[0]`` is a 2-D
                array with one candidate per row.
            image_shape: ``(height, width)`` of the original image.
            input_shape: ``(height, width)`` the network was fed with.

        Returns:
            Dictionary containing the detections that survive the confidence
            threshold and non-maximum suppression.
        """
        image_height, image_width = image_shape
        input_height, input_width = input_shape
        # Scale factors from network-input pixels back to image pixels.
        x_factor = image_width / input_width
        y_factor = image_height / input_height

        candidates = np.asarray(preds[0])
        # Nothing to score: no rows, or rows without any class-score column.
        if candidates.shape[0] == 0 or candidates.shape[1] <= 4:
            return self._empty_detections()

        class_scores = candidates[:, 4:]
        best_ids = class_scores.argmax(axis=1)
        best_scores = class_scores.max(axis=1)
        keep = best_scores > self.confidence

        boxes_xyxy, boxes_xywh = [], []
        confs, class_ids, labels = [], [], []
        for (x, y, w, h), cls_id, score in zip(
                candidates[keep, :4].tolist(),
                best_ids[keep].tolist(),
                best_scores[keep].tolist()):
            # Convert centre/size to a top-left anchored box in image pixels.
            left = int((x - 0.5 * w) * x_factor)
            top = int((y - 0.5 * h) * y_factor)
            box_width = int(w * x_factor)
            box_height = int(h * y_factor)

            boxes_xywh.append([left, top, box_width, box_height])
            boxes_xyxy.append([left, top, left + box_width, top + box_height])
            # The confidence is the score of the winning class, so that NMS
            # ranks and thresholds on the same value used for filtering.
            confs.append(float(score))
            class_ids.append(int(cls_id))
            labels.append(self._label_for(int(cls_id)))

        if not boxes_xyxy:
            return self._empty_detections()

        # Apply NMS; NMSBoxes expects [x, y, width, height] boxes.
        indices = cv2.dnn.NMSBoxes(
            boxes_xywh, confs, self.confidence, self.iou_threshold
        )
        if len(indices) == 0:
            return self._empty_detections()

        # Flatten so both 1-D and N x 1 index arrays are handled.
        kept = np.asarray(indices).flatten().tolist()
        return {
            'boxes': [boxes_xyxy[i] for i in kept],
            'confidences': [confs[i] for i in kept],
            'classes': [labels[i] for i in kept],
            'class_ids': [class_ids[i] for i in kept]
        }

    def detect_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Detect on multiple images.

        Args:
            images: Input images as numpy arrays (BGR format)

        Returns:
            One detection dictionary per input image, in input order.
        """
        return [self.detect(img) for img in images]