# Source: Hugging Face Space "minh9972t12/yolocar" (status: Sleeping).
# File size: 7,380 bytes; revision c8a046c.

import numpy as np
from typing import List, Dict, Tuple
import cv2
from pathlib import Path
import yaml

class YOLOv11Detector:
    """YOLOv11 detector for car damage detection.

    The detector is configured from a YAML file and runs either a PyTorch
    checkpoint (through Ultralytics) or an ONNX export (through OpenCV DNN),
    chosen by the model file extension.

    Both back-ends return the same result dictionary:

    * ``'boxes'``: list of ``[x1, y1, x2, y2]`` integer pixel boxes
    * ``'confidences'``: list of float scores, one per box
    * ``'classes'``: list of class names, one per box
    * ``'class_ids'``: list of integer class ids, one per box
    """

    # Files looked up (in this order) inside ``models/`` when the configured
    # model path does not exist, paired with the message printed on a hit.
    _FALLBACK_MODELS = (
        ("best.pt", "Using best.pt model from training"),
        ("last.pt", "Using last.pt checkpoint model"),
        ("best.onnx", "Using best.onnx model"),
    )

    # (height, width) fed to the ONNX network.
    _ONNX_INPUT_SHAPE = (640, 640)

    def __init__(self, config_path: str = "config.yaml"):
        """Initialize YOLOv11 detector with configuration.

        Args:
            config_path: Path to a YAML file providing ``model.path``,
                ``model.device``, ``model.confidence``,
                ``model.iou_threshold`` and ``detection.classes``.

        Raises:
            FileNotFoundError: If neither the configured model nor any
                fallback file in ``models/`` exists.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)

        model_cfg = self.config['model']
        model_path = model_cfg['path']

        # Fall back to well-known file names when the configured one is missing.
        if not Path(model_path).exists():
            model_path = self._find_fallback_model()

        self.model_path = model_path
        self.device = model_cfg['device']
        self.confidence = model_cfg['confidence']
        self.iou_threshold = model_cfg['iou_threshold']
        self.classes = self.config['detection']['classes']

        # Load model based on format
        if self._is_onnx():
            self._load_onnx_model()
        else:  # .pt format
            self._load_pytorch_model()

    @classmethod
    def _find_fallback_model(cls) -> str:
        """Return the first existing fallback model path under ``models/``."""
        model_dir = Path("models")
        for filename, message in cls._FALLBACK_MODELS:
            candidate = model_dir / filename
            if candidate.exists():
                print(message)
                return str(candidate)
        raise FileNotFoundError("No model files found in models/ directory!")

    def _is_onnx(self) -> bool:
        """Tell whether the current model path points at an ONNX file."""
        return str(self.model_path).endswith('.onnx')

    def _uses_cuda(self) -> bool:
        """Tell whether the configured device is a CUDA device.

        Accepts any ``cuda`` / ``cuda:N`` spelling rather than only
        ``cuda:0`` so other GPU indices are not silently run on the CPU.
        """
        return str(self.device).startswith('cuda')

    def _class_name(self, cls_id: int) -> str:
        """Map a class id to its configured name, or ``class_<id>`` if unknown."""
        if cls_id < len(self.classes):
            return self.classes[cls_id]
        return f"class_{cls_id}"

    def _load_pytorch_model(self):
        """Load PyTorch model using Ultralytics."""
        # Imported lazily so ONNX-only deployments do not need ultralytics.
        from ultralytics import YOLO
        self.model = YOLO(self.model_path)

        # Move the model to the configured GPU (honours the device index).
        if self._uses_cuda():
            self.model.to(self.device)

        print(f"Loaded PyTorch model: {self.model_path}")

    def _load_onnx_model(self):
        """Load ONNX model using OpenCV DNN."""
        self.net = cv2.dnn.readNet(self.model_path)

        # Set backend based on device
        if self._uses_cuda():
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        else:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

        print(f"Loaded ONNX model: {self.model_path}")

    def detect(self, image: np.ndarray) -> Dict:
        """Perform detection on image.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Dictionary containing detection results, with the keys
            ``'boxes'``, ``'confidences'``, ``'classes'`` and ``'class_ids'``.
        """
        if self._is_onnx():
            return self._detect_onnx(image)
        return self._detect_pytorch(image)

    def _detect_pytorch(self, image: np.ndarray) -> Dict:
        """Detection using PyTorch model."""
        # Run YOLO inference
        results = self.model(
            image,
            conf=self.confidence,
            iou=self.iou_threshold,
            device=self.device,
            verbose=False
        )

        detections = {
            'boxes': [],
            'confidences': [],
            'classes': [],
            'class_ids': []
        }

        if len(results) == 0 or results[0].boxes is None:
            return detections

        found = results[0].boxes
        # One device-to-host transfer per tensor instead of three per box.
        coords = found.xyxy.cpu().numpy()
        scores = found.conf.cpu().numpy()
        labels = found.cls.cpu().numpy()

        for xyxy, score, label in zip(coords, scores, labels):
            cls_id = int(label)
            detections['boxes'].append([int(v) for v in xyxy])
            detections['confidences'].append(float(score))
            detections['classes'].append(self._class_name(cls_id))
            detections['class_ids'].append(cls_id)

        return detections

    def _detect_onnx(self, image: np.ndarray) -> Dict:
        """Detection using ONNX model (compatible with original code)."""
        image_shape = image.shape[:2]
        input_height, input_width = self._ONNX_INPUT_SHAPE

        # Scale to [0, 1], resize to the network input and swap BGR -> RGB.
        blob = cv2.dnn.blobFromImage(
            image, 1/255.0, (input_width, input_height),
            swapRB=True, crop=False
        )

        self.net.setInput(blob)
        preds = self.net.forward()
        # Swap the last two axes so each candidate becomes one row.
        preds = preds.transpose((0, 2, 1))

        return self._extract_onnx_output(
            preds=preds,
            image_shape=image_shape,
            input_shape=self._ONNX_INPUT_SHAPE
        )

    def _decode_onnx_candidates(
        self,
        preds: np.ndarray,
        image_shape: Tuple[int, int],
        input_shape: Tuple[int, int],
    ) -> Tuple[List[List[int]], List[float], List[int]]:
        """Decode raw ONNX rows into thresholded candidates (before NMS).

        Each row of ``preds[0]`` is read as ``[cx, cy, w, h, score_0, ...]``.
        A row is kept when its best class score exceeds ``self.confidence``.

        Returns:
            ``(boxes, confidences, class_ids)`` where boxes are
            ``[x1, y1, x2, y2]`` in original-image pixels, confidences are
            the score of the winning class and class_ids are integer ids.
        """
        rows = np.asarray(preds[0], dtype=np.float64)
        if rows.ndim != 2 or rows.shape[0] == 0 or rows.shape[1] <= 4:
            return [], [], []

        image_height, image_width = image_shape
        input_height, input_width = input_shape
        x_factor = image_width / input_width
        y_factor = image_height / input_height

        class_scores = rows[:, 4:]
        best_ids = class_scores.argmax(axis=1)
        best_scores = class_scores[np.arange(rows.shape[0]), best_ids]

        keep = best_scores > self.confidence
        if not keep.any():
            return [], [], []

        cx, cy, w, h = rows[keep, :4].T
        # astype(int) truncates toward zero, matching int() on a float.
        left = ((cx - 0.5 * w) * x_factor).astype(int)
        top = ((cy - 0.5 * h) * y_factor).astype(int)
        box_w = (w * x_factor).astype(int)
        box_h = (h * y_factor).astype(int)

        boxes = np.stack(
            [left, top, left + box_w, top + box_h], axis=1
        ).tolist()
        return boxes, best_scores[keep].tolist(), best_ids[keep].tolist()

    def _extract_onnx_output(self, preds: np.ndarray, image_shape: Tuple[int, int],
                            input_shape: Tuple[int, int]) -> Dict:
        """Extract detection results from ONNX model output.

        Args:
            preds: Network output with one candidate per row of ``preds[0]``.
            image_shape: ``(height, width)`` of the original image.
            input_shape: ``(height, width)`` of the network input.

        Returns:
            Detection dictionary after confidence filtering and NMS. The
            confidence is the score of the predicted class and ``class_ids``
            holds the real class ids, consistent with the PyTorch path.
        """
        empty = {'boxes': [], 'confidences': [], 'classes': [], 'class_ids': []}

        boxes, confs, class_ids = self._decode_onnx_candidates(
            preds, image_shape, input_shape
        )
        if not boxes:
            return empty

        # NMSBoxes expects [x, y, width, height] boxes.
        indices = cv2.dnn.NMSBoxes(
            [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in boxes],
            confs, self.confidence, self.iou_threshold
        )
        if len(indices) == 0:
            return empty

        # Depending on the OpenCV version the result is flat or of shape (N, 1).
        kept = np.asarray(indices).flatten().tolist()
        return {
            'boxes': [boxes[i] for i in kept],
            'confidences': [confs[i] for i in kept],
            'classes': [self._class_name(class_ids[i]) for i in kept],
            'class_ids': [class_ids[i] for i in kept]
        }

    def detect_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Detect on multiple images, returning one result dict per image."""
        return [self.detect(img) for img in images]