File size: 8,156 Bytes
efafe9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# import onnxruntime
import numpy as np
import cv2

from typing import Tuple, List, Union
from .base_onnx import BaseONNX

class COMMON_DETECTION_ONNX(BaseONNX):

    def __init__(self,
                 model_path,
                 labels: List[str],
                 # Network input size.
                 input_size=(640, 640),  # (w, h)
                 iou_threshold: float = 0.5,
                 score_threshold: float = 0.2,
                 ):
        """Set up the detector and its per-label drawing colours.

        Args:
            model_path: Forwarded unchanged to ``BaseONNX.__init__``.
            labels: Class names; ``pred`` indexes this list with the class
                ids produced by the model.
            input_size: Network input size as ``(w, h)``; forwarded to
                ``BaseONNX.__init__``.
            iou_threshold: IoU threshold that ``pred`` hands to ``nms``.
            score_threshold: Minimum score a detection needs to be kept.
        """
        super().__init__(model_path, input_size)

        self.labels = labels
        # One random colour per label; every channel is drawn from [0, 255).
        self.label_colors = [
            (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
            for _ in labels
        ]

        self.score_threshold = score_threshold
        self.iou_threshold = iou_threshold

    def preprocess_image(self, image: cv2.UMat, to_rgb: bool = True) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox and normalise an image into a network input tensor.

        The image is resized with its aspect ratio preserved, padded on the
        bottom and right with the constant value 114 up to the network input
        size, normalised with a fixed per-channel mean/std, and rearranged to
        ``(1, C, H, W)``.

        Args:
            image: ``(H, W, 3)`` image array.
            to_rgb: When True, the channels are swapped with
                ``cv2.COLOR_BGR2RGB`` before any other processing.

        Returns:
            input_tensor: float32 array of shape ``(1, C, target_h, target_w)``.
            scale: The single resize factor applied to both axes.
            ori_shape: ``(H, W)`` of the image before resizing.
        """
        if to_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # ``input_size`` is declared as (w, h) in ``__init__``, so unpack it in
        # that order. Indexing it as (h, w) only works for square inputs.
        target_w, target_h = self.input_size
        ori_h, ori_w = image.shape[:2]
        ori_shape = (ori_h, ori_w)

        # 1. Resize with the aspect ratio kept: the tighter axis decides the scale.
        scale = min(target_h / ori_h, target_w / ori_w)
        # Keep at least one pixel per side so extreme aspect ratios do not
        # hand cv2.resize a zero-sized target.
        new_h = max(1, int(ori_h * scale))
        new_w = max(1, int(ori_w * scale))
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # 2. Pad up to the network input size. Padding goes only on the
        #    bottom/right, so box coordinates need no offset when mapped back.
        pad_bottom = target_h - new_h
        pad_right = target_w - new_w
        padded = cv2.copyMakeBorder(
            resized, 0, pad_bottom, 0, pad_right,
            cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )

        # 3. Normalise. The constants are listed in B, G, R order (mmdet pipeline).
        # NOTE(review): with to_rgb=True the channels are swapped before these
        # BGR-ordered constants are applied - confirm this matches what the
        # exported model expects.
        mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
        std = np.array([57.375, 57.12, 58.395], dtype=np.float32)
        normalized = (padded.astype(np.float32) - mean) / std

        # 4. (H, W, C) -> (C, H, W), then add the batch dimension.
        input_tensor = normalized.transpose(2, 0, 1)[np.newaxis, ...]

        return input_tensor, scale, ori_shape
    
    def post_bbox(self, boxes, origin_shape, scale):
        """
        将onnx的输出结果转换为mmdet的输出结果, 与 preprocess_image 中 的预处理相反
        boxes: (N, 4) x1, y1, x2, y2
        origin_shape: (H, W)
        scale: 缩放因子,从 preprocess_image 获取
        return: (N, 4) x1, y1, x2, y2
        """
        if boxes is None or len(boxes) == 0:
            return boxes
        
        boxes = boxes.copy()
        
        # 如果没有提供scale,假设是640x640输入,根据origin_shape计算scale
        if scale is None:
            target_size = 640
            h, w = origin_shape
            scale = min(target_size / h, target_size / w)
        
        # 将坐标从缩放后的图像空间转换回原始图像空间
        boxes /= scale
        
        # 裁剪到原始图像边界内
        h, w = origin_shape
        boxes[:, 0] = np.clip(boxes[:, 0], 0, w)  # x1
        boxes[:, 1] = np.clip(boxes[:, 1], 0, h)  # y1
        boxes[:, 2] = np.clip(boxes[:, 2], 0, w)  # x2
        boxes[:, 3] = np.clip(boxes[:, 3], 0, h)  # y2
        
        return boxes


    def filter_results(self, boxes: np.ndarray, scores: np.ndarray, labels: np.ndarray, iou_threshold: float, score_threshold: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Filter the boxes based on the iou_threshold and score_threshold.
        """
        mask_score = scores >= score_threshold
        
        
        # 1. 过滤掉 score 小于 score_threshold 的 boxes
        target_boxes = boxes[mask_score]
        target_scores = scores[mask_score]
        target_labels = labels[mask_score]
        
        # 2. 过滤掉 iou 小于 iou_threshold 的 boxes
        mask_iou = self.nms(target_boxes, target_scores, iou_threshold)
        
        target_boxes = target_boxes[mask_iou]
        target_scores = target_scores[mask_iou]
        target_labels = target_labels[mask_iou]
        
        return target_boxes, target_scores, target_labels

    def nms(self, boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> np.ndarray:
        """Greedy non-maximum suppression.

        Boxes are visited from highest to lowest score. Each visited box is
        kept, and every remaining box whose IoU with it exceeds
        ``iou_threshold`` is discarded.

        Args:
            boxes: ``(N, 4)`` boxes as x1, y1, x2, y2.
            scores: ``(N,)`` scores used for the visiting order.
            iou_threshold: Remaining boxes with IoU above this are dropped.

        Returns:
            int32 array with the indices of the kept boxes, highest score first.
        """
        if len(boxes) == 0:
            return np.array([], dtype=np.int32)

        left, top = boxes[:, 0], boxes[:, 1]
        right, bottom = boxes[:, 2], boxes[:, 3]

        # Widths and heights use the inclusive-pixel convention (hence the +1).
        box_areas = (right - left + 1) * (bottom - top + 1)

        # Candidate indices, highest score first.
        remaining = np.argsort(scores)[::-1]

        selected = []
        while remaining.size > 0:
            best, rest = remaining[0], remaining[1:]
            selected.append(best)

            # Intersection rectangle of the best box with every other candidate.
            overlap_w = np.maximum(
                0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1
            )
            overlap_h = np.maximum(
                0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1
            )
            overlap = overlap_w * overlap_h

            iou = overlap / (box_areas[best] + box_areas[rest] - overlap)

            # Only candidates that do not overlap too much go to the next round.
            remaining = rest[iou <= iou_threshold]

        return np.array(selected, dtype=np.int32)

    def run_inference(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Run the ONNX session on a preprocessed input tensor.

        Args:
            image (np.ndarray): Tensor fed to the session under
                ``self.input_name`` (as produced by ``preprocess_image``).

        Returns:
            boxes: (N, 4) x1, y1, x2, y2
            scores: (N,)
            labels: (N,)
        """
        outputs = self.session.run(None, {self.input_name: image})

        # Output 0 packs box corners and score per detection (5 values),
        # output 1 holds the class ids; only batch item 0 is read.
        detections = outputs[0][0]
        class_ids = outputs[1][0]

        return detections[:, :4], detections[:, 4], class_ids

    def pred(self, image: Union[cv2.UMat, str], to_rgb: bool = False) -> Tuple[np.ndarray, np.ndarray, List[str]]:
        """Detect objects in an image and return them in original-image coordinates.

        Args:
            image (cv2.UMat, str): An image array, or a path that is loaded
                with ``cv2.imread``.
            to_rgb: Forwarded to ``preprocess_image``.

        Returns:
            boxes: (N, 4) x1, y1, x2, y2 in original-image coordinates.
            scores: (N,) detection scores.
            label_names: N class names looked up in ``self.labels``.

        Raises:
            OSError: If ``image`` is a path and the file cannot be read.
        """
        if isinstance(image, str):
            img = cv2.imread(image)
            if img is None:
                # cv2.imread reports failure by returning None instead of
                # raising; fail here with a message that names the path.
                raise OSError(f"Failed to read image: {image}")
        else:
            img = image.copy()

        input_tensor, scale, ori_shape = self.preprocess_image(img, to_rgb)

        boxes, scores, labels = self.run_inference(input_tensor)

        # Score filtering followed by NMS.
        filtered_boxes, filtered_scores, filtered_labels = self.filter_results(
            boxes, scores, labels, self.iou_threshold, self.score_threshold
        )

        # Back to original-image coordinates.
        origin_boxes = self.post_bbox(filtered_boxes, ori_shape, scale)

        # Class ids arrive as numpy scalars; cast to int before indexing the list.
        label_names = [self.labels[int(label)] for label in filtered_labels]

        return origin_boxes, filtered_scores, label_names
    
    def draw_pred(self, image: cv2.UMat, boxes: np.ndarray, scores: np.ndarray, labels: List[str]) -> cv2.UMat:
        """Draw every detection's box and "<label> <score>" caption on ``image``.

        Args:
            image: Image to draw on; the same object is returned.
            boxes: (N, 4) x1, y1, x2, y2 boxes.
            scores: (N,) detection scores.
            labels: N class names, each of which must be in ``self.labels``.

        Returns:
            The ``image`` object that was passed in, with the drawings applied.
        """
        # Each label has its own colour, chosen once in __init__.
        palette = self.label_colors

        for (left, top, right, bottom), score, label in zip(boxes, scores, labels):
            top_left = (int(left), int(top))
            bottom_right = (int(right), int(bottom))
            color = palette[self.labels.index(label)]

            cv2.rectangle(image, top_left, bottom_right, color, 2)
            cv2.putText(image, f"{label} {score:.2f}", top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        return image