# Poker_Detection_hf/core/runonnx/common_detection.py
# Hugging Face page header (scrape residue): yolo12138's picture / code / efafe9b
# import onnxruntime
import numpy as np
import cv2
from typing import Tuple, List, Union
from .base_onnx import BaseONNX
class COMMON_DETECTION_ONNX(BaseONNX):
    """ONNX detector wrapper: letterbox preprocessing, inference, NMS and drawing.

    The class relies on ``self.session``, ``self.input_name`` and
    ``self.input_size``, which are presumably set up by ``BaseONNX.__init__``
    (not visible in this file -- verify against the base class).
    """

    def __init__(self,
                 model_path,
                 labels: List[str],
                 # Model input size
                 input_size=(640, 640),  # (w, h)
                 iou_threshold: float = 0.5,
                 score_threshold: float = 0.2,
                 ):
        """Create the detector.

        Args:
            model_path: Path of the ONNX model, forwarded to ``BaseONNX``.
            labels: Class names; a predicted label id is used as an index
                into this list.
            input_size: Model input size as ``(w, h)``.
            iou_threshold: Boxes overlapping a higher-scored box by more than
                this IoU are suppressed in :meth:`nms`.
            score_threshold: Detections scoring below this value are dropped.
        """
        super().__init__(model_path, input_size)
        self.labels = labels
        # One random colour per label. Values are converted to plain Python
        # ints because some OpenCV builds reject numpy integer scalars as a
        # colour argument; the upper bound 256 makes 255 reachable.
        self.label_colors = [
            (
                int(np.random.randint(0, 256)),
                int(np.random.randint(0, 256)),
                int(np.random.randint(0, 256)),
            )
            for _ in labels
        ]
        self.iou_threshold = iou_threshold
        self.score_threshold = score_threshold

    def preprocess_image(self, image: np.ndarray, to_rgb: bool = True) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox, normalise and batch an image for the model.

        Args:
            image: ``(H, W, 3)`` image array.
            to_rgb: When true, swap the first and last channel with
                ``cv2.COLOR_BGR2RGB`` before any other processing.

        Returns:
            ``(input_tensor, scale, ori_shape)`` where ``input_tensor`` is a
            float32 array of shape ``(1, 3, target_h, target_w)``, ``scale``
            is the resize factor that was applied, and ``ori_shape`` is the
            original ``(H, W)``.
        """
        if to_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # input_size is documented as (w, h); unpack it that way so that
        # non-square model inputs are scaled and padded correctly.
        target_w, target_h = self.input_size[0], self.input_size[1]

        h, w = image.shape[:2]
        ori_shape = (h, w)

        # 1. Resize keeping the aspect ratio. Clamp to 1 px so an extreme
        #    aspect ratio cannot produce an empty image.
        scale = min(target_h / h, target_w / w)
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # 2. Pad on the bottom/right only, so box coordinates keep their
        #    origin at the top-left corner and only need dividing by `scale`.
        padded = cv2.copyMakeBorder(
            resized, 0, target_h - new_h, 0, target_w - new_w,
            cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )

        # 3. Normalise. The constants are listed in B, G, R order.
        # NOTE(review): when to_rgb=True the channels have been swapped but
        # these constants are not -- confirm which channel order callers pass
        # in and whether the statistics should be reversed in that case.
        mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
        std = np.array([57.375, 57.12, 58.395], dtype=np.float32)
        normalized = (padded.astype(np.float32) - mean) / std

        # 4. HWC -> CHW, add the batch dimension, and hand the runtime a
        #    contiguous buffer.
        input_tensor = np.ascontiguousarray(normalized.transpose(2, 0, 1)[np.newaxis, ...])
        return input_tensor, scale, ori_shape

    def post_bbox(self, boxes, origin_shape, scale):
        """Map boxes from model-input space back to the original image.

        This is the inverse of the resize done in :meth:`preprocess_image`.

        Args:
            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``.
            origin_shape: Original image shape as ``(H, W)``.
            scale: Resize factor returned by :meth:`preprocess_image`. When
                ``None`` it is recomputed from ``origin_shape`` and
                ``self.input_size``.

        Returns:
            ``(N, 4)`` array of ``x1, y1, x2, y2`` clipped to the original
            image bounds. ``None`` or empty input is returned unchanged.
        """
        if boxes is None or len(boxes) == 0:
            return boxes

        h, w = origin_shape
        if scale is None:
            # Recompute the letterbox factor from the configured (w, h) input
            # size instead of assuming a 640x640 model.
            target_w, target_h = self.input_size[0], self.input_size[1]
            scale = min(target_h / h, target_w / w)

        # Out-of-place division: leaves the caller's array untouched and also
        # works when the boxes arrive with an integer dtype.
        boxes = np.asarray(boxes) / float(scale)

        # Clip to the original image bounds.
        boxes[:, 0] = np.clip(boxes[:, 0], 0, w)  # x1
        boxes[:, 1] = np.clip(boxes[:, 1], 0, h)  # y1
        boxes[:, 2] = np.clip(boxes[:, 2], 0, w)  # x2
        boxes[:, 3] = np.clip(boxes[:, 3], 0, h)  # y2
        return boxes

    def filter_results(self, boxes: np.ndarray, scores: np.ndarray, labels: np.ndarray, iou_threshold: float, score_threshold: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Apply the score threshold, then NMS, to raw detections.

        Args:
            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``.
            scores: ``(N,)`` confidence scores.
            labels: ``(N,)`` label ids.
            iou_threshold: IoU above which a lower-scored box is suppressed.
            score_threshold: Minimum score for a detection to be kept.

        Returns:
            The surviving ``(boxes, scores, labels)``.
        """
        # 1. Drop detections scoring below score_threshold.
        mask_score = scores >= score_threshold
        target_boxes = boxes[mask_score]
        target_scores = scores[mask_score]
        target_labels = labels[mask_score]

        # 2. Suppress boxes that overlap a higher-scored box by more than
        #    iou_threshold.
        keep = self.nms(target_boxes, target_scores, iou_threshold)
        return target_boxes[keep], target_scores[keep], target_labels[keep]

    def nms(self, boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> np.ndarray:
        """Greedy non-maximum suppression.

        Among boxes whose IoU exceeds ``iou_threshold`` only the one with the
        highest score is kept. Labels are not taken into account, so
        suppression happens across classes.

        Args:
            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``.
            scores: ``(N,)`` confidence scores.
            iou_threshold: IoU above which a lower-scored box is suppressed.

        Returns:
            int32 indices into ``boxes`` of the kept detections, ordered by
            descending score.
        """
        if len(boxes) == 0:
            return np.array([], dtype=np.int32)

        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        # Widths and heights use the inclusive-pixel (+1) convention.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)

        # Candidates sorted by score, highest first.
        order = np.argsort(scores)[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]

            # Intersection of the current box with every remaining candidate.
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h

            iou = inter / (areas[i] + areas[rest] - inter)

            # Continue only with the candidates that overlap little enough.
            order = rest[np.where(iou <= iou_threshold)[0]]

        return np.array(keep, dtype=np.int32)

    def run_inference(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Run the ONNX session on a preprocessed input tensor.

        Args:
            image: Input tensor as produced by :meth:`preprocess_image`.

        Returns:
            boxes: ``(N, 4)`` ``x1, y1, x2, y2`` in model-input coordinates.
            scores: ``(N,)``
            labels: ``(N,)``
        """
        ort_outs = self.session.run(None, {self.input_name: image})
        # Output 0 is indexed as (batch, N, >=5): four box coordinates followed
        # by the score. Output 1 is indexed as (batch, N) label ids.
        boxes_scores, labels = ort_outs[0], ort_outs[1]

        # Only the first batch element is used.
        boxes = boxes_scores[0, :, :4]
        scores = boxes_scores[0, :, 4]
        labels = labels[0]
        return boxes, scores, labels

    def pred(self, image: Union[np.ndarray, str], to_rgb: bool = False) -> Tuple[np.ndarray, np.ndarray, List[str]]:
        """Detect objects in an image.

        Args:
            image: Image array, or the path of an image file to load with
                ``cv2.imread``.
            to_rgb: Forwarded to :meth:`preprocess_image`.

        Returns:
            boxes: ``(N, 4)`` ``x1, y1, x2, y2`` in original image coordinates.
            scores: ``(N,)`` confidence scores.
            label_names: Class name of every detection.

        Raises:
            ValueError: If ``image`` is a path that cannot be read as an image.
        """
        if isinstance(image, str):
            img = cv2.imread(image)
            if img is None:
                # cv2.imread signals a missing or undecodable file by
                # returning None instead of raising.
                raise ValueError(f"Failed to read image: {image}")
        else:
            # preprocess_image never writes into its input, so no defensive
            # copy is needed.
            img = image

        input_tensor, scale, ori_shape = self.preprocess_image(img, to_rgb)
        boxes, scores, labels = self.run_inference(input_tensor)

        # Score threshold + NMS.
        filtered_boxes, filtered_scores, filtered_labels = self.filter_results(
            boxes, scores, labels, self.iou_threshold, self.score_threshold
        )

        # Back to original image coordinates.
        origin_boxes = self.post_bbox(filtered_boxes, ori_shape, scale)

        label_names = [self.labels[label] for label in filtered_labels]
        return origin_boxes, filtered_scores, label_names

    def draw_pred(self, image: np.ndarray, boxes: np.ndarray, scores: np.ndarray, labels: List[str]) -> np.ndarray:
        """Draw boxes and ``"<label> <score>"`` captions onto ``image``.

        The drawing happens in place on the array that is passed in; the same
        array is returned.

        Args:
            image: Image to draw on.
            boxes: ``(N, 4)`` ``x1, y1, x2, y2``.
            scores: ``(N,)`` confidence scores.
            labels: Class name of every detection; each must be present in
                ``self.labels``.

        Returns:
            ``image`` with the detections drawn on it.
        """
        # Each label has its own colour, chosen in __init__.
        colors = self.label_colors
        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = (int(v) for v in box)
            color = colors[self.labels.index(label)]
            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, f"{label} {score:.2f}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        return image