Spaces:

yolo12138
/

Poker_Detection_hf

Sleeping

App Files Files Community

Poker_Detection_hf / core /runonnx /common_detection.py

yolo12138

code

efafe9b 2 months ago

raw

history blame contribute delete

8.16 kB

	# import onnxruntime
	import numpy as np
	import cv2

	from typing import Tuple, List, Union
	from .base_onnx import BaseONNX

	class COMMON_DETECTION_ONNX(BaseONNX):
	    """Generic ONNX object detector: letterbox preprocessing, score filter, NMS.

	    The ONNX session itself is not created here; ``self.session``,
	    ``self.input_name`` and ``self.input_size`` are read by the methods below
	    and are presumably set up by ``BaseONNX.__init__`` (defined in another
	    module -- confirm there).
	    """

	    def __init__(
	        self,
	        model_path,
	        labels: List[str],
	        # Network input size.
	        input_size=(640, 640),  # (w, h)
	        iou_threshold: float = 0.5,
	        score_threshold: float = 0.2,
	    ):
	        """Store label names, per-label drawing colours and filter thresholds.

	        Args:
	            model_path: Forwarded unchanged to ``BaseONNX.__init__``.
	            labels: Class names; position ``i`` names class index ``i``.
	            input_size: Network input size as ``(w, h)``; forwarded to
	                ``BaseONNX.__init__``.
	            iou_threshold: IoU above which a lower-scored box is suppressed.
	            score_threshold: Minimum score a detection needs to be kept.
	        """
	        super().__init__(model_path, input_size)

	        self.labels = labels
	        # One random colour triple per label (not seeded, so colours differ
	        # between runs).
	        self.label_colors = [
	            (
	                np.random.randint(0, 255),
	                np.random.randint(0, 255),
	                np.random.randint(0, 255),
	            )
	            for _ in labels
	        ]

	        self.iou_threshold = iou_threshold
	        self.score_threshold = score_threshold

	    def preprocess_image(self, image: cv2.UMat, to_rgb: bool = True) -> Tuple[np.ndarray, float, Tuple[int, int]]:
	        """Resize with preserved aspect ratio, pad, normalise and batch an image.

	        Args:
	            image: Image array indexed as ``(H, W, C)``.
	            to_rgb: When true, swap channels with ``cv2.COLOR_BGR2RGB`` first.

	        Returns:
	            ``(input_tensor, scale, ori_shape)``: the float32 tensor laid out
	            as ``(1, C, H, W)``, the resize factor that was applied, and the
	            ``(h, w)`` of the input image.
	        """
	        if to_rgb:
	            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

	        # input_size is documented as (w, h) in __init__, so unpack it in that
	        # order; indexing it as (h, w) is only correct for square inputs.
	        target_w, target_h = self.input_size
	        ori_shape = image.shape[:2]

	        # 1. Resize with keep_ratio=True.
	        h, w = ori_shape
	        scale = min(target_h / h, target_w / w)
	        # Clamp to 1 px: int() truncation can reach 0 for extreme aspect
	        # ratios, and cv2.resize rejects a zero-sized target.
	        new_h = max(1, int(h * scale))
	        new_w = max(1, int(w * scale))
	        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

	        # 2. Pad on the bottom/right only, so the image stays anchored at the
	        #    top-left corner and boxes need no offset when mapped back.
	        pad_h = target_h - new_h
	        pad_w = target_w - new_w
	        padded = cv2.copyMakeBorder(
	            resized, 0, pad_h, 0, pad_w,
	            cv2.BORDER_CONSTANT, value=(114, 114, 114)
	        )

	        # 3. Normalize (BGR format, matching mmdet pipeline).
	        # NOTE(review): these constants are in BGR order and are applied
	        # unchanged even after the to_rgb channel swap above -- confirm that
	        # this is intended for callers passing to_rgb=True.
	        mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
	        std = np.array([57.375, 57.12, 58.395], dtype=np.float32)
	        normalized = (padded.astype(np.float32) - mean) / std

	        # 4. Convert to (C, H, W) and add the batch dimension.
	        input_tensor = normalized.transpose(2, 0, 1)[np.newaxis, ...]

	        return input_tensor, scale, ori_shape

	    def post_bbox(self, boxes, origin_shape, scale):
	        """Map boxes from network-input coordinates back to the original image.

	        This is the inverse of the resize done in ``preprocess_image``.

	        Args:
	            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``; ``None`` or an
	                empty input is returned unchanged.
	            origin_shape: ``(H, W)`` of the original image.
	            scale: Resize factor returned by ``preprocess_image``. When
	                ``None`` it is recomputed from ``origin_shape`` and
	                ``self.input_size``.

	        Returns:
	            A new ``(N, 4)`` array of ``x1, y1, x2, y2`` clipped to the
	            original image bounds; the input array is not modified.
	        """
	        if boxes is None or len(boxes) == 0:
	            return boxes

	        h, w = origin_shape

	        # No scale supplied: derive it from the configured input size the
	        # same way preprocess_image does.
	        if scale is None:
	            target_w, target_h = self.input_size
	            scale = min(target_h / h, target_w / w)

	        # Out-of-place division returns a fresh array (the caller's data is
	        # left alone) and also works when the incoming dtype is integer.
	        boxes = np.asarray(boxes) / scale

	        # Clip to the original image bounds.
	        boxes[:, 0] = np.clip(boxes[:, 0], 0, w)  # x1
	        boxes[:, 1] = np.clip(boxes[:, 1], 0, h)  # y1
	        boxes[:, 2] = np.clip(boxes[:, 2], 0, w)  # x2
	        boxes[:, 3] = np.clip(boxes[:, 3], 0, h)  # y2

	        return boxes

	    def filter_results(self, boxes: np.ndarray, scores: np.ndarray, labels: np.ndarray, iou_threshold: float, score_threshold: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
	        """Apply the score threshold, then non-maximum suppression.

	        Args:
	            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``.
	            scores: ``(N,)`` detection scores.
	            labels: ``(N,)`` class indices.
	            iou_threshold: Passed to ``nms``.
	            score_threshold: Detections scoring below this are dropped.

	        Returns:
	            ``(boxes, scores, labels)`` for the surviving detections, in the
	            order produced by ``nms`` (highest score first).
	        """
	        # 1. Drop detections whose score is below score_threshold.
	        mask_score = scores >= score_threshold
	        target_boxes = boxes[mask_score]
	        target_scores = scores[mask_score]
	        target_labels = labels[mask_score]

	        # 2. Suppress boxes that overlap a higher-scored box by more than
	        #    iou_threshold. Labels are not consulted, so suppression happens
	        #    across classes.
	        keep = self.nms(target_boxes, target_scores, iou_threshold)

	        return target_boxes[keep], target_scores[keep], target_labels[keep]

	    def nms(self, boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> np.ndarray:
	        """Greedy non-maximum suppression.

	        Repeatedly keeps the highest-scored remaining box and discards every
	        other remaining box whose IoU with it exceeds ``iou_threshold``.

	        Args:
	            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2``.
	            scores: ``(N,)`` scores aligned with ``boxes``.
	            iou_threshold: Boxes with IoU ``<=`` this value survive a round.

	        Returns:
	            int32 array of kept indices into ``boxes``, highest score first.
	        """
	        if len(boxes) == 0:
	            return np.array([], dtype=np.int32)

	        # Coordinates.
	        x1 = boxes[:, 0]
	        y1 = boxes[:, 1]
	        x2 = boxes[:, 2]
	        y2 = boxes[:, 3]

	        # Areas, using the inclusive-pixel (+1) convention.
	        areas = (x2 - x1 + 1) * (y2 - y1 + 1)

	        # Indices sorted by score, highest first.
	        order = np.argsort(scores)[::-1]

	        keep = []
	        while order.size > 0:
	            i = order[0]
	            keep.append(i)
	            rest = order[1:]

	            # Intersection of the current box with every remaining box.
	            xx1 = np.maximum(x1[i], x1[rest])
	            yy1 = np.maximum(y1[i], y1[rest])
	            xx2 = np.minimum(x2[i], x2[rest])
	            yy2 = np.minimum(y2[i], y2[rest])

	            # Intersection area (zero when the boxes do not overlap).
	            w = np.maximum(0.0, xx2 - xx1 + 1)
	            h = np.maximum(0.0, yy2 - yy1 + 1)
	            inter = w * h

	            # IoU.
	            iou = inter / (areas[i] + areas[rest] - inter)

	            # Keep only boxes whose IoU does not exceed the threshold; the
	            # +1 maps positions in ``rest`` back to positions in ``order``.
	            inds = np.where(iou <= iou_threshold)[0]
	            order = order[inds + 1]

	        return np.array(keep, dtype=np.int32)

	    def run_inference(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
	        """Run the ONNX session on a preprocessed input tensor.

	        Args:
	            image (np.ndarray): Input tensor, as produced by
	                ``preprocess_image``.

	        Returns:
	            boxes: (N, 4) x1, y1, x2, y2
	            scores: (N,)
	            labels: (N,)
	        """
	        # Run inference.
	        ort_outs = self.session.run(None, {self.input_name: image})

	        # Output 0 is indexed as (batch, N, 5): four box coordinates followed
	        # by the score. Output 1 is indexed as (batch, N) class indices.
	        # Only the first batch element is read.
	        boxes_scores, labels = ort_outs[0], ort_outs[1]
	        boxes = boxes_scores[0, :, :4]
	        scores = boxes_scores[0, :, 4]
	        labels = labels[0]

	        return boxes, scores, labels

	    def pred(self, image: Union[cv2.UMat, str], to_rgb: bool = False) -> Tuple[np.ndarray, np.ndarray, List[str]]:
	        """Detect objects in an image and return them in original coordinates.

	        Args:
	            image (cv2.UMat, str): An image array, or a path that is loaded
	                with ``cv2.imread``. An array input is copied, not modified.
	            to_rgb: Forwarded to ``preprocess_image``.

	        Returns:
	            ``(boxes, scores, label_names)``: ``(N, 4)`` boxes as
	            ``x1, y1, x2, y2`` in original-image pixels, ``(N,)`` scores, and
	            the ``N`` label names looked up in ``self.labels``.

	        Raises:
	            ValueError: If ``image`` is a path that ``cv2.imread`` cannot load.
	        """
	        if isinstance(image, str):
	            img = cv2.imread(image)
	            # cv2.imread signals failure by returning None instead of
	            # raising; fail here with the offending path rather than later
	            # with an unrelated error.
	            if img is None:
	                raise ValueError(f"Failed to read image: {image}")
	        else:
	            img = image.copy()

	        input_tensor, scale, ori_shape = self.preprocess_image(img, to_rgb)

	        boxes, scores, labels = self.run_inference(input_tensor)

	        # Score threshold + NMS.
	        filtered_boxes, filtered_scores, filtered_labels = self.filter_results(
	            boxes, scores, labels, self.iou_threshold, self.score_threshold
	        )

	        # Back to original-image coordinates.
	        origin_boxes = self.post_bbox(filtered_boxes, ori_shape, scale)

	        # Class indices -> label names; int() so any integral-valued numpy
	        # scalar is usable as a list index.
	        label_names = [self.labels[int(label)] for label in filtered_labels]

	        return origin_boxes, filtered_scores, label_names

	    def draw_pred(self, image: cv2.UMat, boxes: np.ndarray, scores: np.ndarray, labels: List[str]) -> cv2.UMat:
	        """Draw boxes and ``"<label> <score>"`` captions onto ``image``.

	        Args:
	            image: Image to draw on; it is modified in place.
	            boxes: ``(N, 4)`` boxes as ``x1, y1, x2, y2``.
	            scores: ``(N,)`` scores aligned with ``boxes``.
	            labels: ``N`` label names; each must be present in ``self.labels``.

	        Returns:
	            The same ``image`` object that was passed in.
	        """
	        # Each label has its own colour, chosen in __init__.
	        colors = self.label_colors

	        for box, score, label in zip(boxes, scores, labels):
	            # Drawing calls need integer pixel coordinates.
	            x1, y1, x2, y2 = (int(v) for v in box)
	            label_index = self.labels.index(label)
	            color = colors[label_index]

	            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
	            cv2.putText(image, f"{label} {score:.2f}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

	        return image