Spaces:

yolo12138
/

Poker_Detection_hf

Sleeping

App Files Community

yolo12138 committed on Jul 19

Commit

efafe9b

1 Parent(s): d26f281

code

Browse files

Files changed (6) hide show

.gitignore +3 -0
app.py +85 -0
core/poker_detector.py +42 -0
core/runonnx/base_onnx.py +88 -0
core/runonnx/common_detection.py +244 -0
requirements.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__/
+coverage/
+.DS_Store

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import gradio as gr
+# import cv2
+import os
+import base64
+from pathlib import Path
+from core.poker_detector import PokerDetector
# Single module-level detector reused by every detection request; the model
# path is relative to the directory the app is started from.
detector = PokerDetector(model_path="onnx/poker_detection_v4_rank.onnx")
+### 构建 examples
def build_examples():
    """Collect example images for the Gradio ``Examples`` widget.

    Scans the ``examples`` directory (relative to the current working
    directory) for ``.jpg`` / ``.png`` files; the extension is matched
    case-insensitively.

    Returns:
        list: One single-element list ``[image_path]`` per image, sorted by
        file name so that the examples shown in the UI are stable across
        runs. Empty when the directory does not exist.
    """
    examples_dir = "examples"
    # This runs at import time; a missing directory must not crash the app.
    if not os.path.isdir(examples_dir):
        return []
    # os.listdir returns entries in arbitrary order, so sort for determinism.
    return [
        [os.path.join(examples_dir, name)]
        for name in sorted(os.listdir(examples_dir))
        if name.lower().endswith((".jpg", ".png"))
    ]
# Example rows offered underneath the upload widget.
full_examples = build_examples()

# Caps the rendered height of every component tagged with the "image" class.
_IMAGE_CSS = """
        .image img {
            max-height: 512px;
        }
    """

# Page heading shown at the top of the demo.
_TITLE_MARKDOWN = """
                ## 扑克牌检测
                """


def detect_poker(image):
    """Run card detection on the uploaded image.

    Args:
        image: Image array delivered by the upload widget, or ``None`` when
            the widget holds no image.

    Returns:
        tuple: ``(annotated_image, timing_text)``. ``(None, "")`` for an
        empty input, and ``(None, "检测失败")`` plus a UI warning if the
        detector raises.
    """
    if image is None:
        return None, ""
    try:
        annotated, timing = detector.pred_and_draw(image)
    except Exception as e:
        gr.Warning(f"检测失败: {e}")
        return None, "检测失败"
    else:
        return annotated, timing


with gr.Blocks(css=_IMAGE_CSS) as demo:
    gr.Markdown(_TITLE_MARKDOWN)

    # Left column: upload; right column: annotated result and elapsed time.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                label="上传扑克牌图片",
                type="numpy",
                elem_classes="image",
            )
        with gr.Column():
            with gr.Column():
                result_image = gr.Image(
                    label="检测结果",
                    interactive=False,
                    visible=True,
                    elem_classes="image",
                )
            with gr.Column():
                use_time = gr.Textbox(
                    label="用时",
                    interactive=False,
                    visible=True,
                )

    # At most ten example images, all on a single page.
    with gr.Row(), gr.Column():
        gr.Examples(
            full_examples[:10],
            inputs=[image_input],
            label="示例图片",
            examples_per_page=10,
        )

    # Re-run detection whenever the uploaded image changes.
    image_input.change(
        fn=detect_poker,
        inputs=[image_input],
        outputs=[result_image, use_time],
    )

if __name__ == "__main__":
    demo.launch()

core/poker_detector.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import time
+import numpy as np
+import cv2
+from typing import List, Tuple, Union
+from .runonnx.common_detection import COMMON_DETECTION_ONNX
class PokerDetector:
    """Detect playing cards in an image and draw the detections on it.

    Thin wrapper around ``COMMON_DETECTION_ONNX`` configured with the 15
    class labels of the poker model.
    """

    def __init__(self,
                 model_path: str,
                 ):
        """Load the ONNX detection model.

        Args:
            model_path: Path to the ONNX model file.
        """
        self.poker_detection = COMMON_DETECTION_ONNX(
            model_path=model_path,
            # NOTE(review): 'R' and 'B' are presumably the two jokers
            # (red / black) -- confirm against the training labels.
            labels=['A', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'R', 'B'],
        )

    def pred_and_draw(self, image_rgb: Union[np.ndarray, None] = None) -> Tuple[Union[np.ndarray, None], str]:
        """Detect cards in an RGB image and return an annotated copy.

        Args:
            image_rgb: Image in RGB channel order, or ``None``.

        Returns:
            ``(annotated_image, info)``. ``annotated_image`` is a copy of the
            input with boxes and labels drawn on it and ``info`` is the
            elapsed-time text. ``(None, "")`` is returned for a ``None``
            input and ``(None, "检测失败2")`` when detection or drawing
            raises; the exception itself is printed, not propagated.
        """
        if image_rgb is None:
            return None, ""
        # perf_counter is monotonic, which makes it the right clock for durations.
        start_time = time.perf_counter()
        try:
            # The detector is fed BGR input.
            image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
            origin_boxes, filtered_scores, label_names = self.poker_detection.pred(image=image_bgr)
            # draw_pred paints directly onto the array it receives, so give it
            # a copy and leave the caller's image untouched.
            image_rgb_with_pred = self.poker_detection.draw_pred(
                image_rgb.copy(),
                boxes=origin_boxes,
                scores=filtered_scores,
                labels=label_names,
            )
        except Exception as e:
            print("检测失败2", e)
            return None, "检测失败2"
        use_time = time.perf_counter() - start_time
        time_info = f"推理用时: {use_time:.2f}s"
        return image_rgb_with_pred, time_info

core/runonnx/base_onnx.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import onnxruntime
+import numpy as np
+import cv2
+from abc import ABC, abstractmethod
+from typing import Any, Tuple, Union, List
class BaseONNX(ABC):
    """Abstract base class for models executed through ONNX Runtime.

    Subclasses implement preprocessing, inference, prediction and drawing;
    this class owns the inference session and a few shared helpers.
    """

    def __init__(self, model_path: str, input_size: Tuple[int, int]):
        """Create the inference session for an ONNX model.

        Args:
            model_path (str): Path to the ONNX model file.
            input_size (tuple): Model input size as (width, height).
        """
        self.session = onnxruntime.InferenceSession(model_path)
        # Name of the model's first input, used as the feed key at run time.
        self.input_name = self.session.get_inputs()[0].name
        self.input_size = input_size

    def load_image(self, image: Union[np.ndarray, str]) -> np.ndarray:
        """Load an image from a path, or copy an already-loaded image.

        Args:
            image (Union[np.ndarray, str]): Image path or image array.

        Returns:
            np.ndarray: The image read from disk, or a copy of the given
            array so that later processing cannot modify the caller's data.

        Raises:
            ValueError: If ``image`` is a path that cannot be read.
        """
        if isinstance(image, str):
            loaded = cv2.imread(image)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with a clear message rather than somewhere downstream.
            if loaded is None:
                raise ValueError(f"Failed to read image from path: {image}")
            return loaded
        return image.copy()

    @abstractmethod
    def preprocess_image(self, img_bgr: np.ndarray, *args, **kwargs) -> np.ndarray:
        """Preprocess an image for the model (abstract).

        Args:
            img_bgr (np.ndarray): Input image in BGR format.

        Returns:
            np.ndarray: The preprocessed image.
        """
        pass

    @abstractmethod
    def run_inference(self, image: np.ndarray) -> Any:
        """Run model inference (abstract).

        Args:
            image (np.ndarray): Preprocessed input image.

        Returns:
            Any: Raw model output.
        """
        pass

    @abstractmethod
    def pred(self, image: Union[np.ndarray, str], *args, **kwargs) -> Any:
        """Run the full prediction pipeline (abstract).

        Args:
            image (Union[np.ndarray, str]): Input image or image path.

        Returns:
            Any: Prediction result.
        """
        pass

    @abstractmethod
    def draw_pred(self, img: np.ndarray, *args, **kwargs) -> np.ndarray:
        """Draw prediction results on an image (abstract).

        Args:
            img (np.ndarray): Image to draw on.

        Returns:
            np.ndarray: Image with the results drawn.
        """
        pass

    def check_images_list(self, images: List[Union[np.ndarray, str, "cv2.UMat"]]):
        """Validate that every entry is an image object or an image path.

        Args:
            images: Items to validate.

        Raises:
            ValueError: If any item is not a ``cv2.UMat``, ``str`` or
                ``np.ndarray``.
        """
        for image in images:
            if not isinstance(image, (cv2.UMat, str, np.ndarray)):
                raise ValueError("The images must be a list of cv2.UMat or str or np.ndarray.")

core/runonnx/common_detection.py ADDED Viewed

	@@ -0,0 +1,244 @@

+# import onnxruntime
+import numpy as np
+import cv2
+from typing import Tuple, List, Union
+from .base_onnx import BaseONNX
class COMMON_DETECTION_ONNX(BaseONNX):
    """ONNX object detector with resize-and-pad preprocessing, NMS and drawing.

    ``labels`` holds the class names indexed by the label ids the model
    emits, ``label_colors`` one random colour per label, and
    ``iou_threshold`` / ``score_threshold`` the filtering settings applied by
    ``pred``.
    """

    def __init__(self,
                 model_path,
                 labels: List[str],
                 # Model input size.
                 input_size=(640, 640),  # (w, h)
                 iou_threshold: float = 0.5,
                 score_threshold: float = 0.2,
                 ):
        """Load the model and store the detection settings.

        Args:
            model_path: Path to the ONNX model file.
            labels: Class names, indexed by the model's label ids.
            input_size: Model input size as (width, height).
            iou_threshold: IoU above which a lower-scoring box is suppressed.
            score_threshold: Minimum score a detection must reach to be kept.
        """
        super().__init__(model_path, input_size)
        self.labels = labels
        # One random colour per label. The channels are cast to plain ints so
        # the tuples are ordinary Python scalars when handed to OpenCV.
        self.label_colors = [
            tuple(int(np.random.randint(0, 255)) for _ in range(3))
            for _ in labels
        ]
        self.iou_threshold = iou_threshold
        self.score_threshold = score_threshold

    def preprocess_image(self, image: np.ndarray, to_rgb: bool = True) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Resize with preserved aspect ratio, pad, normalise and batch an image.

        Args:
            image: Input image as an (H, W, 3) array.
            to_rgb: When true, swap the channel order with ``COLOR_BGR2RGB``
                before any other step.

        Returns:
            ``(input_tensor, scale, ori_shape)``: the (1, C, H, W) float32
            tensor, the resize factor that was applied, and the original
            ``(height, width)``.
        """
        if to_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # input_size is documented as (w, h); unpack it that way so that
        # non-square model inputs are resized and padded correctly.
        target_w, target_h = self.input_size
        ori_shape = image.shape[:2]
        h, w = ori_shape
        # 1. Resize, keeping the aspect ratio.
        scale = min(target_h / h, target_w / w)
        # Never let a side collapse to 0 for extreme aspect ratios.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # 2. Pad on the bottom/right only, so box coordinates keep a (0, 0)
        #    origin and can be mapped back with a plain division by `scale`.
        padded = cv2.copyMakeBorder(
            resized, 0, target_h - new_h, 0, target_w - new_w,
            cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )
        # 3. Normalise. The constants are listed in BGR order.
        # NOTE(review): with to_rgb=True the channels are swapped before this
        # step, so the constants would no longer line up -- confirm whether
        # that path is intended.
        mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
        std = np.array([57.375, 57.12, 58.395], dtype=np.float32)
        normalized = (padded.astype(np.float32) - mean) / std
        # 4. (H, W, C) -> (C, H, W), then add the batch dimension.
        input_tensor = normalized.transpose(2, 0, 1)[np.newaxis, ...]
        return input_tensor, scale, ori_shape

    def post_bbox(self, boxes, origin_shape, scale):
        """Map boxes from model-input space back to the original image.

        This is the inverse of the resize done in ``preprocess_image``.

        Args:
            boxes: (N, 4) array of x1, y1, x2, y2.
            origin_shape: Original image shape as (H, W).
            scale: Resize factor returned by ``preprocess_image``. When
                ``None`` it is recomputed from ``origin_shape`` and
                ``self.input_size``.

        Returns:
            (N, 4) array of x1, y1, x2, y2 clipped to the original image.
            ``None`` or empty input is returned unchanged.
        """
        if boxes is None or len(boxes) == 0:
            return boxes
        boxes = boxes.copy()
        h, w = origin_shape
        if scale is None:
            # Derive the factor from the configured input size instead of
            # assuming a 640x640 model input.
            target_w, target_h = self.input_size
            scale = min(target_h / h, target_w / w)
        # Undo the resize.
        boxes /= scale
        # Clip to the original image bounds.
        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, w)  # x1, x2
        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, h)  # y1, y2
        return boxes

    def filter_results(self, boxes: np.ndarray, scores: np.ndarray, labels: np.ndarray, iou_threshold: float, score_threshold: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Apply score thresholding followed by non-maximum suppression.

        Args:
            boxes: (N, 4) array of x1, y1, x2, y2.
            scores: (N,) detection scores.
            labels: (N,) label ids.
            iou_threshold: IoU above which a lower-scoring box is dropped.
            score_threshold: Minimum score to keep a detection.

        Returns:
            The surviving ``(boxes, scores, labels)``, ordered by descending
            score.
        """
        # 1. Drop detections scoring below score_threshold.
        mask_score = scores >= score_threshold
        target_boxes = boxes[mask_score]
        target_scores = scores[mask_score]
        target_labels = labels[mask_score]
        # 2. Suppress boxes that overlap a higher-scoring box by more than
        #    iou_threshold. The suppression ignores the labels.
        keep = self.nms(target_boxes, target_scores, iou_threshold)
        return target_boxes[keep], target_scores[keep], target_labels[keep]

    def nms(self, boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> np.ndarray:
        """Greedy non-maximum suppression.

        The highest-scoring box is kept and every remaining box whose IoU
        with it is greater than ``iou_threshold`` is discarded; this repeats
        on what is left.

        Args:
            boxes: (N, 4) array of x1, y1, x2, y2.
            scores: (N,) detection scores.
            iou_threshold: IoU above which a box is suppressed.

        Returns:
            int32 array with the indices of the kept boxes, by descending
            score.
        """
        if len(boxes) == 0:
            return np.array([], dtype=np.int32)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        # Widths and heights include a +1 term in both the areas and the
        # intersections below.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        # Candidates ordered from highest to lowest score.
        order = np.argsort(scores)[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            rest = order[1:]
            keep.append(i)
            # Intersection of the current box with every remaining box.
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            iou = inter / (areas[i] + areas[rest] - inter)
            # Only boxes at or below the threshold stay in the candidate list.
            order = rest[np.where(iou <= iou_threshold)[0]]
        return np.array(keep, dtype=np.int32)

    def run_inference(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Run the ONNX session on a preprocessed image.

        Args:
            image (np.ndarray): Batched input tensor from ``preprocess_image``.

        Returns:
            boxes: (N, 4) x1, y1, x2, y2
            scores: (N,)
            labels: (N,)
        """
        ort_outs = self.session.run(None, {self.input_name: image})
        # Output 0 holds, per detection, four box coordinates followed by the
        # score; output 1 holds the label ids. Only batch item 0 is read.
        boxes_scores, labels = ort_outs[0], ort_outs[1]
        boxes = boxes_scores[0, :, :4]
        scores = boxes_scores[0, :, 4]
        labels = labels[0]
        return boxes, scores, labels

    def pred(self, image: Union[np.ndarray, str], to_rgb: bool = False) -> Tuple[np.ndarray, np.ndarray, List[str]]:
        """Detect objects in an image.

        Args:
            image: Image array, or the path of an image to read.
            to_rgb: Forwarded to ``preprocess_image``.

        Returns:
            ``(boxes, scores, label_names)``: boxes in original-image
            coordinates as (N, 4) x1, y1, x2, y2, their scores, and the class
            name of each detection.

        Raises:
            ValueError: If ``image`` is a path that cannot be read.
        """
        if isinstance(image, str):
            img = cv2.imread(image)
            # cv2.imread returns None on failure; report it clearly here.
            if img is None:
                raise ValueError(f"Failed to read image from path: {image}")
        else:
            img = image.copy()
        input_tensor, scale, ori_shape = self.preprocess_image(img, to_rgb)
        boxes, scores, labels = self.run_inference(input_tensor)
        # Score thresholding + NMS.
        filtered_boxes, filtered_scores, filtered_labels = self.filter_results(
            boxes, scores, labels, self.iou_threshold, self.score_threshold
        )
        # Back to original-image coordinates.
        origin_boxes = self.post_bbox(filtered_boxes, ori_shape, scale)
        # int() makes the lookup work whatever numeric dtype the ids have.
        label_names = [self.labels[int(label)] for label in filtered_labels]
        return origin_boxes, filtered_scores, label_names

    def draw_pred(self, image: np.ndarray, boxes: np.ndarray, scores: np.ndarray, labels: List[str]) -> np.ndarray:
        """Draw boxes and ``"<label> <score>"`` captions onto an image.

        The drawing happens in place: ``image`` itself is modified and
        returned.

        Args:
            image: Image to draw on.
            boxes: (N, 4) array of x1, y1, x2, y2.
            scores: (N,) detection scores.
            labels: Class name of each detection.

        Returns:
            The same ``image`` object, with the detections drawn.
        """
        colors = self.label_colors
        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = (int(v) for v in box)
            # Each label has its own colour.
            color = colors[self.labels.index(label)]
            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, f"{label} {score:.2f}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        return image

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ opencv-python
2	+ onnxruntime