File size: 3,674 Bytes
d138ef9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from typing import Tuple
import gradio as gr
import supervision as sv
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

# Load the YOLO model from Hugging Face
# NOTE: runs at import time and may hit the network on first run;
# subsequent runs resolve from the local HF cache.
model_path = hf_hub_download(
    repo_id="cultural-heritage/medieval-manuscript-yolov11",
    filename="medieval-yolov11n.pt"
)
# Load the YOLO model from local path
model = YOLO(model_path)

# Create annotators
# Shared, module-level annotators reused by every request; black label text
# for readability against the default label background.
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
BOX_ANNOTATOR = sv.BoxAnnotator()

def detect_and_annotate(
    image: np.ndarray,
    conf_threshold: float,
    iou_threshold: float
) -> np.ndarray:
    """Run YOLO inference on ``image`` and return an annotated copy.

    Args:
        image: Input image as a numpy array (as delivered by Gradio).
        conf_threshold: Minimum confidence for a detection to be kept.
        iou_threshold: IoU threshold applied by NMS during prediction.

    Returns:
        A copy of the input image with bounding boxes and
        "<class> (<confidence>)" labels drawn on it.
    """
    # predict() returns one Results object per input image; we pass a single
    # image, so take the first (and only) entry.
    prediction = model.predict(image, conf=conf_threshold, iou=iou_threshold)[0]

    # Move the raw tensors to CPU and convert to numpy for supervision.
    xyxy = prediction.boxes.xyxy.cpu().numpy()
    scores = prediction.boxes.conf.cpu().numpy()
    classes = prediction.boxes.cls.cpu().numpy().astype(int)

    # Wrap the arrays in a supervision Detections container.
    detections = sv.Detections(
        xyxy=xyxy,
        confidence=scores,
        class_id=classes,
    )

    # Build one "<name> (<score>)" label per detection.
    labels = [
        f"{prediction.names[cls_id]} ({score:.2f})"
        for cls_id, score in zip(classes, scores)
    ]

    # Draw on a copy so the caller's array is left untouched.
    annotated = BOX_ANNOTATOR.annotate(scene=image.copy(), detections=detections)
    annotated = LABEL_ANNOTATOR.annotate(
        scene=annotated, detections=detections, labels=labels
    )
    return annotated

# Create Gradio interface
# Create Gradio interface.
# NOTE: statement order inside the Blocks context defines the visual layout,
# so the component construction order below must not be rearranged.
with gr.Blocks() as demo:
    gr.Markdown("# Medieval Manuscript Detection with YOLO")
    
    with gr.Row():
        # Left column: input image plus detection controls.
        with gr.Column():
            input_image = gr.Image(
                label="Input Image",
                type='numpy'
            )
            with gr.Accordion("Detection Settings", open=True):
                with gr.Row():
                    conf_threshold = gr.Slider(
                        label="Confidence Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.25,
                    )
                    iou_threshold = gr.Slider(
                        label="IoU Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.45,
                        info="Decrease for stricter detection, increase for more overlapping boxes"
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                detect_btn = gr.Button("Detect", variant="primary")
                
        # Right column: annotated result.
        with gr.Column():
            output_image = gr.Image(
                label="Detection Result",
                type='numpy'
            )

    def process_image(
        image: np.ndarray,
        conf_threshold: float,
        iou_threshold: float
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Run detection and return (original, annotated); (None, None) if no image."""
        if image is None:
            return None, None
        annotated_image = detect_and_annotate(image, conf_threshold, iou_threshold)
        return image, annotated_image

    def clear() -> Tuple[None, None]:
        """Reset both image components to empty."""
        return None, None

    # Connect buttons to functions.
    # Detect re-emits the input image as well, so both panes stay in sync.
    detect_btn.click(
        process_image,
        inputs=[input_image, conf_threshold, iou_threshold],
        outputs=[input_image, output_image]
    )
    clear_btn.click(
        clear,
        inputs=None,
        outputs=[input_image, output_image]
    )

if __name__ == "__main__":
    # Start the Gradio server; debug mode blocks and streams logs, and
    # show_error surfaces server-side exceptions in the browser UI.
    launch_options = {"debug": True, "show_error": True}
    demo.launch(**launch_options)