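"""Gradio demo for medieval manuscript layout segmentation.

Downloads a YOLOv11 segmentation checkpoint from the Hugging Face Hub
(johnlockejrr/medieval-manuscript-yolov11-seg), runs inference with the
Ultralytics API, and renders the predicted masks and labels with the
supervision library.
"""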
from typing import Dict
import gradio as gr
import supervision as sv
import numpy as np
import cv2
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

# Define models
MODEL_OPTIONS = {
    "YOLOv11-Small": "medieval-yolo11s-seg.pt"
}

# Dictionary to store loaded models
models: Dict[str, YOLO] = {}

# Load all models
for name, model_file in MODEL_OPTIONS.items():
    model_path = hf_hub_download(
        repo_id="johnlockejrr/medieval-manuscript-yolov11-seg",
        filename=model_file
    )
    models[name] = YOLO(model_path)

# Create annotators
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
MASK_ANNOTATOR = sv.MaskAnnotator()

def detect_and_annotate(
    image: np.ndarray,
    model_name: str,
    conf_threshold: float,
    iou_threshold: float
) -> np.ndarray:
    # Get the selected model
    model = models[model_name]
    
    # Perform inference
    results = model.predict(
        image,
        conf=conf_threshold,
        iou=iou_threshold
    )[0]
    
    # Convert results to supervision Detections
    boxes = results.boxes.xyxy.cpu().numpy()
    confidence = results.boxes.conf.cpu().numpy()
    class_ids = results.boxes.cls.cpu().numpy().astype(int)
    
    # Handle masks if they exist; Ultralytics returns them as (num_masks, H, W)
    # at the model's inference resolution, so each mask is resized back to the
    # original image size before being handed to supervision.
    masks = None
    if results.masks is not None:
        masks = results.masks.data.cpu().numpy()  # shape: (num_masks, H, W)

        # Resize masks to match original image dimensions
        h, w = image.shape[:2]
        resized_masks = []
        for mask in masks:
            resized_mask = cv2.resize(mask.astype(np.float32), (w, h), interpolation=cv2.INTER_LINEAR)
            resized_masks.append(resized_mask)
        # Threshold the interpolated masks back to booleans
        masks = np.array(resized_masks) > 0.5
    
    # Create Detections object
    detections = sv.Detections(
        xyxy=boxes,
        confidence=confidence,
        class_id=class_ids,
        mask=masks
    )
    
    # Create labels with confidence scores
    labels = [
        f"{results.names[class_id]} ({conf:.2f})"
        for class_id, conf
        in zip(class_ids, confidence)
    ]

    # Annotate image
    annotated_image = image.copy()
    if masks is not None:
        annotated_image = MASK_ANNOTATOR.annotate(scene=annotated_image, detections=detections)
    annotated_image = LABEL_ANNOTATOR.annotate(scene=annotated_image, detections=detections, labels=labels)
    
    return annotated_image
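
# Hypothetical usage example (not part of the original demo): calling
# detect_and_annotate directly on a local image, assuming a file named
# "manuscript_page.jpg" exists. gr.Image(type='numpy') supplies RGB arrays,
# so the BGR output of cv2.imread is converted first.
#
#   img = cv2.cvtColor(cv2.imread("manuscript_page.jpg"), cv2.COLOR_BGR2RGB)
#   annotated = detect_and_annotate(img, "YOLOv11-Small", conf_threshold=0.25, iou_threshold=0.45)
#   cv2.imwrite("annotated.png", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))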

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Medieval Manuscript Segmentation with YOLO")
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type='numpy')
            with gr.Accordion("Detection Settings", open=True):
                model_selector = gr.Dropdown(
                    choices=list(MODEL_OPTIONS.keys()),
                    value=list(MODEL_OPTIONS.keys())[0],
                    label="Model",
                    info="Select YOLO model variant"
                )
                with gr.Row():
                    conf_threshold = gr.Slider(
                        label="Confidence Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.25,
                    )
                    iou_threshold = gr.Slider(
                        label="IoU Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.45,
                        info="Decrease for stricter detection, increase for more overlapping boxes"
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                detect_btn = gr.Button("Detect", variant="primary")
                
        with gr.Column():
            output_image = gr.Image(label="Segmentation Result", type='numpy')

    def process_image(image, model_name, conf_threshold, iou_threshold):
        if image is None:
            return None, None
        annotated_image = detect_and_annotate(image, model_name, conf_threshold, iou_threshold)
        return image, annotated_image

    def clear():
        return None, None

    detect_btn.click(
        process_image,
        inputs=[input_image, model_selector, conf_threshold, iou_threshold],
        outputs=[input_image, output_image]
    )
    clear_btn.click(clear, inputs=None, outputs=[input_image, output_image])

if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)
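
# To run this demo locally (assuming the file is saved as app.py), install the
# required packages and launch the script:
#
#   pip install gradio supervision ultralytics huggingface_hub opencv-python
#   python app.py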