Spaces:
Sleeping
Sleeping
File size: 4,672 Bytes
d038733 d138ef9 2953294 d138ef9 d038733 0eacbf2 d038733 59259b5 d038733 d138ef9 59259b5 d138ef9 d038733 d138ef9 d038733 d138ef9 8d8b58e c4194df 139a78d c4194df d138ef9 59259b5 d138ef9 139a78d d138ef9 8d8b58e d138ef9 139a78d d138ef9 139a78d d138ef9 d038733 d138ef9 139a78d d138ef9 139a78d d138ef9 d038733 d138ef9 d038733 d138ef9 139a78d d138ef9 59259b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
from typing import Tuple, Dict
import gradio as gr
import supervision as sv
import numpy as np
import cv2
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
# Define models
MODEL_OPTIONS = {
"YOLOv11-Small": "medieval-yolo11s-seg.pt"
}
# Dictionary to store loaded models
models: Dict[str, YOLO] = {}
# Load all models
for name, model_file in MODEL_OPTIONS.items():
model_path = hf_hub_download(
repo_id="johnlockejrr/medieval-manuscript-yolov11-seg",
filename=model_file
)
models[name] = YOLO(model_path)
# Create annotators
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
MASK_ANNOTATOR = sv.MaskAnnotator()
def detect_and_annotate(
image: np.ndarray,
model_name: str,
conf_threshold: float,
iou_threshold: float
) -> np.ndarray:
# Get the selected model
model = models[model_name]
# Perform inference
results = model.predict(
image,
conf=conf_threshold,
iou=iou_threshold
)[0]
# Convert results to supervision Detections
boxes = results.boxes.xyxy.cpu().numpy()
confidence = results.boxes.conf.cpu().numpy()
class_ids = results.boxes.cls.cpu().numpy().astype(int)
# Handle masks if they exist
masks = None
if results.masks is not None:
masks = results.masks.data.cpu().numpy()
# Reshape masks to (num_masks, H, W)
masks = np.transpose(masks, (1, 2, 0)) # From (H, W, num_masks) to (num_masks, H, W)
# Resize masks to match original image dimensions
h, w = image.shape[:2]
resized_masks = []
for mask in masks:
resized_mask = cv2.resize(mask.astype(float), (w, h), interpolation=cv2.INTER_LINEAR)
resized_masks.append(resized_mask)
masks = np.array(resized_masks)
masks = masks.astype(bool)
# Create Detections object
detections = sv.Detections(
xyxy=boxes,
confidence=confidence,
class_id=class_ids,
mask=masks
)
# Create labels with confidence scores
labels = [
f"{results.names[class_id]} ({conf:.2f})"
for class_id, conf
in zip(class_ids, confidence)
]
# Annotate image
annotated_image = image.copy()
if masks is not None:
annotated_image = MASK_ANNOTATOR.annotate(scene=annotated_image, detections=detections)
annotated_image = LABEL_ANNOTATOR.annotate(scene=annotated_image, detections=detections, labels=labels)
return annotated_image
# Create Gradio interface
with gr.Blocks() as demo:
gr.Markdown("# Medieval Manuscript Segmentation with YOLO")
with gr.Row():
with gr.Column():
input_image = gr.Image(label="Input Image", type='numpy')
with gr.Accordion("Detection Settings", open=True):
model_selector = gr.Dropdown(
choices=list(MODEL_OPTIONS.keys()),
value=list(MODEL_OPTIONS.keys())[0],
label="Model",
info="Select YOLO model variant"
)
with gr.Row():
conf_threshold = gr.Slider(
label="Confidence Threshold",
minimum=0.0,
maximum=1.0,
step=0.05,
value=0.25,
)
iou_threshold = gr.Slider(
label="IoU Threshold",
minimum=0.0,
maximum=1.0,
step=0.05,
value=0.45,
info="Decrease for stricter detection, increase for more overlapping boxes"
)
with gr.Row():
clear_btn = gr.Button("Clear")
detect_btn = gr.Button("Detect", variant="primary")
with gr.Column():
output_image = gr.Image(label="Segmentation Result", type='numpy')
def process_image(image, model_name, conf_threshold, iou_threshold):
if image is None:
return None, None
annotated_image = detect_and_annotate(image, model_name, conf_threshold, iou_threshold)
return image, annotated_image
def clear():
return None, None
detect_btn.click(
process_image,
inputs=[input_image, model_selector, conf_threshold, iou_threshold],
outputs=[input_image, output_image]
)
clear_btn.click(clear, inputs=None, outputs=[input_image, output_image])
if __name__ == "__main__":
demo.launch(debug=True, show_error=True) |