medieval-yolo11-seg

Sleeping

App Files Files Community

medieval-yolo11-seg / app.py

johnlockejrr

Update app.py

c4194df verified 3 months ago

raw

history blame

4.67 kB

	from typing import Tuple, Dict
	import gradio as gr
	import supervision as sv
	import numpy as np
	import cv2
	from huggingface_hub import hf_hub_download
	from ultralytics import YOLO

	# Display name offered in the UI -> weights filename requested from the Hub repo
	MODEL_OPTIONS = {"YOLOv11-Small": "medieval-yolo11s-seg.pt"}

	# Loaded YOLO instances, keyed by the same display names as MODEL_OPTIONS
	models: Dict[str, YOLO] = {}

	# Download each configured weights file at import time and wrap it in YOLO
	for name, model_file in MODEL_OPTIONS.items():
	    model_path = hf_hub_download(
	        filename=model_file,
	        repo_id="johnlockejrr/medieval-manuscript-yolov11-seg",
	    )
	    models[name] = YOLO(model_path)

	# Shared supervision annotators, built once and reused for every request
	MASK_ANNOTATOR = sv.MaskAnnotator()
	LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)

	def detect_and_annotate(
	    image: np.ndarray,
	    model_name: str,
	    conf_threshold: float,
	    iou_threshold: float
	) -> np.ndarray:
	    """Run the selected segmentation model on ``image`` and draw the results.

	    Args:
	        image: Input image array (the UI supplies it via ``gr.Image(type='numpy')``).
	        model_name: Key into the module-level ``models`` dict.
	        conf_threshold: Forwarded to ``model.predict`` as ``conf``.
	        iou_threshold: Forwarded to ``model.predict`` as ``iou``.

	    Returns:
	        A copy of ``image`` with masks (when the model produced any) and
	        "<class name> (<confidence>)" labels drawn on it.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``models``.
	    """
	    model = models[model_name]

	    # A single image is passed, so only the first Results object is needed.
	    # NOTE(review): Gradio hands over RGB arrays while Ultralytics documents
	    # numpy sources as BGR -- confirm whether a channel flip is wanted here.
	    results = model.predict(
	        image,
	        conf=conf_threshold,
	        iou=iou_threshold
	    )[0]

	    # Use supervision's own converter instead of rebuilding the arrays by hand.
	    # The manual path transposed masks.data with axes (1, 2, 0); Ultralytics
	    # documents that tensor as (num_masks, H, W), so the transpose moved the
	    # instance axis last and the per-mask resize loop iterated image rows.
	    detections = sv.Detections.from_ultralytics(results)

	    # One "<class name> (<confidence>)" label per detection
	    labels = [
	        f"{results.names[class_id]} ({conf:.2f})"
	        for class_id, conf
	        in zip(detections.class_id, detections.confidence)
	    ]

	    # Draw on a copy so the caller's array is left untouched
	    annotated_image = image.copy()
	    if detections.mask is not None:
	        annotated_image = MASK_ANNOTATOR.annotate(scene=annotated_image, detections=detections)
	    annotated_image = LABEL_ANNOTATOR.annotate(scene=annotated_image, detections=detections, labels=labels)

	    return annotated_image

	# Build the Gradio UI and wire its events
	with gr.Blocks() as demo:
	    gr.Markdown("# Medieval Manuscript Segmentation with YOLO")

	    with gr.Row():
	        # Left column: input image, inference settings and action buttons
	        with gr.Column():
	            input_image = gr.Image(type='numpy', label="Input Image")
	            with gr.Accordion("Detection Settings", open=True):
	                model_selector = gr.Dropdown(
	                    label="Model",
	                    info="Select YOLO model variant",
	                    choices=list(MODEL_OPTIONS),
	                    value=list(MODEL_OPTIONS)[0],
	                )
	                with gr.Row():
	                    conf_threshold = gr.Slider(
	                        minimum=0.0, maximum=1.0, step=0.05, value=0.25,
	                        label="Confidence Threshold",
	                    )
	                    iou_threshold = gr.Slider(
	                        minimum=0.0, maximum=1.0, step=0.05, value=0.45,
	                        label="IoU Threshold",
	                        info="Decrease for stricter detection, increase for more overlapping boxes",
	                    )
	            with gr.Row():
	                clear_btn = gr.Button("Clear")
	                detect_btn = gr.Button("Detect", variant="primary")

	        # Right column: annotated result
	        with gr.Column():
	            output_image = gr.Image(type='numpy', label="Segmentation Result")

	    def process_image(image, model_name, conf_threshold, iou_threshold):
	        """Return (input image, annotated image), or (None, None) without an input."""
	        if image is None:
	            return None, None
	        return image, detect_and_annotate(image, model_name, conf_threshold, iou_threshold)

	    def clear():
	        """Reset both image components."""
	        return None, None

	    # Both handlers write to the input and the output image components
	    detect_btn.click(
	        fn=process_image,
	        inputs=[input_image, model_selector, conf_threshold, iou_threshold],
	        outputs=[input_image, output_image],
	    )
	    clear_btn.click(fn=clear, inputs=None, outputs=[input_image, output_image])

	# Launch the app only when this file is executed as a script
	if __name__ == "__main__":
	    demo.launch(show_error=True, debug=True)