Spaces:

atalaydenknalbant
/

Yolov13

Running on Zero

App Files Files Community

Yolov13 / app.py

atalaydenknalbant

Update app.py

f830549 verified 26 days ago

raw

history blame

7.72 kB

	import spaces
	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont
	from ultralytics import YOLO
	from huggingface_hub import hf_hub_download
	import cv2
	import tempfile
	import numpy as np

	def download_model(model_filename):
	    """Fetch one weights file from the Space's model repository.

	    Args:
	        model_filename: Name of the file inside the Hub repo
	            (e.g. ``"yolov13n.pt"``).

	    Returns:
	        Whatever ``hf_hub_download`` returns; the caller passes it straight
	        to ``YOLO(...)`` as the model path.
	    """
	    hub_repo = "atalaydenknalbant/Yolov13"
	    return hf_hub_download(repo_id=hub_repo, filename=model_filename)

	def _placeholder_image(message, width=640, height=480):
	    """Return a white RGB canvas with ``message`` centered in black text."""
	    canvas = Image.new("RGB", (width, height), color="white")
	    draw = ImageDraw.Draw(canvas)
	    font = ImageFont.load_default(size=40)
	    left, top, right, bottom = draw.textbbox((0, 0), message, font=font)
	    text_x = (width - (right - left)) / 2
	    text_y = (height - (bottom - top)) / 2
	    draw.text((text_x, text_y), message, fill="black", font=font)
	    return canvas


	def _temp_mp4_path():
	    """Create a persistent temporary ``.mp4`` file and return its path."""
	    # Close our own handle immediately: cv2.VideoWriter opens the path itself,
	    # and delete=False keeps the file on disk for Gradio to serve.
	    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
	        return tmp.name


	def _predict_annotated(model, source, conf_threshold, iou_threshold, max_detection):
	    """Run one prediction and return the plotted array of the last result.

	    Returns ``None`` when ``model.predict`` yields no results, so callers never
	    touch an unbound variable.
	    """
	    results = model.predict(
	        source=source,
	        conf=conf_threshold,
	        iou=iou_threshold,
	        imgsz=640,
	        max_det=max_detection,
	        show_labels=True,
	        show_conf=True,
	    )
	    annotated = None
	    for result in results:
	        annotated = result.plot()
	    return annotated


	@spaces.GPU
	def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
	    """Run YOLO detection on an image or a video and return the annotated media.

	    Args:
	        input_type: ``"Image"`` or ``"Video"``; anything else yields
	            ``(None, None)``.
	        image: PIL image to annotate (used when ``input_type == "Image"``),
	            or ``None``.
	        video: Video source handed to ``cv2.VideoCapture`` (used when
	            ``input_type == "Video"``), or ``None``.
	        model_id: Weights filename passed to ``download_model``.
	        conf_threshold: Confidence threshold forwarded to ``model.predict``.
	        iou_threshold: IoU threshold forwarded to ``model.predict``.
	        max_detection: ``max_det`` value forwarded to ``model.predict``.

	    Returns:
	        A ``(annotated_image, annotated_video_path)`` tuple. Exactly one slot
	        is populated for a successful run; the other is ``None``. When the
	        selected input is missing, a "No ... provided" placeholder image or
	        one-frame video is returned instead. ``(None, None)`` is returned when
	        nothing could be produced (e.g. the video has no readable frames).
	    """
	    if input_type == "Image":
	        if image is None:
	            return _placeholder_image("No image provided"), None

	        # Weights are fetched only once we know there is something to process.
	        model = YOLO(download_model(model_id))
	        plotted = _predict_annotated(model, image, conf_threshold, iou_threshold, max_detection)
	        if plotted is None:
	            return None, None
	        # Reverse the channel axis (BGR -> RGB) before handing the array to PIL.
	        return Image.fromarray(plotted[..., ::-1]), None

	    if input_type == "Video":
	        fourcc = cv2.VideoWriter_fourcc(*"mp4v")

	        if video is None:
	            placeholder = _placeholder_image("No video provided")
	            placeholder_path = _temp_mp4_path()
	            # PIL's .size is (width, height), the order VideoWriter expects.
	            writer = cv2.VideoWriter(placeholder_path, fourcc, 1, placeholder.size)
	            try:
	                writer.write(cv2.cvtColor(np.array(placeholder), cv2.COLOR_RGB2BGR))
	            finally:
	                writer.release()
	            return None, placeholder_path

	        model = YOLO(download_model(model_id))
	        cap = cv2.VideoCapture(video)
	        writer = None
	        output_path = None
	        try:
	            fps = cap.get(cv2.CAP_PROP_FPS)
	            fps = fps if fps > 0 else 25  # fall back when the container reports no FPS
	            while True:
	                ret, frame = cap.read()
	                if not ret:
	                    break
	                pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
	                plotted = _predict_annotated(
	                    model, pil_frame, conf_threshold, iou_threshold, max_detection
	                )
	                if plotted is None:
	                    # Keep the timeline intact by emitting the raw frame.
	                    plotted = frame
	                if writer is None:
	                    # Open the writer lazily so the output size comes from the
	                    # first annotated frame and no file is left behind for
	                    # videos without frames.
	                    height_out, width_out = plotted.shape[:2]
	                    output_path = _temp_mp4_path()
	                    writer = cv2.VideoWriter(output_path, fourcc, fps, (width_out, height_out))
	                # Frames are streamed straight to disk (already in BGR order)
	                # instead of being accumulated in memory.
	                writer.write(np.ascontiguousarray(plotted))
	        finally:
	            cap.release()
	            if writer is not None:
	                writer.release()
	        return None, output_path

	    return None, None

	def update_visibility(input_type):
	    """Show the widgets matching the selected input type and hide the others.

	    Args:
	        input_type: Current value of the "Input Type" radio; ``"Image"``
	            selects the image widgets, any other value selects the video ones.

	    Returns:
	        A 4-tuple of ``gr.update`` objects, in the order the change handler
	        wires them: image input, video input, image output, video output.
	    """
	    show_image = input_type == "Image"
	    # The original used the undefined name `true` in the video branch, which
	    # raised NameError as soon as the user switched to "Video".
	    return (
	        gr.update(visible=show_image),
	        gr.update(visible=not show_image),
	        gr.update(visible=show_image),
	        gr.update(visible=not show_image),
	    )

	def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
	    """Adapter used by the example gallery: image-only inference.

	    Calls ``yolo_inference`` in "Image" mode with no video, discards the video
	    slot of its result, and returns a pair of
	    (update setting the input-type radio to "Image", annotated image).
	    """
	    inference_kwargs = {
	        "input_type": "Image",
	        "image": image,
	        "video": None,
	        "model_id": model_id,
	        "conf_threshold": conf_threshold,
	        "iou_threshold": iou_threshold,
	        "max_detection": max_detection,
	    }
	    annotated, _unused_video = yolo_inference(**inference_kwargs)
	    radio_update = gr.update(value="Image")
	    return radio_update, annotated

	# Static content for the UI, kept apart from the layout code below.
	citation_markdown = """
	This application is based on the research from the paper: YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception.

	- Authors: Mengqi Lei, Siqi Li, Yihong Wu, et al.
	- Preprint Link: [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)

	BibTeX:
	```
	@article{yolov13,
	title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
	author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
	journal={arXiv preprint arXiv:2506.17733},
	year={2025}
	}
	```
	"""
	model_choices = [
	    'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
	]
	# Each row: image file, model name, confidence, IoU, max detections.
	example_rows = [
	    ["zidane.jpg", "yolov13s.pt", 0.35, 0.45, 300],
	    ["bus.jpg", "yolov13l.pt", 0.35, 0.45, 300],
	    ["yolo_vision.jpg", "yolov13x.pt", 0.35, 0.45, 300],
	]

	with gr.Blocks() as app:
	    # Header and collapsible citation.
	    gr.Markdown("# Yolo13: Object Detection")
	    gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
	    gr.Markdown("📝 Note: Better-trained models will be deployed as they become available.")
	    with gr.Accordion("Paper and Citation", open=False):
	        gr.Markdown(citation_markdown)

	    with gr.Row():
	        # Left column: inputs and inference settings.
	        with gr.Column():
	            image = gr.Image(type="pil", label="Image", visible=True)
	            video = gr.Video(label="Video", visible=False)
	            input_type = gr.Radio(choices=["Image", "Video"], value="Image", label="Input Type")
	            model_id = gr.Dropdown(label="Model Name", choices=model_choices, value="yolov13n.pt")
	            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
	            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
	            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
	            infer_button = gr.Button("Detect Objects")
	        # Right column: annotated outputs.
	        with gr.Column():
	            output_image = gr.Image(type="pil", label="Annotated Image", visible=True)
	            output_video = gr.Video(label="Annotated Video", visible=False)
	            gr.DeepLinkButton()

	    # Swap image/video widgets whenever the input type changes.
	    visibility_targets = [image, video, output_image, output_video]
	    input_type.change(fn=update_visibility, inputs=input_type, outputs=visibility_targets)

	    # Main inference trigger.
	    inference_inputs = [
	        input_type,
	        image,
	        video,
	        model_id,
	        conf_threshold,
	        iou_threshold,
	        max_detection,
	    ]
	    infer_button.click(
	        fn=yolo_inference,
	        inputs=inference_inputs,
	        outputs=[output_image, output_video],
	    )

	    # Image-only example gallery.
	    gr.Examples(
	        examples=example_rows,
	        fn=yolo_inference_for_examples,
	        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
	        outputs=[input_type, output_image],
	        label="Examples (Images)",
	    )

	# Start the server only when this file is run as a script.
	if __name__ == '__main__':
	    app.launch()