atalaydenknalbant committed on
Commit 3ea53a0 · verified · 1 Parent(s): dcf0229

Update app.py

Files changed (1)
  1. app.py +194 -193
app.py CHANGED
@@ -1,194 +1,195 @@
- import spaces
- import gradio as gr
- from PIL import Image, ImageDraw, ImageFont
- from ultralytics import YOLO
- from huggingface_hub import hf_hub_download
- import cv2
- import numpy as np  # needed for the PIL-to-OpenCV frame conversion below
- import tempfile
-
- def download_model(model_filename):
-     return hf_hub_download(repo_id="atalaydenknalbant/Yolov13", filename=model_filename)
-
- @spaces.GPU
- def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
-     model_path = download_model(model_id)
-
-     if input_type == "Image":
-         if image is None:
-             width, height = 640, 480
-             blank_image = Image.new("RGB", (width, height), color="white")
-             draw = ImageDraw.Draw(blank_image)
-             message = "No image provided"
-             font = ImageFont.load_default(size=40)
-             bbox = draw.textbbox((0, 0), message, font=font)
-             text_width = bbox[2] - bbox[0]
-             text_height = bbox[3] - bbox[1]
-             text_x = (width - text_width) / 2
-             text_y = (height - text_height) / 2
-             draw.text((text_x, text_y), message, fill="black", font=font)
-             return blank_image, None
-
-         model = YOLO(model_path)
-         results = model.predict(
-             source=image,
-             conf=conf_threshold,
-             iou=iou_threshold,
-             imgsz=640,
-             max_det=max_detection,
-             show_labels=True,
-             show_conf=True,
-         )
-         for r in results:
-             image_array = r.plot()
-             annotated_image = Image.fromarray(image_array[..., ::-1])
-         return annotated_image, None
-
-     elif input_type == "Video":
-         if video is None:
-             width, height = 640, 480
-             blank_image = Image.new("RGB", (width, height), color="white")
-             draw = ImageDraw.Draw(blank_image)
-             message = "No video provided"
-             font = ImageFont.load_default(size=40)
-             bbox = draw.textbbox((0, 0), message, font=font)
-             text_width = bbox[2] - bbox[0]
-             text_height = bbox[3] - bbox[1]
-             text_x = (width - text_width) / 2
-             text_y = (height - text_height) / 2
-             draw.text((text_x, text_y), message, fill="black", font=font)
-             temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-             fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-             out = cv2.VideoWriter(temp_video_file, fourcc, 1, (width, height))
-             frame = cv2.cvtColor(np.array(blank_image), cv2.COLOR_RGB2BGR)
-             out.write(frame)
-             out.release()
-             return None, temp_video_file
-
-         model = YOLO(model_path)
-         cap = cv2.VideoCapture(video)
-         fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
-         frames = []
-         while True:
-             ret, frame = cap.read()
-             if not ret:
-                 break
-             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-             results = model.predict(
-                 source=pil_frame,
-                 conf=conf_threshold,
-                 iou=iou_threshold,
-                 imgsz=640,
-                 max_det=max_detection,
-                 show_labels=True,
-                 show_conf=True,
-             )
-             for r in results:
-                 annotated_frame_array = r.plot()
-                 annotated_frame = cv2.cvtColor(annotated_frame_array, cv2.COLOR_BGR2RGB)
-             frames.append(annotated_frame)
-         cap.release()
-         if not frames:
-             return None, None
-
-         height_out, width_out, _ = frames[0].shape
-         temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-         out = cv2.VideoWriter(temp_video_file, fourcc, fps, (width_out, height_out))
-         for f in frames:
-             f_bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
-             out.write(f_bgr)
-         out.release()
-         return None, temp_video_file
-
-     return None, None
-
- def update_visibility(input_type):
-     if input_type == "Image":
-         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-     else:
-         return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
-
- def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
-     annotated_image, _ = yolo_inference(
-         input_type="Image",
-         image=image,
-         video=None,
-         model_id=model_id,
-         conf_threshold=conf_threshold,
-         iou_threshold=iou_threshold,
-         max_detection=max_detection
-     )
-     return gr.update(value="Image"), annotated_image
-
- with gr.Blocks() as app:
-     gr.Markdown("# YOLOv13: Object Detection")
-     gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
-     with gr.Accordion("Paper and Citation", open=False):
-         gr.Markdown("""
-         This application is based on the research from the paper: **YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception**.
-
-         - **Authors:** Mengqi Lei, Siqi Li, Yihong Wu, et al.
-         - **Preprint Link:** [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)
-
-         **BibTeX:**
-         ```
-         @article{yolov13,
-           title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
-           author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
-           journal={arXiv preprint arXiv:2506.17733},
-           year={2025}
-         }
-         ```
-         """)
-
-     with gr.Row():
-         with gr.Column():
-             image = gr.Image(type="pil", label="Image", visible=True)
-             video = gr.Video(label="Video", visible=False)
-             input_type = gr.Radio(
-                 choices=["Image", "Video"],
-                 value="Image",
-                 label="Input Type",
-             )
-             model_id = gr.Dropdown(
-                 label="Model Name",
-                 choices=[
-                     'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
-                 ],
-                 value="yolov13n.pt",
-             )
-             conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
-             iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
-             max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
-             infer_button = gr.Button("Detect Objects")
-         with gr.Column():
-             output_image = gr.Image(type="pil", label="Annotated Image", visible=True)
-             output_video = gr.Video(label="Annotated Video", visible=False)
-             gr.DeepLinkButton()
-
-     input_type.change(
-         fn=update_visibility,
-         inputs=input_type,
-         outputs=[image, video, output_image, output_video],
-     )
-
-     infer_button.click(
-         fn=yolo_inference,
-         inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
-         outputs=[output_image, output_video],
-     )
-
-     gr.Examples(
-         examples=[
-             ["zidane.jpg", "yolov13s.pt", 0.25, 0.45, 300],
-             ["bus.jpg", "yolov13l.pt", 0.25, 0.45, 300],
-             ["yolo_vision.jpg", "yolov13x.pt", 0.25, 0.45, 300],
-         ],
-         fn=yolo_inference_for_examples,
-         inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
-         outputs=[input_type, output_image],
-         label="Examples (Images)",
-     )
-
- if __name__ == '__main__':
      app.launch()
+ import spaces
+ import gradio as gr
+ from PIL import Image, ImageDraw, ImageFont
+ from ultralytics import YOLO
+ from huggingface_hub import hf_hub_download
+ import cv2
+ import numpy as np  # needed for the PIL-to-OpenCV frame conversion below
+ import tempfile
+
+ def download_model(model_filename):
+     return hf_hub_download(repo_id="atalaydenknalbant/Yolov13", filename=model_filename)
+
+ @spaces.GPU
+ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
+     model_path = download_model(model_id)
+
+     if input_type == "Image":
+         if image is None:
+             width, height = 640, 480
+             blank_image = Image.new("RGB", (width, height), color="white")
+             draw = ImageDraw.Draw(blank_image)
+             message = "No image provided"
+             font = ImageFont.load_default(size=40)
+             bbox = draw.textbbox((0, 0), message, font=font)
+             text_width = bbox[2] - bbox[0]
+             text_height = bbox[3] - bbox[1]
+             text_x = (width - text_width) / 2
+             text_y = (height - text_height) / 2
+             draw.text((text_x, text_y), message, fill="black", font=font)
+             return blank_image, None
+
+         model = YOLO(model_path)
+         results = model.predict(
+             source=image,
+             conf=conf_threshold,
+             iou=iou_threshold,
+             imgsz=640,
+             max_det=max_detection,
+             show_labels=True,
+             show_conf=True,
+         )
+         for r in results:
+             image_array = r.plot()
+             annotated_image = Image.fromarray(image_array[..., ::-1])
+         return annotated_image, None
+
+     elif input_type == "Video":
+         if video is None:
+             width, height = 640, 480
+             blank_image = Image.new("RGB", (width, height), color="white")
+             draw = ImageDraw.Draw(blank_image)
+             message = "No video provided"
+             font = ImageFont.load_default(size=40)
+             bbox = draw.textbbox((0, 0), message, font=font)
+             text_width = bbox[2] - bbox[0]
+             text_height = bbox[3] - bbox[1]
+             text_x = (width - text_width) / 2
+             text_y = (height - text_height) / 2
+             draw.text((text_x, text_y), message, fill="black", font=font)
+             temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+             fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+             out = cv2.VideoWriter(temp_video_file, fourcc, 1, (width, height))
+             frame = cv2.cvtColor(np.array(blank_image), cv2.COLOR_RGB2BGR)
+             out.write(frame)
+             out.release()
+             return None, temp_video_file
+
+         model = YOLO(model_path)
+         cap = cv2.VideoCapture(video)
+         fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
+         frames = []
+         while True:
+             ret, frame = cap.read()
+             if not ret:
+                 break
+             pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+             results = model.predict(
+                 source=pil_frame,
+                 conf=conf_threshold,
+                 iou=iou_threshold,
+                 imgsz=640,
+                 max_det=max_detection,
+                 show_labels=True,
+                 show_conf=True,
+             )
+             for r in results:
+                 annotated_frame_array = r.plot()
+                 annotated_frame = cv2.cvtColor(annotated_frame_array, cv2.COLOR_BGR2RGB)
+             frames.append(annotated_frame)
+         cap.release()
+         if not frames:
+             return None, None
+
+         height_out, width_out, _ = frames[0].shape
+         temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+         out = cv2.VideoWriter(temp_video_file, fourcc, fps, (width_out, height_out))
+         for f in frames:
+             f_bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
+             out.write(f_bgr)
+         out.release()
+         return None, temp_video_file
+
+     return None, None
+
+ def update_visibility(input_type):
+     if input_type == "Image":
+         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
+     else:
+         return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
+
+ def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
+     annotated_image, _ = yolo_inference(
+         input_type="Image",
+         image=image,
+         video=None,
+         model_id=model_id,
+         conf_threshold=conf_threshold,
+         iou_threshold=iou_threshold,
+         max_detection=max_detection
+     )
+     return gr.update(value="Image"), annotated_image
+
+ with gr.Blocks() as app:
+     gr.Markdown("# YOLOv13: Object Detection")
+     gr.Markdown("Upload an image or video for inference using the latest YOLOv13 models.")
+     gr.Markdown("📝 **Note:** Better trained models will be deployed when they are available.")
+     with gr.Accordion("Paper and Citation", open=False):
+         gr.Markdown("""
+         This application is based on the research from the paper: **YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception**.
+
+         - **Authors:** Mengqi Lei, Siqi Li, Yihong Wu, et al.
+         - **Preprint Link:** [https://arxiv.org/abs/2506.17733](https://arxiv.org/abs/2506.17733)
+
+         **BibTeX:**
+         ```
+         @article{yolov13,
+           title={YOLOv13: Real-Time Object Detection with Hypergraph-Enhanced Adaptive Visual Perception},
+           author={Lei, Mengqi and Li, Siqi and Wu, Yihong and et al.},
+           journal={arXiv preprint arXiv:2506.17733},
+           year={2025}
+         }
+         ```
+         """)
+
+     with gr.Row():
+         with gr.Column():
+             image = gr.Image(type="pil", label="Image", visible=True)
+             video = gr.Video(label="Video", visible=False)
+             input_type = gr.Radio(
+                 choices=["Image", "Video"],
+                 value="Image",
+                 label="Input Type",
+             )
+             model_id = gr.Dropdown(
+                 label="Model Name",
+                 choices=[
+                     'yolov13n.pt', 'yolov13s.pt', 'yolov13l.pt', 'yolov13x.pt',
+                 ],
+                 value="yolov13n.pt",
+             )
+             conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
+             iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
+             max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
+             infer_button = gr.Button("Detect Objects")
+         with gr.Column():
+             output_image = gr.Image(type="pil", label="Annotated Image", visible=True)
+             output_video = gr.Video(label="Annotated Video", visible=False)
+             gr.DeepLinkButton()
+
+     input_type.change(
+         fn=update_visibility,
+         inputs=input_type,
+         outputs=[image, video, output_image, output_video],
+     )
+
+     infer_button.click(
+         fn=yolo_inference,
+         inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
+         outputs=[output_image, output_video],
+     )
+
+     gr.Examples(
+         examples=[
+             ["zidane.jpg", "yolov13s.pt", 0.35, 0.45, 300],
+             ["bus.jpg", "yolov13l.pt", 0.35, 0.45, 300],
+             ["yolo_vision.jpg", "yolov13x.pt", 0.35, 0.45, 300],
+         ],
+         fn=yolo_inference_for_examples,
+         inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
+         outputs=[input_type, output_image],
+         label="Examples (Images)",
+     )
+
+ if __name__ == '__main__':
      app.launch()