Files changed (2) hide show
  1. app.py +23 -48
  2. requirements.txt +1 -2
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
- from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge
10
  from rfdetr.detr import RFDETR
11
  from rfdetr.util.coco_classes import COCO_CLASSES
12
 
@@ -17,24 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
17
 
18
  MARKDOWN = """
19
  # RF-DETR 🔥
 
20
  [`[code]`](https://github.com/roboflow/rf-detr)
21
  [`[blog]`](https://blog.roboflow.com/rf-detr)
22
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 
23
  RF-DETR is a real-time, transformer-based object detection model architecture developed
24
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
25
  """
26
 
27
  IMAGE_PROCESSING_EXAMPLES = [
28
- ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium"],
29
- ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium"],
30
- ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium"],
31
- ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano"],
32
- ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano"],
33
- ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano"],
34
  ]
35
  VIDEO_PROCESSING_EXAMPLES = [
36
- ["videos/people-walking.mp4", 0.3, 1024, "medium"],
37
- ["videos/vehicles.mp4", 0.3, 1024, "medium"],
38
  ]
39
 
40
  COLOR = sv.ColorPalette.from_hex([
@@ -52,7 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
52
  def detect_and_annotate(
53
  model: RFDETR,
54
  image: ImageType,
55
- confidence: float,
56
  ) -> ImageType:
57
  detections = model.predict(image, threshold=confidence)
58
 
@@ -80,12 +79,6 @@ def detect_and_annotate(
80
 
81
 
82
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
83
- if checkpoint == "nano":
84
- return RFDETRNano(resolution=resolution)
85
- if checkpoint == "small":
86
- return RFDETRSmall(resolution=resolution)
87
- if checkpoint == "medium":
88
- return RFDETRMedium(resolution=resolution)
89
  if checkpoint == "base":
90
  return RFDETRBase(resolution=resolution)
91
  elif checkpoint == "large":
@@ -93,33 +86,12 @@ def load_model(resolution: int, checkpoint: str) -> RFDETR:
93
  raise TypeError("Checkpoint must be a base or large.")
94
 
95
 
96
- def adjust_resolution(checkpoint: str, resolution: int) -> int:
97
- if checkpoint in {"nano", "small", "medium"}:
98
- divisor = 32
99
- elif checkpoint in {"base", "large"}:
100
- divisor = 56
101
- else:
102
- raise ValueError(f"Unknown checkpoint: {checkpoint}")
103
-
104
- remainder = resolution % divisor
105
- if remainder == 0:
106
- return resolution
107
- lower = resolution - remainder
108
- upper = lower + divisor
109
-
110
- if resolution - lower < upper - resolution:
111
- return lower
112
- else:
113
- return upper
114
-
115
-
116
  def image_processing_inference(
117
  input_image: Image.Image,
118
  confidence: float,
119
  resolution: int,
120
  checkpoint: str
121
  ):
122
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
123
  model = load_model(resolution=resolution, checkpoint=checkpoint)
124
  return detect_and_annotate(model=model, image=input_image, confidence=confidence)
125
 
@@ -129,8 +101,8 @@ def video_processing_inference(
129
  confidence: float,
130
  resolution: int,
131
  checkpoint: str,
 
132
  ):
133
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
134
  model = load_model(resolution=resolution, checkpoint=checkpoint)
135
 
136
  name = generate_unique_name()
@@ -148,7 +120,7 @@ def video_processing_inference(
148
  annotated_frame = detect_and_annotate(
149
  model=model,
150
  image=frame,
151
- confidence=confidence,
152
  )
153
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
154
  sink.write_frame(annotated_frame)
@@ -182,15 +154,15 @@ with gr.Blocks() as demo:
182
  )
183
  image_processing_resolution_slider = gr.Slider(
184
  label="Inference resolution",
185
- minimum=224,
186
- maximum=2240,
187
- step=1,
188
- value=896,
189
  )
190
  image_processing_checkpoint_dropdown = gr.Dropdown(
191
  label="Checkpoint",
192
- choices=["nano", "small", "medium"],
193
- value="medium"
194
  )
195
  with gr.Column():
196
  image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -205,6 +177,8 @@ with gr.Blocks() as demo:
205
  image_processing_checkpoint_dropdown
206
  ],
207
  outputs=image_processing_output_image,
 
 
208
  )
209
 
210
  image_processing_submit_button.click(
@@ -245,8 +219,8 @@ with gr.Blocks() as demo:
245
  )
246
  video_processing_checkpoint_dropdown = gr.Dropdown(
247
  label="Checkpoint",
248
- choices=["nano", "small", "medium"],
249
- value="medium"
250
  )
251
  with gr.Column():
252
  video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -260,7 +234,8 @@ with gr.Blocks() as demo:
260
  video_processing_resolution_slider,
261
  video_processing_checkpoint_dropdown
262
  ],
263
- outputs=video_processing_output_video
 
264
  )
265
 
266
  video_processing_submit_button.click(
 
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
+ from rfdetr import RFDETRBase, RFDETRLarge
10
  from rfdetr.detr import RFDETR
11
  from rfdetr.util.coco_classes import COCO_CLASSES
12
 
 
17
 
18
  MARKDOWN = """
19
  # RF-DETR 🔥
20
+
21
  [`[code]`](https://github.com/roboflow/rf-detr)
22
  [`[blog]`](https://blog.roboflow.com/rf-detr)
23
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
24
+
25
  RF-DETR is a real-time, transformer-based object detection model architecture developed
26
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
27
  """
28
 
29
  IMAGE_PROCESSING_EXAMPLES = [
30
+ ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
31
+ ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
32
+ ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 
 
 
33
  ]
34
  VIDEO_PROCESSING_EXAMPLES = [
35
+ ["videos/people-walking.mp4", 0.3, 728, "large"],
36
+ ["videos/vehicles.mp4", 0.3, 728, "large"],
37
  ]
38
 
39
  COLOR = sv.ColorPalette.from_hex([
 
51
  def detect_and_annotate(
52
  model: RFDETR,
53
  image: ImageType,
54
+ confidence: float
55
  ) -> ImageType:
56
  detections = model.predict(image, threshold=confidence)
57
 
 
79
 
80
 
81
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
 
 
 
 
 
 
82
  if checkpoint == "base":
83
  return RFDETRBase(resolution=resolution)
84
  elif checkpoint == "large":
 
86
  raise TypeError("Checkpoint must be a base or large.")
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def image_processing_inference(
90
  input_image: Image.Image,
91
  confidence: float,
92
  resolution: int,
93
  checkpoint: str
94
  ):
 
95
  model = load_model(resolution=resolution, checkpoint=checkpoint)
96
  return detect_and_annotate(model=model, image=input_image, confidence=confidence)
97
 
 
101
  confidence: float,
102
  resolution: int,
103
  checkpoint: str,
104
+ progress=gr.Progress(track_tqdm=True)
105
  ):
 
106
  model = load_model(resolution=resolution, checkpoint=checkpoint)
107
 
108
  name = generate_unique_name()
 
120
  annotated_frame = detect_and_annotate(
121
  model=model,
122
  image=frame,
123
+ confidence=confidence
124
  )
125
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
126
  sink.write_frame(annotated_frame)
 
154
  )
155
  image_processing_resolution_slider = gr.Slider(
156
  label="Inference resolution",
157
+ minimum=560,
158
+ maximum=1120,
159
+ step=56,
160
+ value=728,
161
  )
162
  image_processing_checkpoint_dropdown = gr.Dropdown(
163
  label="Checkpoint",
164
+ choices=["base", "large"],
165
+ value="base"
166
  )
167
  with gr.Column():
168
  image_processing_submit_button = gr.Button("Submit", value="primary")
 
177
  image_processing_checkpoint_dropdown
178
  ],
179
  outputs=image_processing_output_image,
180
+ cache_examples=True,
181
+ run_on_click=True
182
  )
183
 
184
  image_processing_submit_button.click(
 
219
  )
220
  video_processing_checkpoint_dropdown = gr.Dropdown(
221
  label="Checkpoint",
222
+ choices=["base", "large"],
223
+ value="base"
224
  )
225
  with gr.Column():
226
  video_processing_submit_button = gr.Button("Submit", value="primary")
 
234
  video_processing_resolution_slider,
235
  video_processing_checkpoint_dropdown
236
  ],
237
+ outputs=video_processing_output_video,
238
+ run_on_click=True
239
  )
240
 
241
  video_processing_submit_button.click(
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio
2
  spaces
3
- # rfdetr
4
- git+https://github.com/roboflow/rf-detr.git@size-release
5
  tqdm
 
1
  gradio
2
  spaces
3
+ rfdetr
 
4
  tqdm