Spaces:

Roboflow
/

RF-DETR

Running on T4

App Files Community

gradio-runtime-fixes

by onuralpszr - opened Jul 20

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+24

-50

Files changed (2) hide show

app.py +23 -48
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import gradio as gr
 import numpy as np
 import supervision as sv
 from PIL import Image
-from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
@@ -17,24 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
-    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium"],
-    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium"],
-    ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium"],
-    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano"],
-    ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano"],
-    ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
-    ["videos/people-walking.mp4", 0.3, 1024, "medium"],
-    ["videos/vehicles.mp4", 0.3, 1024, "medium"],
 ]
 COLOR = sv.ColorPalette.from_hex([
@@ -52,7 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
-        confidence: float,
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
@@ -80,12 +79,6 @@ def detect_and_annotate(
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
-    if checkpoint == "nano":
-        return RFDETRNano(resolution=resolution)
-    if checkpoint == "small":
-        return RFDETRSmall(resolution=resolution)
-    if checkpoint == "medium":
-        return RFDETRMedium(resolution=resolution)
     if checkpoint == "base":
         return RFDETRBase(resolution=resolution)
     elif checkpoint == "large":
@@ -93,33 +86,12 @@ def load_model(resolution: int, checkpoint: str) -> RFDETR:
     raise TypeError("Checkpoint must be a base or large.")
-def adjust_resolution(checkpoint: str, resolution: int) -> int:
-    if checkpoint in {"nano", "small", "medium"}:
-        divisor = 32
-    elif checkpoint in {"base", "large"}:
-        divisor = 56
-    else:
-        raise ValueError(f"Unknown checkpoint: {checkpoint}")
-    remainder = resolution % divisor
-    if remainder == 0:
-        return resolution
-    lower = resolution - remainder
-    upper = lower + divisor
-    if resolution - lower < upper - resolution:
-        return lower
-    else:
-        return upper
 def image_processing_inference(
         input_image: Image.Image,
         confidence: float,
         resolution: int,
         checkpoint: str
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     return detect_and_annotate(model=model, image=input_image, confidence=confidence)
@@ -129,8 +101,8 @@ def video_processing_inference(
         confidence: float,
         resolution: int,
         checkpoint: str,
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
@@ -148,7 +120,7 @@ def video_processing_inference(
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
-                confidence=confidence,
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
@@ -182,15 +154,15 @@ with gr.Blocks() as demo:
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
-                    minimum=224,
-                    maximum=2240,
-                    step=1,
-                    value=896,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=["nano", "small", "medium"],
-                    value="medium"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -205,6 +177,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
         )
         image_processing_submit_button.click(
@@ -245,8 +219,8 @@ with gr.Blocks() as demo:
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=["nano", "small", "medium"],
-                    value="medium"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -260,7 +234,8 @@ with gr.Blocks() as demo:
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
-            outputs=video_processing_output_video
         )
         video_processing_submit_button.click(

 import numpy as np
 import supervision as sv
 from PIL import Image
+from rfdetr import RFDETRBase, RFDETRLarge
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
+    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
 ]
 COLOR = sv.ColorPalette.from_hex([
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
+        confidence: float
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
     if checkpoint == "base":
         return RFDETRBase(resolution=resolution)
     elif checkpoint == "large":
     raise TypeError("Checkpoint must be a base or large.")
 def image_processing_inference(
         input_image: Image.Image,
         confidence: float,
         resolution: int,
         checkpoint: str
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     return detect_and_annotate(model=model, image=input_image, confidence=confidence)
         confidence: float,
         resolution: int,
         checkpoint: str,
+        progress=gr.Progress(track_tqdm=True)
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
+                confidence=confidence
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
+                    minimum=560,
+                    maximum=1120,
+                    step=56,
+                    value=728,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
+            cache_examples=True,
+            run_on_click=True
         )
         image_processing_submit_button.click(
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
+            outputs=video_processing_output_video,
+            run_on_click=True
         )
         video_processing_submit_button.click(

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
 gradio
 spaces
-# rfdetr
-git+https://github.com/roboflow/rf-detr.git@size-release
 tqdm

 gradio
 spaces
+rfdetr
 tqdm