import gradio as gr
import spaces
import supervision as sv
from rfdetr import RFDETRBase, RFDETRLarge
from rfdetr.util.coco_classes import COCO_CLASSES

from utils.video import create_directory

MARKDOWN = """
# RF-DETR 🔥

RF-DETR is a real-time, transformer-based object detection model architecture developed by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
"""

IMAGE_EXAMPLES = [
    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
]

COLOR = sv.ColorPalette.from_hex([
    "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
    "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
])

VIDEO_SCALE_FACTOR = 0.5
VIDEO_TARGET_DIRECTORY = "tmp"
create_directory(directory_path=VIDEO_TARGET_DIRECTORY)


@spaces.GPU()
def inference(image, confidence: float, resolution: int, checkpoint: str):
    model_class = RFDETRBase if checkpoint == "base" else RFDETRLarge
    model = model_class(resolution=resolution)

    detections = model.predict(image, threshold=confidence)

    # Scale annotation geometry to the input image size.
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)

    bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
    label_annotator = sv.LabelAnnotator(
        color=COLOR,
        text_color=sv.Color.BLACK,
        text_scale=text_scale,
        smart_position=True
    )

    # Renamed the loop variable from `confidence` to `score` so it no longer
    # shadows the `confidence` parameter.
    labels = [
        f"{COCO_CLASSES[class_id]} {score:.2f}"
        for class_id, score
        in zip(detections.class_id, detections.confidence)
    ]

    annotated_image = image.copy()
    annotated_image = bbox_annotator.annotate(annotated_image, detections)
    annotated_image = label_annotator.annotate(annotated_image, detections, labels)
    return annotated_image


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Input Image",
                image_mode='RGB',
                type='pil',
                height=600
            )
            confidence_slider = gr.Slider(
                label="Confidence",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.5,
            )
            # RF-DETR inference resolution must be divisible by 56.
            resolution_slider = gr.Slider(
                label="Inference resolution",
                minimum=560,
                maximum=1120,
                step=56,
                value=728,
            )
            with gr.Row():
                checkpoint_dropdown = gr.Dropdown(
                    label="Checkpoint",
                    choices=["base", "large"],
                    value="base"
                )
                submit_button = gr.Button("Submit")
        with gr.Column():
            output_image = gr.Image(
                label="Output Image",
                image_mode='RGB',
                type='pil',
                height=600
            )

    gr.Examples(
        fn=inference,
        examples=IMAGE_EXAMPLES,
        inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
        outputs=output_image
    )

    submit_button.click(
        inference,
        inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
        outputs=output_image
    )

demo.launch(debug=False, show_error=True)
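

# --- Hedged sketch, not wired into the UI above -----------------------------
# VIDEO_SCALE_FACTOR and VIDEO_TARGET_DIRECTORY are defined but never used in
# this demo, which suggests a video path was planned. The function below is a
# minimal sketch of what that path could look like using supervision's video
# utilities. The name `inference_video`, the BGR -> RGB channel flip before
# `model.predict`, and the use of `sv.scale_image` to shrink the output are
# assumptions, not part of the original app.
def inference_video(video_path: str, confidence: float, resolution: int,
                    checkpoint: str) -> str:
    model_class = RFDETRBase if checkpoint == "base" else RFDETRLarge
    model = model_class(resolution=resolution)

    # Write the output at a reduced size to keep the file small (assumption).
    video_info = sv.VideoInfo.from_video_path(video_path)
    video_info.width = int(video_info.width * VIDEO_SCALE_FACTOR)
    video_info.height = int(video_info.height * VIDEO_SCALE_FACTOR)

    text_scale = sv.calculate_optimal_text_scale(resolution_wh=video_info.resolution_wh)
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=video_info.resolution_wh)
    bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
    label_annotator = sv.LabelAnnotator(
        color=COLOR, text_color=sv.Color.BLACK, text_scale=text_scale)

    target_path = f"{VIDEO_TARGET_DIRECTORY}/annotated.mp4"
    frames_generator = sv.get_video_frames_generator(video_path)
    with sv.VideoSink(target_path, video_info=video_info) as sink:
        for frame in frames_generator:
            frame = sv.scale_image(frame, VIDEO_SCALE_FACTOR)
            # Frames are read with OpenCV in BGR order; RF-DETR expects RGB
            # input (assumption about the predict API), hence the flip.
            detections = model.predict(frame[:, :, ::-1].copy(), threshold=confidence)
            labels = [
                f"{COCO_CLASSES[class_id]} {score:.2f}"
                for class_id, score
                in zip(detections.class_id, detections.confidence)
            ]
            annotated_frame = frame.copy()
            annotated_frame = bbox_annotator.annotate(annotated_frame, detections)
            annotated_frame = label_annotator.annotate(annotated_frame, detections, labels)
            sink.write_frame(annotated_frame)
    return target_path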