# ------------------------------------------------------------------------
# RF-DETR
# Copyright (c) 2025 Roboflow. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
"""Pydantic schemas for RF-DETR model and training configuration."""

from typing import List, Optional, Literal, Type

from pydantic import BaseModel
import torch

# Default device, resolved once at import time: CUDA when available,
# otherwise MPS when available, otherwise CPU.
DEVICE = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)


class ModelConfig(BaseModel):
    """Base schema for model settings.

    Fields declared without a default (``encoder``, ``out_feature_indexes``,
    ``projector_scale``, ``hidden_dim``, ``sa_nheads``, ``ca_nheads`` and
    ``dec_n_points``) must be supplied by a subclass or by the caller.
    """

    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]
    out_feature_indexes: List[int]
    dec_layers: int = 3
    two_stage: bool = True
    projector_scale: List[Literal["P3", "P4", "P5"]]
    hidden_dim: int
    sa_nheads: int
    ca_nheads: int
    dec_n_points: int
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    amp: bool = True
    num_classes: int = 90
    pretrain_weights: Optional[str] = None
    # Defaults to the device detected at import time (see DEVICE).
    device: Literal["cpu", "cuda", "mps"] = DEVICE
    resolution: int = 560
    group_detr: int = 13
    gradient_checkpointing: bool = False


class RFDETRBaseConfig(ModelConfig):
    """Defaults for the base variant.

    Provides a default for every field that ``ModelConfig`` leaves required,
    adds ``num_queries`` and ``num_select``, and sets ``pretrain_weights`` to
    ``"rf-detr-base.pth"``.
    """

    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
    hidden_dim: int = 256
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2
    num_queries: int = 300
    num_select: int = 300
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]
    pretrain_weights: Optional[str] = "rf-detr-base.pth"


class RFDETRLargeConfig(RFDETRBaseConfig):
    """Defaults for the large variant.

    Overrides the encoder, hidden size, attention head counts,
    ``dec_n_points``, ``projector_scale`` and ``pretrain_weights`` of
    ``RFDETRBaseConfig``; every other field is inherited unchanged.
    """

    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"


class TrainConfig(BaseModel):
    """Schema for training options.

    ``dataset_dir`` is the only field without a default and must always be
    provided by the caller.
    """

    lr: float = 1e-4
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    grad_accum_steps: int = 4
    epochs: int = 100
    ema_decay: float = 0.993
    ema_tau: int = 100
    lr_drop: int = 100
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    drop_path: float = 0.0
    group_detr: int = 13
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    num_select: int = 300
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    dataset_dir: str  # required: no default
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    use_ema: bool = True
    num_workers: int = 2
    weight_decay: float = 1e-4
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False
    tensorboard: bool = True
    wandb: bool = False
    project: Optional[str] = None
    run: Optional[str] = None
    # Optional so that an explicitly passed None validates, matching the
    # declared default.
    class_names: Optional[List[str]] = None