# asammoud
# add redetr
# 3f2c461
# ------------------------------------------------------------------------
# RF-DETR
# Copyright (c) 2025 Roboflow. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
from pydantic import BaseModel
from typing import List, Optional, Literal, Type
import torch
# Default device string, chosen once at import time.
# Preference order: CUDA first, then the MPS backend, otherwise CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"
class ModelConfig(BaseModel):
    """Pydantic schema shared by the RF-DETR model configurations.

    Fields declared without a default (``encoder``, ``out_feature_indexes``,
    ``projector_scale``, ``hidden_dim``, ``sa_nheads``, ``ca_nheads`` and
    ``dec_n_points``) are required: the caller or a subclass has to supply
    them. Every other field falls back to the default given here.

    NOTE(review): the code that consumes these fields is not visible in this
    file, so the section labels below are inferred from the field names only.
    """

    # -- backbone / feature selection (required) --
    encoder: Literal[
        "dinov2_windowed_small",
        "dinov2_windowed_base",
    ]
    out_feature_indexes: List[int]

    # -- decoder layout --
    dec_layers: int = 3
    two_stage: bool = True

    # -- projector and attention sizes (required) --
    projector_scale: List[Literal["P3", "P4", "P5"]]
    hidden_dim: int
    sa_nheads: int
    ca_nheads: int
    dec_n_points: int

    # -- feature switches --
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    amp: bool = True

    # -- task / runtime --
    num_classes: int = 90
    pretrain_weights: Optional[str] = None
    # Defaults to the module-level DEVICE value computed at import time.
    device: Literal["cpu", "cuda", "mps"] = DEVICE
    resolution: int = 560
    group_detr: int = 13
    gradient_checkpointing: bool = False
class RFDETRBaseConfig(ModelConfig):
    """Default settings for the base RF-DETR variant.

    Supplies a default for every field that ``ModelConfig`` leaves required,
    so this class can be instantiated with no arguments. It also introduces
    two fields that the parent does not declare: ``num_queries`` and
    ``num_select``.
    """

    encoder: Literal[
        "dinov2_windowed_small",
        "dinov2_windowed_base",
    ] = "dinov2_windowed_small"

    # Sizes left required by the parent class.
    hidden_dim: int = 256
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2

    # Fields added at this level (not declared on ModelConfig).
    num_queries: int = 300
    num_select: int = 300

    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]

    # Replaces the parent's default of None.
    pretrain_weights: Optional[str] = "rf-detr-base.pth"
class RFDETRLargeConfig(RFDETRBaseConfig):
    """Default settings for the large RF-DETR variant.

    Overrides the encoder, the attention/hidden sizes, the projector scales
    and the checkpoint name of ``RFDETRBaseConfig``. Fields not listed here
    (for example ``num_queries``, ``num_select`` and ``out_feature_indexes``)
    keep the values inherited from the base configuration.
    """

    encoder: Literal[
        "dinov2_windowed_small",
        "dinov2_windowed_base",
    ] = "dinov2_windowed_base"

    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4

    projector_scale: List[Literal["P3", "P5", "P4"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"
class TrainConfig(BaseModel):
    """Pydantic schema for the settings of a training run.

    ``dataset_dir`` is the only required field; every other field has a
    default.

    NOTE(review): the training code that reads these fields is not visible
    in this file, so the section labels below are inferred from the field
    names only.
    """

    # -- learning rates / schedule --
    lr: float = 1e-4
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    grad_accum_steps: int = 4
    epochs: int = 100
    ema_decay: float = 0.993
    ema_tau: int = 100
    lr_drop: int = 100
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    drop_path: float = 0.0

    # -- loss / query settings --
    group_detr: int = 13
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    num_select: int = 300

    # -- data --
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    dataset_dir: str  # required: no default is provided
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    use_ema: bool = True
    num_workers: int = 2
    weight_decay: float = 1e-4

    # -- early stopping --
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False

    # -- experiment tracking --
    tensorboard: bool = True
    wandb: bool = False
    project: Optional[str] = None
    run: Optional[str] = None

    # Declared Optional because the default is None. With a plain List[str]
    # annotation pydantic v2 does not validate the default, yet rejects an
    # explicit ``class_names=None`` passed by a caller.
    class_names: Optional[List[str]] = None