|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel
|
|
from typing import List, Optional, Literal, Type
|
|
import torch
|
|
# Default compute backend, probed once at import time.
# Priority order: CUDA, then MPS, then CPU as the fallback.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"
|
|
|
|
class ModelConfig(BaseModel):
    """Pydantic schema holding model-level settings.

    Fields marked "required" below carry no default at this level, so they
    must be supplied by the caller or given a default in a subclass. All
    other fields fall back to the value shown.
    """

    # NOTE(review): encoder/backbone meaning is inferred from the field
    # names -- confirm against the code that consumes this config.
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]  # required
    out_feature_indexes: List[int]  # required
    dec_layers: int = 3
    two_stage: bool = True
    projector_scale: List[Literal["P3", "P4", "P5"]]  # required; only P3/P4/P5 accepted
    hidden_dim: int  # required
    sa_nheads: int  # required
    ca_nheads: int  # required
    dec_n_points: int  # required

    # Boolean switches, all enabled by default.
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    amp: bool = True

    num_classes: int = 90
    pretrain_weights: Optional[str] = None  # no weights file unless one is given

    # Defaults to the module-level DEVICE value computed at import time.
    device: Literal["cpu", "cuda", "mps"] = DEVICE

    resolution: int = 560
    group_detr: int = 13
    gradient_checkpointing: bool = False
|
|
|
|
class RFDETRBaseConfig(ModelConfig):
    """Concrete defaults layered on top of ``ModelConfig``.

    Supplies a default for every field that ``ModelConfig`` leaves required
    (``encoder``, ``hidden_dim``, ``sa_nheads``, ``ca_nheads``,
    ``dec_n_points``, ``projector_scale``, ``out_feature_indexes``), adds
    ``num_queries`` and ``num_select``, and points ``pretrain_weights`` at
    ``"rf-detr-base.pth"``.
    """

    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
    hidden_dim: int = 256
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2

    # Fields introduced by this class (not declared on ModelConfig).
    num_queries: int = 300
    num_select: int = 300

    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]
    pretrain_weights: Optional[str] = "rf-detr-base.pth"
|
|
|
|
class RFDETRLargeConfig(RFDETRBaseConfig):
    """Variant of ``RFDETRBaseConfig`` with different default values.

    Only the fields listed here are overridden; everything else is
    inherited unchanged from ``RFDETRBaseConfig``.
    """

    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"
|
|
|
|
class TrainConfig(BaseModel):
    """Pydantic schema holding training-run settings.

    ``dataset_dir`` is the only field without a default and must always be
    supplied. Every other field falls back to the value shown.
    """

    # NOTE(review): the grouping comments below are inferred from field
    # names -- confirm against the training loop that reads this config.

    # Learning rates, batch sizing and schedule.
    lr: float = 1e-4
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    grad_accum_steps: int = 4
    epochs: int = 100
    ema_decay: float = 0.993
    ema_tau: int = 100
    lr_drop: int = 100
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    drop_path: float = 0.0

    group_detr: int = 13
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    num_select: int = 300

    # Dataset selection and locations.
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    dataset_dir: str  # required: no default
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    use_ema: bool = True
    num_workers: int = 2
    weight_decay: float = 1e-4

    # Early-stopping settings; disabled by default.
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False

    # Experiment-tracking switches and identifiers.
    tensorboard: bool = True
    wandb: bool = False
    project: Optional[str] = None
    run: Optional[str] = None

    # Declared Optional so the annotation matches the None default. With a
    # plain List[str], an explicit class_names=None (for example when a
    # dumped config is fed back into the constructor) is rejected by
    # pydantic v2 validation.
    class_names: Optional[List[str]] = None
|
|
|