{ "activation_dropout": 0.0, "activation_function": "silu", "anchor_image_size": null, "architectures": [ "DFineForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": true, "backbone": null, "backbone_config": { "depths": [ 3, 4, 6, 3 ], "downsample_in_bottleneck": false, "downsample_in_first_stage": false, "embedding_size": 32, "hidden_act": "relu", "hidden_sizes": [ 128, 256, 512, 1024 ], "initializer_range": 0.02, "layer_type": "basic", "model_type": "hgnet_v2", "num_channels": 3, "out_features": [ "stage2", "stage3", "stage4" ], "out_indices": [ 2, 3, 4 ], "stage_downsample": [ false, true, true, true ], "stage_in_channels": [ 16, 64, 256, 512 ], "stage_kernel_size": [ 3, 3, 5, 5 ], "stage_light_block": [ false, false, true, true ], "stage_mid_channels": [ 16, 32, 64, 128 ], "stage_names": [ "stem", "stage1", "stage2", "stage3", "stage4" ], "stage_num_blocks": [ 1, 1, 2, 1 ], "stage_numb_of_layers": [ 3, 3, 3, 3 ], "stage_out_channels": [ 64, 256, 512, 1024 ], "stem_channels": [ 3, 16, 16 ], "use_learnable_affine_block": true }, "backbone_kwargs": null, "batch_norm_eps": 1e-05, "box_noise_scale": 1.0, "d_model": 256, "decoder_activation_function": "relu", "decoder_attention_heads": 8, "decoder_ffn_dim": 1024, "decoder_in_channels": [ 256, 256, 256 ], "decoder_layers": 3, "decoder_method": "default", "decoder_n_points": [ 3, 6, 3 ], "decoder_offset_scale": 0.5, "depth_mult": 0.34, "dropout": 0.0, "encode_proj_layers": [ 2 ], "encoder_activation_function": "gelu", "encoder_attention_heads": 8, "encoder_ffn_dim": 1024, "encoder_hidden_dim": 256, "encoder_in_channels": [ 256, 512, 1024 ], "encoder_layers": 1, "eos_coefficient": 0.0001, "eval_idx": -1, "eval_size": null, "feat_strides": [ 8, 16, 32 ], "focal_loss_alpha": 0.75, "focal_loss_gamma": 2.0, "freeze_backbone_batch_norms": true, "hidden_expansion": 0.5, "id2label": { "0": "0", "1": "1", "2": "2", "3": "3", "4": "4", "5": "5", "6": "6" }, "initializer_bias_prior_prob": null, "initializer_range": 0.01, "is_encoder_decoder": true, "label2id": { "0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6 }, "label_noise_ratio": 0.5, "layer_norm_eps": 1e-05, "layer_scale": 1, "learn_initial_query": false, "lqe_hidden_dim": 64, "lqe_layers": 2, "matcher_alpha": 0.25, "matcher_bbox_cost": 5.0, "matcher_class_cost": 2.0, "matcher_gamma": 2.0, "matcher_giou_cost": 2.0, "max_num_bins": 32, "model_type": "d_fine", "normalize_before": false, "num_denoising": 100, "num_feature_levels": 3, "num_queries": 300, "positional_encoding_temperature": 10000, "reg_scale": 4.0, "top_prob_values": 4, "torch_dtype": "float32", "transformers_version": "4.52.0.dev0", "up": 0.5, "use_focal_loss": true, "use_pretrained_backbone": false, "use_timm_backbone": false, "weight_loss_bbox": 5.0, "weight_loss_ddf": 1.5, "weight_loss_fgl": 0.15, "weight_loss_giou": 2.0, "weight_loss_vfl": 1.0, "with_box_refine": true }