dfine-test / config.json
davanstrien's picture
davanstrien HF Staff
Training in progress, epoch 1
785c567 verified
raw
history blame
3.65 kB
{
"activation_dropout": 0.0,
"activation_function": "silu",
"anchor_image_size": null,
"architectures": [
"DFineForObjectDetection"
],
"attention_dropout": 0.0,
"auxiliary_loss": true,
"backbone": null,
"backbone_config": {
"depths": [
3,
4,
6,
3
],
"downsample_in_bottleneck": false,
"downsample_in_first_stage": false,
"embedding_size": 32,
"hidden_act": "relu",
"hidden_sizes": [
128,
256,
512,
1024
],
"initializer_range": 0.02,
"layer_type": "basic",
"model_type": "hgnet_v2",
"num_channels": 3,
"out_features": [
"stage2",
"stage3",
"stage4"
],
"out_indices": [
2,
3,
4
],
"stage_downsample": [
false,
true,
true,
true
],
"stage_in_channels": [
16,
64,
256,
512
],
"stage_kernel_size": [
3,
3,
5,
5
],
"stage_light_block": [
false,
false,
true,
true
],
"stage_mid_channels": [
16,
32,
64,
128
],
"stage_names": [
"stem",
"stage1",
"stage2",
"stage3",
"stage4"
],
"stage_num_blocks": [
1,
1,
2,
1
],
"stage_numb_of_layers": [
3,
3,
3,
3
],
"stage_out_channels": [
64,
256,
512,
1024
],
"stem_channels": [
3,
16,
16
],
"use_learnable_affine_block": true
},
"backbone_kwargs": null,
"batch_norm_eps": 1e-05,
"box_noise_scale": 1.0,
"d_model": 256,
"decoder_activation_function": "relu",
"decoder_attention_heads": 8,
"decoder_ffn_dim": 1024,
"decoder_in_channels": [
256,
256,
256
],
"decoder_layers": 3,
"decoder_method": "default",
"decoder_n_points": [
3,
6,
3
],
"decoder_offset_scale": 0.5,
"depth_mult": 0.34,
"dropout": 0.0,
"encode_proj_layers": [
2
],
"encoder_activation_function": "gelu",
"encoder_attention_heads": 8,
"encoder_ffn_dim": 1024,
"encoder_hidden_dim": 256,
"encoder_in_channels": [
256,
512,
1024
],
"encoder_layers": 1,
"eos_coefficient": 0.0001,
"eval_idx": -1,
"eval_size": null,
"feat_strides": [
8,
16,
32
],
"focal_loss_alpha": 0.75,
"focal_loss_gamma": 2.0,
"freeze_backbone_batch_norms": true,
"hidden_expansion": 0.5,
"id2label": {
"0": "0",
"1": "1",
"2": "2",
"3": "3",
"4": "4",
"5": "5",
"6": "6"
},
"initializer_bias_prior_prob": null,
"initializer_range": 0.01,
"is_encoder_decoder": true,
"label2id": {
"0": 0,
"1": 1,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6
},
"label_noise_ratio": 0.5,
"layer_norm_eps": 1e-05,
"layer_scale": 1,
"learn_initial_query": false,
"lqe_hidden_dim": 64,
"lqe_layers": 2,
"matcher_alpha": 0.25,
"matcher_bbox_cost": 5.0,
"matcher_class_cost": 2.0,
"matcher_gamma": 2.0,
"matcher_giou_cost": 2.0,
"max_num_bins": 32,
"model_type": "d_fine",
"normalize_before": false,
"num_denoising": 100,
"num_feature_levels": 3,
"num_queries": 300,
"positional_encoding_temperature": 10000,
"reg_scale": 4.0,
"top_prob_values": 4,
"torch_dtype": "float32",
"transformers_version": "4.52.0.dev0",
"up": 0.5,
"use_focal_loss": true,
"use_pretrained_backbone": false,
"use_timm_backbone": false,
"weight_loss_bbox": 5.0,
"weight_loss_ddf": 1.5,
"weight_loss_fgl": 0.15,
"weight_loss_giou": 2.0,
"weight_loss_vfl": 1.0,
"with_box_refine": true
}