ashercn97/face-det-v1

Browse files

Files changed (5) hide show

README.md +57 -0
config.json +186 -0
model.safetensors +3 -0
preprocessor_config.json +35 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: ustc-community/dfine-nano-coco
+tags:
+- generated_from_trainer
+model-index:
+- name: d-fine-m-cppe5-finetune-3
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# d-fine-m-cppe5-finetune-3
+This model is a fine-tuned version of [ustc-community/dfine-nano-coco](https://huggingface.co/ustc-community/dfine-nano-coco) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- eval_loss: 1.7779
+- eval_runtime: 4.1804
+- eval_samples_per_second: 154.053
+- eval_steps_per_second: 19.376
+- epoch: 7.8431
+- step: 12000
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 300
+- num_epochs: 30
+### Framework versions
+- Transformers 4.52.0.dev0
+- Pytorch 2.6.0
+- Datasets 3.6.0
+- Tokenizers 0.21.1

config.json ADDED Viewed

	@@ -0,0 +1,186 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "silu",
+  "anchor_image_size": null,
+  "architectures": [
+    "DFineForObjectDetection"
+  ],
+  "attention_dropout": 0.0,
+  "auxiliary_loss": true,
+  "backbone": null,
+  "backbone_config": {
+    "depths": [
+      3,
+      4,
+      6,
+      3
+    ],
+    "downsample_in_bottleneck": false,
+    "downsample_in_first_stage": false,
+    "embedding_size": 32,
+    "hidden_act": "relu",
+    "hidden_sizes": [
+      128,
+      256,
+      512,
+      1024
+    ],
+    "initializer_range": 0.02,
+    "layer_type": "basic",
+    "model_type": "hgnet_v2",
+    "num_channels": 3,
+    "out_features": [
+      "stage3",
+      "stage4"
+    ],
+    "out_indices": [
+      3,
+      4
+    ],
+    "stage_downsample": [
+      false,
+      true,
+      true,
+      true
+    ],
+    "stage_in_channels": [
+      16,
+      64,
+      256,
+      512
+    ],
+    "stage_kernel_size": [
+      3,
+      3,
+      5,
+      5
+    ],
+    "stage_light_block": [
+      false,
+      false,
+      true,
+      true
+    ],
+    "stage_mid_channels": [
+      16,
+      32,
+      64,
+      128
+    ],
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "stage_num_blocks": [
+      1,
+      1,
+      2,
+      1
+    ],
+    "stage_numb_of_layers": [
+      3,
+      3,
+      3,
+      3
+    ],
+    "stage_out_channels": [
+      64,
+      256,
+      512,
+      1024
+    ],
+    "stem_channels": [
+      3,
+      16,
+      16
+    ],
+    "use_learnable_affine_block": true
+  },
+  "backbone_kwargs": null,
+  "batch_norm_eps": 1e-05,
+  "box_noise_scale": 1.0,
+  "d_model": 128,
+  "decoder_activation_function": "relu",
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 512,
+  "decoder_in_channels": [
+    128,
+    128
+  ],
+  "decoder_layers": 3,
+  "decoder_method": "default",
+  "decoder_n_points": [
+    6,
+    6
+  ],
+  "decoder_offset_scale": 0.5,
+  "depth_mult": 0.5,
+  "dropout": 0.0,
+  "encode_proj_layers": [
+    1
+  ],
+  "encoder_activation_function": "gelu",
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 512,
+  "encoder_hidden_dim": 128,
+  "encoder_in_channels": [
+    512,
+    1024
+  ],
+  "encoder_layers": 1,
+  "eos_coefficient": 0.0001,
+  "eval_idx": -1,
+  "eval_size": null,
+  "feat_strides": [
+    16,
+    32
+  ],
+  "focal_loss_alpha": 0.75,
+  "focal_loss_gamma": 2.0,
+  "freeze_backbone_batch_norms": true,
+  "hidden_expansion": 0.34,
+  "id2label": {
+    "0": "face"
+  },
+  "initializer_bias_prior_prob": null,
+  "initializer_range": 0.01,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "face": 0
+  },
+  "label_noise_ratio": 0.5,
+  "layer_norm_eps": 1e-05,
+  "layer_scale": 1,
+  "learn_initial_query": false,
+  "lqe_hidden_dim": 64,
+  "lqe_layers": 2,
+  "matcher_alpha": 0.25,
+  "matcher_bbox_cost": 5.0,
+  "matcher_class_cost": 2.0,
+  "matcher_gamma": 2.0,
+  "matcher_giou_cost": 2.0,
+  "max_num_bins": 32,
+  "model_type": "d_fine",
+  "normalize_before": false,
+  "num_denoising": 100,
+  "num_feature_levels": 2,
+  "num_queries": 300,
+  "positional_encoding_temperature": 10000,
+  "reg_scale": 4.0,
+  "top_prob_values": 4,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.0.dev0",
+  "up": 0.5,
+  "use_focal_loss": true,
+  "use_pretrained_backbone": false,
+  "use_timm_backbone": false,
+  "weight_loss_bbox": 5.0,
+  "weight_loss_ddf": 1.5,
+  "weight_loss_fgl": 0.15,
+  "weight_loss_giou": 2.0,
+  "weight_loss_vfl": 1.0,
+  "with_box_refine": true
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bc1bdb06da1f25633fae2b6607208e7c0e24e648212d0bb7c44a28bd545e022
+size 15075476

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": false,
+  "device": null,
+  "do_center_crop": null,
+  "do_convert_annotations": true,
+  "do_convert_rgb": null,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "format": "coco_detection",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "RTDetrImageProcessorFast",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "input_data_format": null,
+  "pad_size": null,
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "return_segmentation_masks": null,
+  "return_tensors": null,
+  "size": {
+    "height": 640,
+    "width": 640
+  }
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6faab4181747fe84f1ac5b46ba98978f0b602daa4a27665855f87bce9ef5c1e
+size 5304