---
library_name: transformers
tags: []
---

# Model Card for distill-lab/distill-n4_00-01_combined_cls_v1b2

current batches: `nv3[v0] (1700) | nv4[v1-2k] (4000) | nv4[v1-210k] (b1b2: 4000)`

metrics:

```
***** train metrics *****
  epoch                     =          20.0
  total_flos                = 66966619017GF
  train_loss                =        0.2338
  train_runtime             =    0:58:49.65
  train_samples_per_second  =        56.736
  train_steps_per_second    =          0.89

***** eval metrics *****
  epoch                     =          20.0
  eval_accuracy             =        0.7521
  eval_loss                 =        0.8814
  eval_runtime              =    0:00:12.42
  eval_samples_per_second   =       142.171
  eval_steps_per_second     =         2.977
```

## Model details:

```python
# Ordinal regression on the anime-pretrained backbone performs poorly,
# so train the pretrained backbone as a plain classifier instead.
BASE_MODEL = "facebook/dinov2-with-registers-large"
DATASET = "distill-lab/COMBINE_nai-distill_00-01_eagle.library"
TASK = "classification"

# Training on a single GPU, so use a larger per-device batch size.
cmd = f"""python -m trainlib.hf_trainer.cli \
    --model_name_or_path {BASE_MODEL} \
    --dataset_name {DATASET} \
    --output_dir distill-n4_00-01_combined_cls_v1b2-100e \
    --remove_unused_columns False \
    --label_column_name star \
    --task {TASK} \
    --do_train \
    --do_eval \
    --eval_strategy steps \
    --eval_steps 100 \
    --learning_rate 1e-5 \
    --num_train_epochs 20 \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 48 \
    --logging_strategy steps \
    --logging_steps 2 \
    --save_total_limit 1 \
    --seed 1337 \
    --lr_scheduler_type cosine \
    --dataloader_num_workers 16 \
    --ignore_mismatched_sizes True"""

# Hub-push flags (kept separate; not appended to cmd above).
rest = f""" \
    --push_to_hub True \
    --push_to_hub_organization distill-lab \
    --hub_model_id nai-distill_00-01_combined_eagle_{TASK} \
    --hub_strategy end"""

print(cmd)
!{cmd}
```
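
## Example usage

A minimal inference sketch, assuming the fine-tuned checkpoint is available on the Hub under this repo id and loads with the standard `transformers` image-classification classes; the image path is a placeholder.

```python
# Sketch only: assumes the checkpoint at REPO_ID loads via the generic
# AutoImageProcessor / AutoModelForImageClassification classes, and that
# "example.png" stands in for an actual input image.
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

REPO_ID = "distill-lab/distill-n4_00-01_combined_cls_v1b2"

processor = AutoImageProcessor.from_pretrained(REPO_ID)
model = AutoModelForImageClassification.from_pretrained(REPO_ID)
model.eval()

image = Image.open("example.png").convert("RGB")
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred = logits.argmax(-1).item()
print(model.config.id2label.get(pred, pred))  # predicted "star" class
```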