sandernotenbaert commited on
Commit
630d7ab
·
verified ·
1 Parent(s): 51cc507

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:761ed6751920843cffb7ea195e9fda4cc5b52e9c232bfc7dee3bb45f6619637e
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d91a252e47567440fa05097e8a1b9f9cfd655ebb6eff2d149a70eed8179f3ab
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1202b871306e5a47b46fb1bcf989facc74951cc8514d655f034a9ea5ed1ee58
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afd6a63953e3ca088918b5a47553148cf1516f98523ee4612b03e5e48b50b798
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:288381390ef2c4e9c9473d5ee077feaf5f5cefbdb2b7cf1c96b6907ae793487e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb39ecd9b5069f71ac0c29d2d36a62574cf1415dfe0c4abd4599c7c59329be5
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37caeb1bb49fdf577e04e454fc0c1ffb92bd891d0dbc0b5a4547415a9f259799
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d5089b28a615f02511d264296c6980fb39c1a64aaf6b3be2813182a38db48d
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50ac02c40c90996d4ad6bb7daa053a54b85676d36aaa3b4b892740a97de5c357
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac573a74d7ecfa65e1fa3220665a94cb8f7df1cc97a38d73ad33d04e26d5f57
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3000,
3
- "best_metric": 1.6028199195861816,
4
- "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
5
- "epoch": 1.7809656323918186,
6
  "eval_steps": 500,
7
- "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -632,6 +632,84 @@
632
  "eval_samples_per_second": 387.611,
633
  "eval_steps_per_second": 48.466,
634
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  }
636
  ],
637
  "logging_steps": 50,
@@ -646,7 +724,7 @@
646
  "early_stopping_threshold": 0.0
647
  },
648
  "attributes": {
649
- "early_stopping_patience_counter": 2
650
  }
651
  },
652
  "TrainerControl": {
@@ -660,7 +738,7 @@
660
  "attributes": {}
661
  }
662
  },
663
- "total_flos": 1.8688429934168064e+16,
664
  "train_batch_size": 4,
665
  "trial_name": null,
666
  "trial_params": null
 
1
  {
2
+ "best_global_step": 4500,
3
+ "best_metric": 1.5784235000610352,
4
+ "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-4500",
5
+ "epoch": 2.0035619869208294,
6
  "eval_steps": 500,
7
+ "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
632
  "eval_samples_per_second": 387.611,
633
  "eval_steps_per_second": 48.466,
634
  "step": 4000
635
+ },
636
+ {
637
+ "epoch": 1.8032280506470015,
638
+ "grad_norm": 1.137293815612793,
639
+ "learning_rate": 5e-05,
640
+ "loss": 1.6288,
641
+ "step": 4050
642
+ },
643
+ {
644
+ "epoch": 1.8254904689021845,
645
+ "grad_norm": 1.1091963052749634,
646
+ "learning_rate": 5e-05,
647
+ "loss": 1.6421,
648
+ "step": 4100
649
+ },
650
+ {
651
+ "epoch": 1.8477528871573674,
652
+ "grad_norm": 1.0500215291976929,
653
+ "learning_rate": 5e-05,
654
+ "loss": 1.6594,
655
+ "step": 4150
656
+ },
657
+ {
658
+ "epoch": 1.8700153054125503,
659
+ "grad_norm": 1.2211509943008423,
660
+ "learning_rate": 5e-05,
661
+ "loss": 1.658,
662
+ "step": 4200
663
+ },
664
+ {
665
+ "epoch": 1.8922777236677333,
666
+ "grad_norm": 1.1174074411392212,
667
+ "learning_rate": 5e-05,
668
+ "loss": 1.6815,
669
+ "step": 4250
670
+ },
671
+ {
672
+ "epoch": 1.9145401419229162,
673
+ "grad_norm": 1.1086102724075317,
674
+ "learning_rate": 5e-05,
675
+ "loss": 1.7094,
676
+ "step": 4300
677
+ },
678
+ {
679
+ "epoch": 1.9368025601780994,
680
+ "grad_norm": 1.3630105257034302,
681
+ "learning_rate": 5e-05,
682
+ "loss": 1.7099,
683
+ "step": 4350
684
+ },
685
+ {
686
+ "epoch": 1.9590649784332823,
687
+ "grad_norm": 1.2096022367477417,
688
+ "learning_rate": 5e-05,
689
+ "loss": 1.7082,
690
+ "step": 4400
691
+ },
692
+ {
693
+ "epoch": 1.9813273966884652,
694
+ "grad_norm": 1.1671497821807861,
695
+ "learning_rate": 5e-05,
696
+ "loss": 1.7031,
697
+ "step": 4450
698
+ },
699
+ {
700
+ "epoch": 2.0035619869208294,
701
+ "grad_norm": 1.090248465538025,
702
+ "learning_rate": 5e-05,
703
+ "loss": 1.7077,
704
+ "step": 4500
705
+ },
706
+ {
707
+ "epoch": 2.0035619869208294,
708
+ "eval_loss": 1.5784235000610352,
709
+ "eval_runtime": 41.0244,
710
+ "eval_samples_per_second": 389.305,
711
+ "eval_steps_per_second": 48.678,
712
+ "step": 4500
713
  }
714
  ],
715
  "logging_steps": 50,
 
724
  "early_stopping_threshold": 0.0
725
  },
726
  "attributes": {
727
+ "early_stopping_patience_counter": 0
728
  }
729
  },
730
  "TrainerControl": {
 
738
  "attributes": {}
739
  }
740
  },
741
+ "total_flos": 1.9362186109077504e+16,
742
  "train_batch_size": 4,
743
  "trial_name": null,
744
  "trial_params": null