sandernotenbaert commited on
Commit
fd2096f
·
verified ·
1 Parent(s): a4c2a54

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b56fbdf7fe55206aba37bd5ae400349b2464f89eeb757ae10fcf1d6efcc7644f
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0ba7be4e1197b6ebae5a1fb0d6ae29bbafd16d90726437a2da8408d73641f03
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:613a8d1a657ba89ada23ab7d7633f3cf31fb4364c0614b7c8174ec86a031e3af
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bebcf4136f2e6e53dd975d0ec5899e311738161c26efd5f1866b8fbda4dd14d
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78393450795811d9dfafdce73145f39f52c703d761292801d15750c186cc2118
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:788c6b064d884311e214b6335a80e7fe5e75085f966367611657f1664cf160c2
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27cf200ae168599617b8ef6dbb08dc689bd11010b7655ba355b260440e03cb3c
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a2836138899bf244344610cf1cfa7f6c9e6417ce66d8196cf2ef6125f38e4e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa325f8460c9be76052157bd9b7d0c22035d7e2dbd36acf508d25edf25974d7b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81fbcc85fe8453ce60c9a14a7fd7bb3512bae9747d397407e0552c8567a7c1e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 4500,
3
  "best_metric": 1.5784235000610352,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-4500",
5
- "epoch": 2.4488103520244886,
6
  "eval_steps": 500,
7
- "global_step": 5500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -866,6 +866,84 @@
866
  "eval_samples_per_second": 392.036,
867
  "eval_steps_per_second": 49.02,
868
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
869
  }
870
  ],
871
  "logging_steps": 50,
@@ -880,7 +958,7 @@
880
  "early_stopping_threshold": 0.0
881
  },
882
  "attributes": {
883
- "early_stopping_patience_counter": 2
884
  }
885
  },
886
  "TrainerControl": {
@@ -889,12 +967,12 @@
889
  "should_evaluate": false,
890
  "should_log": false,
891
  "should_save": true,
892
- "should_training_stop": false
893
  },
894
  "attributes": {}
895
  }
896
  },
897
- "total_flos": 2.4081224007407616e+16,
898
  "train_batch_size": 4,
899
  "trial_name": null,
900
  "trial_params": null
 
2
  "best_global_step": 4500,
3
  "best_metric": 1.5784235000610352,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-4500",
5
+ "epoch": 2.6714345345763184,
6
  "eval_steps": 500,
7
+ "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
866
  "eval_samples_per_second": 392.036,
867
  "eval_steps_per_second": 49.02,
868
  "step": 5500
869
+ },
870
+ {
871
+ "epoch": 2.4710727702796715,
872
+ "grad_norm": 1.2289257049560547,
873
+ "learning_rate": 5e-05,
874
+ "loss": 1.5918,
875
+ "step": 5550
876
+ },
877
+ {
878
+ "epoch": 2.4933351885348545,
879
+ "grad_norm": 1.0900951623916626,
880
+ "learning_rate": 5e-05,
881
+ "loss": 1.6005,
882
+ "step": 5600
883
+ },
884
+ {
885
+ "epoch": 2.5155976067900374,
886
+ "grad_norm": 0.9930930137634277,
887
+ "learning_rate": 5e-05,
888
+ "loss": 1.6151,
889
+ "step": 5650
890
+ },
891
+ {
892
+ "epoch": 2.5378600250452203,
893
+ "grad_norm": 0.9901494979858398,
894
+ "learning_rate": 5e-05,
895
+ "loss": 1.59,
896
+ "step": 5700
897
+ },
898
+ {
899
+ "epoch": 2.5601224433004033,
900
+ "grad_norm": 0.9367809891700745,
901
+ "learning_rate": 5e-05,
902
+ "loss": 1.5844,
903
+ "step": 5750
904
+ },
905
+ {
906
+ "epoch": 2.582384861555586,
907
+ "grad_norm": 1.0291093587875366,
908
+ "learning_rate": 5e-05,
909
+ "loss": 1.5841,
910
+ "step": 5800
911
+ },
912
+ {
913
+ "epoch": 2.6046472798107696,
914
+ "grad_norm": 0.8904668688774109,
915
+ "learning_rate": 5e-05,
916
+ "loss": 1.5883,
917
+ "step": 5850
918
+ },
919
+ {
920
+ "epoch": 2.6269096980659525,
921
+ "grad_norm": 0.9640474915504456,
922
+ "learning_rate": 5e-05,
923
+ "loss": 1.5855,
924
+ "step": 5900
925
+ },
926
+ {
927
+ "epoch": 2.6491721163211355,
928
+ "grad_norm": 0.979326605796814,
929
+ "learning_rate": 5e-05,
930
+ "loss": 1.5798,
931
+ "step": 5950
932
+ },
933
+ {
934
+ "epoch": 2.6714345345763184,
935
+ "grad_norm": 1.2588844299316406,
936
+ "learning_rate": 5e-05,
937
+ "loss": 1.6018,
938
+ "step": 6000
939
+ },
940
+ {
941
+ "epoch": 2.6714345345763184,
942
+ "eval_loss": 1.5868676900863647,
943
+ "eval_runtime": 40.9701,
944
+ "eval_samples_per_second": 389.821,
945
+ "eval_steps_per_second": 48.743,
946
+ "step": 6000
947
  }
948
  ],
949
  "logging_steps": 50,
 
958
  "early_stopping_threshold": 0.0
959
  },
960
  "attributes": {
961
+ "early_stopping_patience_counter": 3
962
  }
963
  },
964
  "TrainerControl": {
 
967
  "should_evaluate": false,
968
  "should_log": false,
969
  "should_save": true,
970
+ "should_training_stop": true
971
  },
972
  "attributes": {}
973
  }
974
  },
975
+ "total_flos": 2.7676743726557184e+16,
976
  "train_batch_size": 4,
977
  "trial_name": null,
978
  "trial_params": null