farmery committed on
Commit
06b5f22
·
verified ·
1 Parent(s): 97e4fae

Training in progress, step 666, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cfa65a3b114689d9988157f964345cffd0fdf3868c0528e061bd0c6d337077a
3
  size 83115256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6a6293e873d29e9c123cfed8b9129a3a500086fa1097f67ec4d0c631d4846a
3
  size 83115256
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04d5924b6afa812a2c7e3e24840d388b87a9c5b91c12a061e12d9f2ece70cb59
3
  size 166439638
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cdb0e85e05885b3a3fd540038294f26ab6ddd4d6a05b3a4065c07d822518d2f
3
  size 166439638
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80b1cc2052c8c3383c26f8c510efc9f3bd0906692def428910029357fb332fa6
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b8b2bf3961b9182299da2d15b0278c7b409aa9c6efb89fb915ee1cccd96964
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1948f1667c5eac8eab3aad84caa2ddcfa7b6b58f0c921e1e247a9f2b914505ee
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e6ba4d45d5ee5eee425ae6ed411a0c7494450474b7f8f6b3e554abac44894f
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a8364beb1e2bd0b9ee8df0b70de706380e0a681bffd14410911a9b40a0e1e37
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51281b34edba8f636a3adfd197e66e7cdd44d38de5c21737cb92d4f19959848b
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29652dc1901dc027f97222930a55463d6f44f87fdf6854a4e0e21917ae06dc0b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a7a4add07966596d388a83a7b9df388bbae65c00a672369fcb325b7b27f1944
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:112390c5500004aa6fb3c2a337356c6031076ed6ffd64d9b3c825b8c3aedf87c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef28e35469bfd31a5e1e33e1983e5e2faeef0929cad68ff1f285258580e0345
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8541597127914429,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-650",
4
- "epoch": 0.5484436592793767,
5
  "eval_steps": 25,
6
- "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4773,6 +4773,118 @@
4773
  "eval_samples_per_second": 52.637,
4774
  "eval_steps_per_second": 13.686,
4775
  "step": 650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4776
  }
4777
  ],
4778
  "logging_steps": 1,
@@ -4796,12 +4908,12 @@
4796
  "should_evaluate": false,
4797
  "should_log": false,
4798
  "should_save": true,
4799
- "should_training_stop": false
4800
  },
4801
  "attributes": {}
4802
  }
4803
  },
4804
- "total_flos": 4.182041063063552e+18,
4805
  "train_batch_size": 1,
4806
  "trial_name": null,
4807
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8541597127914429,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-650",
4
+ "epoch": 0.5619438108924075,
5
  "eval_steps": 25,
6
+ "global_step": 666,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4773
  "eval_samples_per_second": 52.637,
4774
  "eval_steps_per_second": 13.686,
4775
  "step": 650
4776
+ },
4777
+ {
4778
+ "epoch": 0.549287418755191,
4779
+ "grad_norm": 0.2805044949054718,
4780
+ "learning_rate": 1.0124639809571427e-05,
4781
+ "loss": 0.921,
4782
+ "step": 651
4783
+ },
4784
+ {
4785
+ "epoch": 0.5501311782310055,
4786
+ "grad_norm": 0.34615084528923035,
4787
+ "learning_rate": 1.010858158683357e-05,
4788
+ "loss": 0.8931,
4789
+ "step": 652
4790
+ },
4791
+ {
4792
+ "epoch": 0.5509749377068199,
4793
+ "grad_norm": 0.37605908513069153,
4794
+ "learning_rate": 1.0093629108529187e-05,
4795
+ "loss": 0.9133,
4796
+ "step": 653
4797
+ },
4798
+ {
4799
+ "epoch": 0.5518186971826343,
4800
+ "grad_norm": 0.39621636271476746,
4801
+ "learning_rate": 1.0079782742960727e-05,
4802
+ "loss": 0.8801,
4803
+ "step": 654
4804
+ },
4805
+ {
4806
+ "epoch": 0.5526624566584488,
4807
+ "grad_norm": 0.43372654914855957,
4808
+ "learning_rate": 1.0067042831185395e-05,
4809
+ "loss": 0.8728,
4810
+ "step": 655
4811
+ },
4812
+ {
4813
+ "epoch": 0.5535062161342632,
4814
+ "grad_norm": 0.4617686867713928,
4815
+ "learning_rate": 1.0055409687006741e-05,
4816
+ "loss": 0.8503,
4817
+ "step": 656
4818
+ },
4819
+ {
4820
+ "epoch": 0.5543499756100777,
4821
+ "grad_norm": 0.4721163213253021,
4822
+ "learning_rate": 1.0044883596966938e-05,
4823
+ "loss": 0.7944,
4824
+ "step": 657
4825
+ },
4826
+ {
4827
+ "epoch": 0.5551937350858921,
4828
+ "grad_norm": 0.5155826210975647,
4829
+ "learning_rate": 1.0035464820339714e-05,
4830
+ "loss": 0.8678,
4831
+ "step": 658
4832
+ },
4833
+ {
4834
+ "epoch": 0.5560374945617065,
4835
+ "grad_norm": 0.5820474028587341,
4836
+ "learning_rate": 1.0027153589123977e-05,
4837
+ "loss": 0.8539,
4838
+ "step": 659
4839
+ },
4840
+ {
4841
+ "epoch": 0.556881254037521,
4842
+ "grad_norm": 0.6269566416740417,
4843
+ "learning_rate": 1.001995010803808e-05,
4844
+ "loss": 0.8211,
4845
+ "step": 660
4846
+ },
4847
+ {
4848
+ "epoch": 0.5577250135133354,
4849
+ "grad_norm": 0.6699291467666626,
4850
+ "learning_rate": 1.0013854554514806e-05,
4851
+ "loss": 0.7731,
4852
+ "step": 661
4853
+ },
4854
+ {
4855
+ "epoch": 0.5585687729891498,
4856
+ "grad_norm": 0.7107478976249695,
4857
+ "learning_rate": 1.000886707869698e-05,
4858
+ "loss": 0.6735,
4859
+ "step": 662
4860
+ },
4861
+ {
4862
+ "epoch": 0.5594125324649643,
4863
+ "grad_norm": 0.3569500148296356,
4864
+ "learning_rate": 1.0004987803433777e-05,
4865
+ "loss": 0.9516,
4866
+ "step": 663
4867
+ },
4868
+ {
4869
+ "epoch": 0.5602562919407786,
4870
+ "grad_norm": 0.3141714930534363,
4871
+ "learning_rate": 1.0002216824277691e-05,
4872
+ "loss": 0.9088,
4873
+ "step": 664
4874
+ },
4875
+ {
4876
+ "epoch": 0.561100051416593,
4877
+ "grad_norm": 0.35655835270881653,
4878
+ "learning_rate": 1.0000554209482183e-05,
4879
+ "loss": 0.9101,
4880
+ "step": 665
4881
+ },
4882
+ {
4883
+ "epoch": 0.5619438108924075,
4884
+ "grad_norm": 0.3734205961227417,
4885
+ "learning_rate": 1e-05,
4886
+ "loss": 0.8718,
4887
+ "step": 666
4888
  }
4889
  ],
4890
  "logging_steps": 1,
 
4908
  "should_evaluate": false,
4909
  "should_log": false,
4910
  "should_save": true,
4911
+ "should_training_stop": true
4912
  },
4913
  "attributes": {}
4914
  }
4915
  },
4916
+ "total_flos": 4.2849836123081933e+18,
4917
  "train_batch_size": 1,
4918
  "trial_name": null,
4919
  "trial_params": null