arcwarden46 committed on
Commit
88246a9
·
verified ·
1 Parent(s): a1c5e0c

Training in progress, step 107, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd87937ced3de3b818b57ee45c5ad91a0b2aab76050990853b0ceba243e4d99c
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5508feddb72e1df80a397ad696709dfc1b3d28a096365a692bda3a96f182f042
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb46749be138d01511e0975759ec14610856e157605aa7830b164a27a94c78bd
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db31e72e84d509117d060edb2ada4fe8d0e69eb0f459dbeb4b6cff8b22d71e3a
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9e5f4f92834f76f4e8e16f9610fd5a1611a04f1f6f3ed0dbb8212c20d8ab159
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64f8ff6880c5e67b66094199c91b541de8f77d54887348e36823d92a7ac0bd7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54e1b33060aa1896ff83a569391d348a9ec0988e6492b2677c2ebd57a88700b8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9115d19f29cad07fed0dd5f8f69efe017189f597938ab6d300b5e3ea914190e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.23224526643753052,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.9345794392523364,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,55 @@
731
  "eval_samples_per_second": 12.141,
732
  "eval_steps_per_second": 3.086,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +803,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 1.6405085168861184e+17,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.23224526643753052,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.0,
5
  "eval_steps": 50,
6
+ "global_step": 107,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 12.141,
732
  "eval_steps_per_second": 3.086,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 0.9439252336448598,
737
+ "grad_norm": 0.7002161741256714,
738
+ "learning_rate": 9.410911550880475e-07,
739
+ "loss": 0.2533,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 0.9532710280373832,
744
+ "grad_norm": 0.6093440651893616,
745
+ "learning_rate": 6.54164563305465e-07,
746
+ "loss": 0.1811,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.9626168224299065,
751
+ "grad_norm": 0.7323504686355591,
752
+ "learning_rate": 4.189949386787462e-07,
753
+ "loss": 0.2834,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.9719626168224299,
758
+ "grad_norm": 0.701038122177124,
759
+ "learning_rate": 2.3582894166930268e-07,
760
+ "loss": 0.2602,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.9813084112149533,
765
+ "grad_norm": 0.44046589732170105,
766
+ "learning_rate": 1.0485868811441757e-07,
767
+ "loss": 0.2167,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.9906542056074766,
772
+ "grad_norm": 0.6788187026977539,
773
+ "learning_rate": 2.6221547724253337e-08,
774
+ "loss": 0.3156,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 1.0,
779
+ "grad_norm": 0.561313807964325,
780
+ "learning_rate": 0.0,
781
+ "loss": 0.2182,
782
+ "step": 107
783
  }
784
  ],
785
  "logging_steps": 1,
 
803
  "should_evaluate": false,
804
  "should_log": false,
805
  "should_save": true,
806
+ "should_training_stop": true
807
  },
808
  "attributes": {}
809
  }
810
  },
811
+ "total_flos": 1.7536470352920576e+17,
812
  "train_batch_size": 8,
813
  "trial_name": null,
814
  "trial_params": null