Romain-XV commited on
Commit
6a15986
·
verified ·
1 Parent(s): 4bcedde

Training in progress, step 714, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50b1d2ba2733fb32ba9ae7869157021c125589ce9edf74a29b6d5122feb1c57b
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31949e3beef1c9644fadd722124d7e9efb5189cc688f46b7444456220e09c973
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f541cb1d23774d9a832ecca9a8bd4c75a07423d665daeed9feb4cf3a053814b7
3
  size 509177556
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b73d6749f34c834bfd1413be7ccea161370653a987271156f945ab7dd962edb
3
  size 509177556
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9849c595b3d1408534e576e2fe74ffffa8a13d64045843806a2b3e623f7ac2f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fe0070b3c7b42ade8d0ee03ab5cb8ffb3bc1ceba2a2a3a77f8a56cf55ec43e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eae9d759cd879a3225c386ee5c5b05dd4099eecf648f218bccd70e50b1fceed
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbead2b15bcf2538a82bf72fc57c0f7afbfbd35caedd54a6bcc7131cfe5e9d1e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.11092506349086761,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
- "epoch": 0.7482629609834314,
5
  "eval_steps": 100,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4971,6 +4971,104 @@
4971
  "eval_samples_per_second": 4.318,
4972
  "eval_steps_per_second": 1.079,
4973
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4974
  }
4975
  ],
4976
  "logging_steps": 1,
@@ -4994,12 +5092,12 @@
4994
  "should_evaluate": false,
4995
  "should_log": false,
4996
  "should_save": true,
4997
- "should_training_stop": false
4998
  },
4999
  "attributes": {}
5000
  }
5001
  },
5002
- "total_flos": 3.614380671369216e+18,
5003
  "train_batch_size": 4,
5004
  "trial_name": null,
5005
  "trial_params": null
 
1
  {
2
  "best_metric": 0.11092506349086761,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
+ "epoch": 0.7632282202031,
5
  "eval_steps": 100,
6
+ "global_step": 714,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4971
  "eval_samples_per_second": 4.318,
4972
  "eval_steps_per_second": 1.079,
4973
  "step": 700
4974
+ },
4975
+ {
4976
+ "epoch": 0.7493319080705505,
4977
+ "grad_norm": 0.16759201884269714,
4978
+ "learning_rate": 1.6822453383367186e-07,
4979
+ "loss": 0.1136,
4980
+ "step": 701
4981
+ },
4982
+ {
4983
+ "epoch": 0.7504008551576697,
4984
+ "grad_norm": 0.149668887257576,
4985
+ "learning_rate": 1.4334519492711362e-07,
4986
+ "loss": 0.1026,
4987
+ "step": 702
4988
+ },
4989
+ {
4990
+ "epoch": 0.7514698022447889,
4991
+ "grad_norm": 0.14984771609306335,
4992
+ "learning_rate": 1.204543794827595e-07,
4993
+ "loss": 0.0942,
4994
+ "step": 703
4995
+ },
4996
+ {
4997
+ "epoch": 0.7525387493319081,
4998
+ "grad_norm": 0.15921737253665924,
4999
+ "learning_rate": 9.955254334328423e-08,
5000
+ "loss": 0.1044,
5001
+ "step": 704
5002
+ },
5003
+ {
5004
+ "epoch": 0.7536076964190273,
5005
+ "grad_norm": 0.14484407007694244,
5006
+ "learning_rate": 8.064010274324573e-08,
5007
+ "loss": 0.0849,
5008
+ "step": 705
5009
+ },
5010
+ {
5011
+ "epoch": 0.7546766435061465,
5012
+ "grad_norm": 0.1964549571275711,
5013
+ "learning_rate": 6.37174343008251e-08,
5014
+ "loss": 0.125,
5015
+ "step": 706
5016
+ },
5017
+ {
5018
+ "epoch": 0.7557455905932656,
5019
+ "grad_norm": 0.1271902620792389,
5020
+ "learning_rate": 4.878487501033258e-08,
5021
+ "loss": 0.0783,
5022
+ "step": 707
5023
+ },
5024
+ {
5025
+ "epoch": 0.7568145376803849,
5026
+ "grad_norm": 0.155805766582489,
5027
+ "learning_rate": 3.584272223546847e-08,
5028
+ "loss": 0.0972,
5029
+ "step": 708
5030
+ },
5031
+ {
5032
+ "epoch": 0.757883484767504,
5033
+ "grad_norm": 0.15836969017982483,
5034
+ "learning_rate": 2.4891233703394634e-08,
5035
+ "loss": 0.095,
5036
+ "step": 709
5037
+ },
5038
+ {
5039
+ "epoch": 0.7589524318546232,
5040
+ "grad_norm": 0.1573915183544159,
5041
+ "learning_rate": 1.593062749967178e-08,
5042
+ "loss": 0.1093,
5043
+ "step": 710
5044
+ },
5045
+ {
5046
+ "epoch": 0.7600213789417424,
5047
+ "grad_norm": 0.15478433668613434,
5048
+ "learning_rate": 8.961082063829729e-09,
5049
+ "loss": 0.0977,
5050
+ "step": 711
5051
+ },
5052
+ {
5053
+ "epoch": 0.7610903260288616,
5054
+ "grad_norm": 0.1593605875968933,
5055
+ "learning_rate": 3.982736185859093e-09,
5056
+ "loss": 0.1136,
5057
+ "step": 712
5058
+ },
5059
+ {
5060
+ "epoch": 0.7621592731159808,
5061
+ "grad_norm": 0.1565905064344406,
5062
+ "learning_rate": 9.956890034468202e-10,
5063
+ "loss": 0.0978,
5064
+ "step": 713
5065
+ },
5066
+ {
5067
+ "epoch": 0.7632282202031,
5068
+ "grad_norm": 0.1467771828174591,
5069
+ "learning_rate": 0.0,
5070
+ "loss": 0.1067,
5071
+ "step": 714
5072
  }
5073
  ],
5074
  "logging_steps": 1,
 
5092
  "should_evaluate": false,
5093
  "should_log": false,
5094
  "should_save": true,
5095
+ "should_training_stop": true
5096
  },
5097
  "attributes": {}
5098
  }
5099
  },
5100
+ "total_flos": 3.6866682847966003e+18,
5101
  "train_batch_size": 4,
5102
  "trial_name": null,
5103
  "trial_params": null