romainnn commited on
Commit
0159290
·
verified ·
1 Parent(s): dd09513

Training in progress, step 744, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7b9b0995d81a4e6102d30f0995bf8828a62876256831a63c1dc11eea5296222
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3fa76795aac7ce31b919a79ce0da184e31cd62feb3a8b1a2eb838957202798
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc9add2f35a28648dd72f9ff5dc11631b157ac498f629b329b9cc5e0d4401e11
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b0f55b2c27d4899beec2319887ce2031f6991a2c282f12dc8403a58f0d4431
3
  size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:536a0e535acdbd4a3f4815b9826b43df6ea43875ece79c19d27a976930911033
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390f0dbac2add99b9de0bdfc18171c87964e9eac7b5a9d79ded1784da24c00e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a586d009b8e09da7aeb997c54dfb316dc2e5b40628d8e4e254d5f6db44ee7d4d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22f1029354fc2004de93e539443b580b540e5122ae66043f3e66f977d6dd066
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.267533302307129,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
- "epoch": 0.07301839802850325,
5
  "eval_steps": 100,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4971,6 +4971,314 @@
4971
  "eval_samples_per_second": 6.272,
4972
  "eval_steps_per_second": 1.568,
4973
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4974
  }
4975
  ],
4976
  "logging_steps": 1,
@@ -4994,12 +5302,12 @@
4994
  "should_evaluate": false,
4995
  "should_log": false,
4996
  "should_save": true,
4997
- "should_training_stop": false
4998
  },
4999
  "attributes": {}
5000
  }
5001
  },
5002
- "total_flos": 2.2367901599347507e+18,
5003
  "train_batch_size": 4,
5004
  "trial_name": null,
5005
  "trial_params": null
 
1
  {
2
  "best_metric": 1.267533302307129,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
+ "epoch": 0.0776081259045806,
5
  "eval_steps": 100,
6
+ "global_step": 744,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4971
  "eval_samples_per_second": 6.272,
4972
  "eval_steps_per_second": 1.568,
4973
  "step": 700
4974
+ },
4975
+ {
4976
+ "epoch": 0.07312271002568684,
4977
+ "grad_norm": 0.5641259551048279,
4978
+ "learning_rate": 1.688839258059971e-06,
4979
+ "loss": 1.3232,
4980
+ "step": 701
4981
+ },
4982
+ {
4983
+ "epoch": 0.0732270220228704,
4984
+ "grad_norm": 0.5656439661979675,
4985
+ "learning_rate": 1.6114111754051974e-06,
4986
+ "loss": 1.2328,
4987
+ "step": 702
4988
+ },
4989
+ {
4990
+ "epoch": 0.07333133402005398,
4991
+ "grad_norm": 0.5426310896873474,
4992
+ "learning_rate": 1.5357854948895634e-06,
4993
+ "loss": 1.251,
4994
+ "step": 703
4995
+ },
4996
+ {
4997
+ "epoch": 0.07343564601723755,
4998
+ "grad_norm": 0.5175634622573853,
4999
+ "learning_rate": 1.4619636019164606e-06,
5000
+ "loss": 1.1482,
5001
+ "step": 704
5002
+ },
5003
+ {
5004
+ "epoch": 0.07353995801442113,
5005
+ "grad_norm": 0.5635023713111877,
5006
+ "learning_rate": 1.3899468488453583e-06,
5007
+ "loss": 1.2063,
5008
+ "step": 705
5009
+ },
5010
+ {
5011
+ "epoch": 0.07364427001160471,
5012
+ "grad_norm": 0.518434464931488,
5013
+ "learning_rate": 1.319736554966955e-06,
5014
+ "loss": 1.4171,
5015
+ "step": 706
5016
+ },
5017
+ {
5018
+ "epoch": 0.07374858200878828,
5019
+ "grad_norm": 0.5022286176681519,
5020
+ "learning_rate": 1.2513340064790102e-06,
5021
+ "loss": 1.1245,
5022
+ "step": 707
5023
+ },
5024
+ {
5025
+ "epoch": 0.07385289400597186,
5026
+ "grad_norm": 0.5017629265785217,
5027
+ "learning_rate": 1.1847404564628185e-06,
5028
+ "loss": 1.144,
5029
+ "step": 708
5030
+ },
5031
+ {
5032
+ "epoch": 0.07395720600315543,
5033
+ "grad_norm": 0.5079241991043091,
5034
+ "learning_rate": 1.1199571248602382e-06,
5035
+ "loss": 1.3892,
5036
+ "step": 709
5037
+ },
5038
+ {
5039
+ "epoch": 0.07406151800033901,
5040
+ "grad_norm": 0.558506190776825,
5041
+ "learning_rate": 1.0569851984513103e-06,
5042
+ "loss": 1.2448,
5043
+ "step": 710
5044
+ },
5045
+ {
5046
+ "epoch": 0.0741658299975226,
5047
+ "grad_norm": 0.5008535981178284,
5048
+ "learning_rate": 9.958258308325973e-07,
5049
+ "loss": 1.2267,
5050
+ "step": 711
5051
+ },
5052
+ {
5053
+ "epoch": 0.07427014199470616,
5054
+ "grad_norm": 0.53139728307724,
5055
+ "learning_rate": 9.364801423959235e-07,
5056
+ "loss": 1.2265,
5057
+ "step": 712
5058
+ },
5059
+ {
5060
+ "epoch": 0.07437445399188974,
5061
+ "grad_norm": 0.5449005961418152,
5062
+ "learning_rate": 8.78949220308023e-07,
5063
+ "loss": 1.3256,
5064
+ "step": 713
5065
+ },
5066
+ {
5067
+ "epoch": 0.07447876598907331,
5068
+ "grad_norm": 0.5834050178527832,
5069
+ "learning_rate": 8.232341184904457e-07,
5070
+ "loss": 1.2626,
5071
+ "step": 714
5072
+ },
5073
+ {
5074
+ "epoch": 0.0745830779862569,
5075
+ "grad_norm": 0.5395832657814026,
5076
+ "learning_rate": 7.693358576003617e-07,
5077
+ "loss": 1.2074,
5078
+ "step": 715
5079
+ },
5080
+ {
5081
+ "epoch": 0.07468738998344047,
5082
+ "grad_norm": 0.5254425406455994,
5083
+ "learning_rate": 7.172554250118535e-07,
5084
+ "loss": 1.2065,
5085
+ "step": 716
5086
+ },
5087
+ {
5088
+ "epoch": 0.07479170198062404,
5089
+ "grad_norm": 0.5222790837287903,
5090
+ "learning_rate": 6.66993774797775e-07,
5091
+ "loss": 1.1553,
5092
+ "step": 717
5093
+ },
5094
+ {
5095
+ "epoch": 0.07489601397780762,
5096
+ "grad_norm": 0.5531771183013916,
5097
+ "learning_rate": 6.185518277123214e-07,
5098
+ "loss": 1.1405,
5099
+ "step": 718
5100
+ },
5101
+ {
5102
+ "epoch": 0.0750003259749912,
5103
+ "grad_norm": 0.5848060250282288,
5104
+ "learning_rate": 5.719304711741535e-07,
5105
+ "loss": 1.283,
5106
+ "step": 719
5107
+ },
5108
+ {
5109
+ "epoch": 0.07510463797217477,
5110
+ "grad_norm": 0.6206434965133667,
5111
+ "learning_rate": 5.271305592501108e-07,
5112
+ "loss": 1.2983,
5113
+ "step": 720
5114
+ },
5115
+ {
5116
+ "epoch": 0.07520894996935835,
5117
+ "grad_norm": 0.5380674600601196,
5118
+ "learning_rate": 4.841529126396238e-07,
5119
+ "loss": 1.4001,
5120
+ "step": 721
5121
+ },
5122
+ {
5123
+ "epoch": 0.07531326196654192,
5124
+ "grad_norm": 0.5470457673072815,
5125
+ "learning_rate": 4.429983186596265e-07,
5126
+ "loss": 1.2379,
5127
+ "step": 722
5128
+ },
5129
+ {
5130
+ "epoch": 0.0754175739637255,
5131
+ "grad_norm": 0.598996102809906,
5132
+ "learning_rate": 4.036675312301452e-07,
5133
+ "loss": 1.2304,
5134
+ "step": 723
5135
+ },
5136
+ {
5137
+ "epoch": 0.07552188596090909,
5138
+ "grad_norm": 0.521056056022644,
5139
+ "learning_rate": 3.6616127086051e-07,
5140
+ "loss": 1.3609,
5141
+ "step": 724
5142
+ },
5143
+ {
5144
+ "epoch": 0.07562619795809265,
5145
+ "grad_norm": 0.5366652011871338,
5146
+ "learning_rate": 3.3048022463612047e-07,
5147
+ "loss": 1.2715,
5148
+ "step": 725
5149
+ },
5150
+ {
5151
+ "epoch": 0.07573050995527623,
5152
+ "grad_norm": 0.5772047638893127,
5153
+ "learning_rate": 2.9662504620588947e-07,
5154
+ "loss": 1.1976,
5155
+ "step": 726
5156
+ },
5157
+ {
5158
+ "epoch": 0.0758348219524598,
5159
+ "grad_norm": 0.547029435634613,
5160
+ "learning_rate": 2.6459635577026353e-07,
5161
+ "loss": 1.3886,
5162
+ "step": 727
5163
+ },
5164
+ {
5165
+ "epoch": 0.07593913394964338,
5166
+ "grad_norm": 0.5879443287849426,
5167
+ "learning_rate": 2.343947400698432e-07,
5168
+ "loss": 1.3782,
5169
+ "step": 728
5170
+ },
5171
+ {
5172
+ "epoch": 0.07604344594682697,
5173
+ "grad_norm": 0.5763773322105408,
5174
+ "learning_rate": 2.0602075237465823e-07,
5175
+ "loss": 1.2558,
5176
+ "step": 729
5177
+ },
5178
+ {
5179
+ "epoch": 0.07614775794401053,
5180
+ "grad_norm": 0.7472733855247498,
5181
+ "learning_rate": 1.7947491247399806e-07,
5182
+ "loss": 1.4806,
5183
+ "step": 730
5184
+ },
5185
+ {
5186
+ "epoch": 0.07625206994119411,
5187
+ "grad_norm": 0.54900723695755,
5188
+ "learning_rate": 1.5475770666694144e-07,
5189
+ "loss": 1.1513,
5190
+ "step": 731
5191
+ },
5192
+ {
5193
+ "epoch": 0.07635638193837768,
5194
+ "grad_norm": 0.5599141120910645,
5195
+ "learning_rate": 1.318695877533971e-07,
5196
+ "loss": 1.1123,
5197
+ "step": 732
5198
+ },
5199
+ {
5200
+ "epoch": 0.07646069393556126,
5201
+ "grad_norm": 0.5251076817512512,
5202
+ "learning_rate": 1.1081097502584348e-07,
5203
+ "loss": 1.1586,
5204
+ "step": 733
5205
+ },
5206
+ {
5207
+ "epoch": 0.07656500593274485,
5208
+ "grad_norm": 0.5731672644615173,
5209
+ "learning_rate": 9.158225426160183e-08,
5210
+ "loss": 1.1249,
5211
+ "step": 734
5212
+ },
5213
+ {
5214
+ "epoch": 0.07666931792992841,
5215
+ "grad_norm": 0.5583503842353821,
5216
+ "learning_rate": 7.418377771585273e-08,
5217
+ "loss": 1.184,
5218
+ "step": 735
5219
+ },
5220
+ {
5221
+ "epoch": 0.076773629927112,
5222
+ "grad_norm": 0.7562082409858704,
5223
+ "learning_rate": 5.86158641150969e-08,
5224
+ "loss": 1.3864,
5225
+ "step": 736
5226
+ },
5227
+ {
5228
+ "epoch": 0.07687794192429556,
5229
+ "grad_norm": 0.492931604385376,
5230
+ "learning_rate": 4.487879865133771e-08,
5231
+ "loss": 1.2668,
5232
+ "step": 737
5233
+ },
5234
+ {
5235
+ "epoch": 0.07698225392147914,
5236
+ "grad_norm": 0.5797216296195984,
5237
+ "learning_rate": 3.2972832976918554e-08,
5238
+ "loss": 1.2089,
5239
+ "step": 738
5240
+ },
5241
+ {
5242
+ "epoch": 0.07708656591866273,
5243
+ "grad_norm": 0.5604730844497681,
5244
+ "learning_rate": 2.2898185199826673e-08,
5245
+ "loss": 1.404,
5246
+ "step": 739
5247
+ },
5248
+ {
5249
+ "epoch": 0.07719087791584629,
5250
+ "grad_norm": 0.5185406804084778,
5251
+ "learning_rate": 1.4655039879740706e-08,
5252
+ "loss": 1.2627,
5253
+ "step": 740
5254
+ },
5255
+ {
5256
+ "epoch": 0.07729518991302987,
5257
+ "grad_norm": 0.5420083999633789,
5258
+ "learning_rate": 8.243548024655656e-09,
5259
+ "loss": 1.1388,
5260
+ "step": 741
5261
+ },
5262
+ {
5263
+ "epoch": 0.07739950191021344,
5264
+ "grad_norm": 0.5024222731590271,
5265
+ "learning_rate": 3.663827088085103e-09,
5266
+ "loss": 1.2322,
5267
+ "step": 742
5268
+ },
5269
+ {
5270
+ "epoch": 0.07750381390739702,
5271
+ "grad_norm": 0.6090103387832642,
5272
+ "learning_rate": 9.159609669406877e-10,
5273
+ "loss": 1.3192,
5274
+ "step": 743
5275
+ },
5276
+ {
5277
+ "epoch": 0.0776081259045806,
5278
+ "grad_norm": 0.53780597448349,
5279
+ "learning_rate": 0.0,
5280
+ "loss": 1.13,
5281
+ "step": 744
5282
  }
5283
  ],
5284
  "logging_steps": 1,
 
5302
  "should_evaluate": false,
5303
  "should_log": false,
5304
  "should_save": true,
5305
+ "should_training_stop": true
5306
  },
5307
  "attributes": {}
5308
  }
5309
  },
5310
+ "total_flos": 2.381458180229038e+18,
5311
  "train_batch_size": 4,
5312
  "trial_name": null,
5313
  "trial_params": null