Romain-XV commited on
Commit
4dea7b8
·
verified ·
1 Parent(s): e6157cd

Training in progress, step 769, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c067e9b7f288530ab39326f6e570c05f17a138323d318e18c4f5ef2a3a307d7f
3
  size 78480072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf532bff9ad6147dd88328720e03b44878aa6b0931a77cd997d677bdba34e871
3
  size 78480072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ebf985949d5355ffd617b0ec9baa62c507546545c51519c7db5be5e381aabb8
3
  size 40131524
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21819e721894f2463b4a3a69c9c1c6f71462d4e01d9fe1b76da32267867a5726
3
  size 40131524
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eeb22bcf29c8e6133c89cd9fd221f0894b2376a305e63e6775d3448ca845c80
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72cb262ec7962a2bec51b38ffd60bf00b445c7c161ad40a55d35f9ba16677aa8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c86598b782c348ab7928decefae2f5953cb6a5fed8d04870c2a2a6ffe78cc4ad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcc6b391be8ff7ed73c430f9b92a688f34c68358531300f1a61ed95351f819a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.4688050746917725,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
- "epoch": 0.9103470698203691,
5
  "eval_steps": 50,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5027,6 +5027,497 @@
5027
  "eval_samples_per_second": 27.933,
5028
  "eval_steps_per_second": 6.986,
5029
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5030
  }
5031
  ],
5032
  "logging_steps": 1,
@@ -5050,12 +5541,12 @@
5050
  "should_evaluate": false,
5051
  "should_log": false,
5052
  "should_save": true,
5053
- "should_training_stop": false
5054
  },
5055
  "attributes": {}
5056
  }
5057
  },
5058
- "total_flos": 5.5096338217854566e+17,
5059
  "train_batch_size": 4,
5060
  "trial_name": null,
5061
  "trial_params": null
 
1
  {
2
  "best_metric": 1.4688050746917725,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
+ "epoch": 1.0004064049418842,
5
  "eval_steps": 50,
6
+ "global_step": 769,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5027
  "eval_samples_per_second": 27.933,
5028
  "eval_steps_per_second": 6.986,
5029
  "step": 700
5030
+ },
5031
+ {
5032
+ "epoch": 0.9116475656343981,
5033
+ "grad_norm": 0.7602055668830872,
5034
+ "learning_rate": 3.934912380055289e-06,
5035
+ "loss": 1.4222,
5036
+ "step": 701
5037
+ },
5038
+ {
5039
+ "epoch": 0.9129480614484272,
5040
+ "grad_norm": 0.7412660717964172,
5041
+ "learning_rate": 3.820767937093095e-06,
5042
+ "loss": 1.4024,
5043
+ "step": 702
5044
+ },
5045
+ {
5046
+ "epoch": 0.9142485572624564,
5047
+ "grad_norm": 0.6979288458824158,
5048
+ "learning_rate": 3.7082712652200867e-06,
5049
+ "loss": 1.4652,
5050
+ "step": 703
5051
+ },
5052
+ {
5053
+ "epoch": 0.9155490530764854,
5054
+ "grad_norm": 0.659394383430481,
5055
+ "learning_rate": 3.5974242917625187e-06,
5056
+ "loss": 1.4534,
5057
+ "step": 704
5058
+ },
5059
+ {
5060
+ "epoch": 0.9168495488905145,
5061
+ "grad_norm": 0.7618691325187683,
5062
+ "learning_rate": 3.488228915783631e-06,
5063
+ "loss": 1.3859,
5064
+ "step": 705
5065
+ },
5066
+ {
5067
+ "epoch": 0.9181500447045436,
5068
+ "grad_norm": 0.6013392806053162,
5069
+ "learning_rate": 3.380687008050909e-06,
5070
+ "loss": 1.4909,
5071
+ "step": 706
5072
+ },
5073
+ {
5074
+ "epoch": 0.9194505405185727,
5075
+ "grad_norm": 0.6869235634803772,
5076
+ "learning_rate": 3.2748004110041863e-06,
5077
+ "loss": 1.3593,
5078
+ "step": 707
5079
+ },
5080
+ {
5081
+ "epoch": 0.9207510363326018,
5082
+ "grad_norm": 0.7875847816467285,
5083
+ "learning_rate": 3.1705709387239934e-06,
5084
+ "loss": 1.5395,
5085
+ "step": 708
5086
+ },
5087
+ {
5088
+ "epoch": 0.9220515321466309,
5089
+ "grad_norm": 0.7080655097961426,
5090
+ "learning_rate": 3.068000376900515e-06,
5091
+ "loss": 1.4826,
5092
+ "step": 709
5093
+ },
5094
+ {
5095
+ "epoch": 0.92335202796066,
5096
+ "grad_norm": 0.7241777181625366,
5097
+ "learning_rate": 2.9670904828030033e-06,
5098
+ "loss": 1.4333,
5099
+ "step": 710
5100
+ },
5101
+ {
5102
+ "epoch": 0.9246525237746891,
5103
+ "grad_norm": 0.745314359664917,
5104
+ "learning_rate": 2.8678429852496467e-06,
5105
+ "loss": 1.3511,
5106
+ "step": 711
5107
+ },
5108
+ {
5109
+ "epoch": 0.9259530195887182,
5110
+ "grad_norm": 0.7951449751853943,
5111
+ "learning_rate": 2.770259584577972e-06,
5112
+ "loss": 1.4815,
5113
+ "step": 712
5114
+ },
5115
+ {
5116
+ "epoch": 0.9272535154027473,
5117
+ "grad_norm": 0.6265352964401245,
5118
+ "learning_rate": 2.6743419526157e-06,
5119
+ "loss": 1.511,
5120
+ "step": 713
5121
+ },
5122
+ {
5123
+ "epoch": 0.9285540112167764,
5124
+ "grad_norm": 0.6809277534484863,
5125
+ "learning_rate": 2.580091732652101e-06,
5126
+ "loss": 1.4647,
5127
+ "step": 714
5128
+ },
5129
+ {
5130
+ "epoch": 0.9298545070308055,
5131
+ "grad_norm": 1.0379259586334229,
5132
+ "learning_rate": 2.4875105394098654e-06,
5133
+ "loss": 1.6214,
5134
+ "step": 715
5135
+ },
5136
+ {
5137
+ "epoch": 0.9311550028448345,
5138
+ "grad_norm": 0.8921117782592773,
5139
+ "learning_rate": 2.3965999590174095e-06,
5140
+ "loss": 1.5787,
5141
+ "step": 716
5142
+ },
5143
+ {
5144
+ "epoch": 0.9324554986588637,
5145
+ "grad_norm": 0.8291761875152588,
5146
+ "learning_rate": 2.3073615489817235e-06,
5147
+ "loss": 1.3749,
5148
+ "step": 717
5149
+ },
5150
+ {
5151
+ "epoch": 0.9337559944728928,
5152
+ "grad_norm": 0.734061598777771,
5153
+ "learning_rate": 2.219796838161681e-06,
5154
+ "loss": 1.3417,
5155
+ "step": 718
5156
+ },
5157
+ {
5158
+ "epoch": 0.9350564902869218,
5159
+ "grad_norm": 0.7294670343399048,
5160
+ "learning_rate": 2.1339073267418464e-06,
5161
+ "loss": 1.42,
5162
+ "step": 719
5163
+ },
5164
+ {
5165
+ "epoch": 0.936356986100951,
5166
+ "grad_norm": 0.7059687972068787,
5167
+ "learning_rate": 2.0496944862067656e-06,
5168
+ "loss": 1.5228,
5169
+ "step": 720
5170
+ },
5171
+ {
5172
+ "epoch": 0.9376574819149801,
5173
+ "grad_norm": 0.7029430270195007,
5174
+ "learning_rate": 1.967159759315751e-06,
5175
+ "loss": 1.4259,
5176
+ "step": 721
5177
+ },
5178
+ {
5179
+ "epoch": 0.9389579777290091,
5180
+ "grad_norm": 0.7115610241889954,
5181
+ "learning_rate": 1.8863045600782003e-06,
5182
+ "loss": 1.5353,
5183
+ "step": 722
5184
+ },
5185
+ {
5186
+ "epoch": 0.9402584735430383,
5187
+ "grad_norm": 0.6966084241867065,
5188
+ "learning_rate": 1.8071302737293295e-06,
5189
+ "loss": 1.4112,
5190
+ "step": 723
5191
+ },
5192
+ {
5193
+ "epoch": 0.9415589693570674,
5194
+ "grad_norm": 0.6711810827255249,
5195
+ "learning_rate": 1.7296382567064672e-06,
5196
+ "loss": 1.5192,
5197
+ "step": 724
5198
+ },
5199
+ {
5200
+ "epoch": 0.9428594651710965,
5201
+ "grad_norm": 0.7636929154396057,
5202
+ "learning_rate": 1.6538298366257976e-06,
5203
+ "loss": 1.3406,
5204
+ "step": 725
5205
+ },
5206
+ {
5207
+ "epoch": 0.9441599609851256,
5208
+ "grad_norm": 0.7429101467132568,
5209
+ "learning_rate": 1.57970631225961e-06,
5210
+ "loss": 1.4707,
5211
+ "step": 726
5212
+ },
5213
+ {
5214
+ "epoch": 0.9454604567991547,
5215
+ "grad_norm": 0.7741535902023315,
5216
+ "learning_rate": 1.5072689535141072e-06,
5217
+ "loss": 1.5783,
5218
+ "step": 727
5219
+ },
5220
+ {
5221
+ "epoch": 0.9467609526131838,
5222
+ "grad_norm": 0.7151786684989929,
5223
+ "learning_rate": 1.4365190014075437e-06,
5224
+ "loss": 1.5202,
5225
+ "step": 728
5226
+ },
5227
+ {
5228
+ "epoch": 0.9480614484272128,
5229
+ "grad_norm": 0.793623685836792,
5230
+ "learning_rate": 1.3674576680490659e-06,
5231
+ "loss": 1.4622,
5232
+ "step": 729
5233
+ },
5234
+ {
5235
+ "epoch": 0.949361944241242,
5236
+ "grad_norm": 0.6933899521827698,
5237
+ "learning_rate": 1.3000861366179062e-06,
5238
+ "loss": 1.4376,
5239
+ "step": 730
5240
+ },
5241
+ {
5242
+ "epoch": 0.9506624400552711,
5243
+ "grad_norm": 0.826909065246582,
5244
+ "learning_rate": 1.234405561343066e-06,
5245
+ "loss": 1.6414,
5246
+ "step": 731
5247
+ },
5248
+ {
5249
+ "epoch": 0.9519629358693001,
5250
+ "grad_norm": 0.7606424689292908,
5251
+ "learning_rate": 1.1704170674836313e-06,
5252
+ "loss": 1.474,
5253
+ "step": 732
5254
+ },
5255
+ {
5256
+ "epoch": 0.9532634316833293,
5257
+ "grad_norm": 0.7522351145744324,
5258
+ "learning_rate": 1.1081217513094212e-06,
5259
+ "loss": 1.353,
5260
+ "step": 733
5261
+ },
5262
+ {
5263
+ "epoch": 0.9545639274973584,
5264
+ "grad_norm": 0.6792885661125183,
5265
+ "learning_rate": 1.047520680082248e-06,
5266
+ "loss": 1.4632,
5267
+ "step": 734
5268
+ },
5269
+ {
5270
+ "epoch": 0.9558644233113874,
5271
+ "grad_norm": 0.7526161074638367,
5272
+ "learning_rate": 9.886148920376203e-07,
5273
+ "loss": 1.4683,
5274
+ "step": 735
5275
+ },
5276
+ {
5277
+ "epoch": 0.9571649191254166,
5278
+ "grad_norm": 0.7043469548225403,
5279
+ "learning_rate": 9.314053963669245e-07,
5280
+ "loss": 1.64,
5281
+ "step": 736
5282
+ },
5283
+ {
5284
+ "epoch": 0.9584654149394457,
5285
+ "grad_norm": 0.6877920627593994,
5286
+ "learning_rate": 8.75893173200204e-07,
5287
+ "loss": 1.4917,
5288
+ "step": 737
5289
+ },
5290
+ {
5291
+ "epoch": 0.9597659107534747,
5292
+ "grad_norm": 0.7135722637176514,
5293
+ "learning_rate": 8.220791735892964e-07,
5294
+ "loss": 1.4555,
5295
+ "step": 738
5296
+ },
5297
+ {
5298
+ "epoch": 0.9610664065675039,
5299
+ "grad_norm": 0.7366563677787781,
5300
+ "learning_rate": 7.699643194915784e-07,
5301
+ "loss": 1.4801,
5302
+ "step": 739
5303
+ },
5304
+ {
5305
+ "epoch": 0.962366902381533,
5306
+ "grad_norm": 0.600986123085022,
5307
+ "learning_rate": 7.1954950375418e-07,
5308
+ "loss": 1.4188,
5309
+ "step": 740
5310
+ },
5311
+ {
5312
+ "epoch": 0.963667398195562,
5313
+ "grad_norm": 0.682475745677948,
5314
+ "learning_rate": 6.708355900986396e-07,
5315
+ "loss": 1.4593,
5316
+ "step": 741
5317
+ },
5318
+ {
5319
+ "epoch": 0.9649678940095912,
5320
+ "grad_norm": 0.8102880120277405,
5321
+ "learning_rate": 6.238234131061616e-07,
5322
+ "loss": 1.4449,
5323
+ "step": 742
5324
+ },
5325
+ {
5326
+ "epoch": 0.9662683898236203,
5327
+ "grad_norm": 0.6956253051757812,
5328
+ "learning_rate": 5.785137782032824e-07,
5329
+ "loss": 1.3999,
5330
+ "step": 743
5331
+ },
5332
+ {
5333
+ "epoch": 0.9675688856376493,
5334
+ "grad_norm": 0.7653059363365173,
5335
+ "learning_rate": 5.349074616480931e-07,
5336
+ "loss": 1.5632,
5337
+ "step": 744
5338
+ },
5339
+ {
5340
+ "epoch": 0.9688693814516784,
5341
+ "grad_norm": 0.784920334815979,
5342
+ "learning_rate": 4.93005210516928e-07,
5343
+ "loss": 1.5078,
5344
+ "step": 745
5345
+ },
5346
+ {
5347
+ "epoch": 0.9701698772657076,
5348
+ "grad_norm": 0.8322311043739319,
5349
+ "learning_rate": 4.5280774269154115e-07,
5350
+ "loss": 1.5513,
5351
+ "step": 746
5352
+ },
5353
+ {
5354
+ "epoch": 0.9714703730797366,
5355
+ "grad_norm": 0.8491717576980591,
5356
+ "learning_rate": 4.143157468468717e-07,
5357
+ "loss": 1.4737,
5358
+ "step": 747
5359
+ },
5360
+ {
5361
+ "epoch": 0.9727708688937657,
5362
+ "grad_norm": 0.6789981722831726,
5363
+ "learning_rate": 3.775298824391982e-07,
5364
+ "loss": 1.5004,
5365
+ "step": 748
5366
+ },
5367
+ {
5368
+ "epoch": 0.9740713647077949,
5369
+ "grad_norm": 0.7747517824172974,
5370
+ "learning_rate": 3.424507796948362e-07,
5371
+ "loss": 1.5806,
5372
+ "step": 749
5373
+ },
5374
+ {
5375
+ "epoch": 0.9753718605218239,
5376
+ "grad_norm": 0.8059523105621338,
5377
+ "learning_rate": 3.090790395993692e-07,
5378
+ "loss": 1.4772,
5379
+ "step": 750
5380
+ },
5381
+ {
5382
+ "epoch": 0.9753718605218239,
5383
+ "eval_loss": 1.4685416221618652,
5384
+ "eval_runtime": 92.6813,
5385
+ "eval_samples_per_second": 27.956,
5386
+ "eval_steps_per_second": 6.992,
5387
+ "step": 750
5388
+ },
5389
+ {
5390
+ "epoch": 0.976672356335853,
5391
+ "grad_norm": 0.6508041024208069,
5392
+ "learning_rate": 2.774152338873126e-07,
5393
+ "loss": 1.4536,
5394
+ "step": 751
5395
+ },
5396
+ {
5397
+ "epoch": 0.9779728521498822,
5398
+ "grad_norm": 0.7037675380706787,
5399
+ "learning_rate": 2.474599050323989e-07,
5400
+ "loss": 1.4824,
5401
+ "step": 752
5402
+ },
5403
+ {
5404
+ "epoch": 0.9792733479639112,
5405
+ "grad_norm": 0.7295902967453003,
5406
+ "learning_rate": 2.1921356623816336e-07,
5407
+ "loss": 1.3991,
5408
+ "step": 753
5409
+ },
5410
+ {
5411
+ "epoch": 0.9805738437779403,
5412
+ "grad_norm": 0.6926706433296204,
5413
+ "learning_rate": 1.9267670142926187e-07,
5414
+ "loss": 1.3409,
5415
+ "step": 754
5416
+ },
5417
+ {
5418
+ "epoch": 0.9818743395919695,
5419
+ "grad_norm": 0.8004979491233826,
5420
+ "learning_rate": 1.6784976524312213e-07,
5421
+ "loss": 1.4575,
5422
+ "step": 755
5423
+ },
5424
+ {
5425
+ "epoch": 0.9831748354059985,
5426
+ "grad_norm": 0.6473626494407654,
5427
+ "learning_rate": 1.4473318302216098e-07,
5428
+ "loss": 1.2736,
5429
+ "step": 756
5430
+ },
5431
+ {
5432
+ "epoch": 0.9844753312200276,
5433
+ "grad_norm": 0.7723749876022339,
5434
+ "learning_rate": 1.2332735080651248e-07,
5435
+ "loss": 1.386,
5436
+ "step": 757
5437
+ },
5438
+ {
5439
+ "epoch": 0.9857758270340568,
5440
+ "grad_norm": 0.6859511733055115,
5441
+ "learning_rate": 1.0363263532724432e-07,
5442
+ "loss": 1.4629,
5443
+ "step": 758
5444
+ },
5445
+ {
5446
+ "epoch": 0.9870763228480859,
5447
+ "grad_norm": 0.7101691961288452,
5448
+ "learning_rate": 8.564937400004081e-08,
5449
+ "loss": 1.534,
5450
+ "step": 759
5451
+ },
5452
+ {
5453
+ "epoch": 0.9883768186621149,
5454
+ "grad_norm": 0.7447732090950012,
5455
+ "learning_rate": 6.9377874919474e-08,
5456
+ "loss": 1.3709,
5457
+ "step": 760
5458
+ },
5459
+ {
5460
+ "epoch": 0.989677314476144,
5461
+ "grad_norm": 0.6958511471748352,
5462
+ "learning_rate": 5.4818416853674726e-08,
5463
+ "loss": 1.3528,
5464
+ "step": 761
5465
+ },
5466
+ {
5467
+ "epoch": 0.9909778102901732,
5468
+ "grad_norm": 0.8112965226173401,
5469
+ "learning_rate": 4.1971249239591834e-08,
5470
+ "loss": 1.6432,
5471
+ "step": 762
5472
+ },
5473
+ {
5474
+ "epoch": 0.9922783061042022,
5475
+ "grad_norm": 0.7658011317253113,
5476
+ "learning_rate": 3.0836592178717926e-08,
5477
+ "loss": 1.5357,
5478
+ "step": 763
5479
+ },
5480
+ {
5481
+ "epoch": 0.9935788019182313,
5482
+ "grad_norm": 0.7664233446121216,
5483
+ "learning_rate": 2.141463643328123e-08,
5484
+ "loss": 1.5152,
5485
+ "step": 764
5486
+ },
5487
+ {
5488
+ "epoch": 0.9948792977322605,
5489
+ "grad_norm": 0.6668312549591064,
5490
+ "learning_rate": 1.370554342302599e-08,
5491
+ "loss": 1.4316,
5492
+ "step": 765
5493
+ },
5494
+ {
5495
+ "epoch": 0.9961797935462895,
5496
+ "grad_norm": 0.7010061144828796,
5497
+ "learning_rate": 7.709445222403577e-09,
5498
+ "loss": 1.4363,
5499
+ "step": 766
5500
+ },
5501
+ {
5502
+ "epoch": 0.9974802893603186,
5503
+ "grad_norm": 0.7528097629547119,
5504
+ "learning_rate": 3.4264445583631622e-09,
5505
+ "loss": 1.6157,
5506
+ "step": 767
5507
+ },
5508
+ {
5509
+ "epoch": 0.9987807851743478,
5510
+ "grad_norm": 0.662280261516571,
5511
+ "learning_rate": 8.566148085309423e-10,
5512
+ "loss": 1.3786,
5513
+ "step": 768
5514
+ },
5515
+ {
5516
+ "epoch": 1.0004064049418842,
5517
+ "grad_norm": 0.988018274307251,
5518
+ "learning_rate": 0.0,
5519
+ "loss": 1.7631,
5520
+ "step": 769
5521
  }
5522
  ],
5523
  "logging_steps": 1,
 
5541
  "should_evaluate": false,
5542
  "should_log": false,
5543
  "should_save": true,
5544
+ "should_training_stop": true
5545
  },
5546
  "attributes": {}
5547
  }
5548
  },
5549
+ "total_flos": 6.052677812433715e+17,
5550
  "train_batch_size": 4,
5551
  "trial_name": null,
5552
  "trial_params": null