Romain-XV commited on
Commit
9f1d068
·
verified ·
1 Parent(s): a53ed30

Training in progress, step 2639, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c1378ef4736f2f7bbb0d8e6d86b1193a3f82cecb1bcad549ca99671c48a0b1
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6326305b02c5345bbc9a927c7664228a65ff343e0847e6f540371df214898245
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbe94b5b12b5433cb5408f96ae6477af4d31b0be684a25007f8ff9c8229ce038
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39cff31ff9cdaa3d7bf98887213ed1fe7065b9b469b83a1f2874185b37641eff
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:522709a2fdcdf556470c396e7257ba4a77af185dd94807163c8371fa84e5bab4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c886e41b7a0f834b74bd37a49b30c0918f520a0ee05607af869fc7415f419c1e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8651321171077e1043f195622e013e6334728fa9b899c30fea12b95d0b3a7ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:457691edb449186d661b3ffd85f29c9eb7a6ed830917fa95f78e4941adeac57c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.09612426161766052,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
- "epoch": 0.1392548500279875,
5
  "eval_steps": 150,
6
- "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18001,6 +18001,629 @@
18001
  "eval_samples_per_second": 4.176,
18002
  "eval_steps_per_second": 2.088,
18003
  "step": 2550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18004
  }
18005
  ],
18006
  "logging_steps": 1,
@@ -18024,12 +18647,12 @@
18024
  "should_evaluate": false,
18025
  "should_log": false,
18026
  "should_save": true,
18027
- "should_training_stop": false
18028
  },
18029
  "attributes": {}
18030
  }
18031
  },
18032
- "total_flos": 1.8277422069794734e+18,
18033
  "train_batch_size": 2,
18034
  "trial_name": null,
18035
  "trial_params": null
 
1
  {
2
  "best_metric": 0.09612426161766052,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
+ "epoch": 0.1441151173426898,
5
  "eval_steps": 150,
6
+ "global_step": 2639,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18001
  "eval_samples_per_second": 4.176,
18002
  "eval_steps_per_second": 2.088,
18003
  "step": 2550
18004
+ },
18005
+ {
18006
+ "epoch": 0.1393094597730965,
18007
+ "grad_norm": 3.723029851913452,
18008
+ "learning_rate": 5.52400007501297e-07,
18009
+ "loss": 0.2928,
18010
+ "step": 2551
18011
+ },
18012
+ {
18013
+ "epoch": 0.1393640695182055,
18014
+ "grad_norm": 2.9333627223968506,
18015
+ "learning_rate": 5.39928040206128e-07,
18016
+ "loss": 0.3217,
18017
+ "step": 2552
18018
+ },
18019
+ {
18020
+ "epoch": 0.13941867926331453,
18021
+ "grad_norm": 2.4878392219543457,
18022
+ "learning_rate": 5.27598098726123e-07,
18023
+ "loss": 0.2826,
18024
+ "step": 2553
18025
+ },
18026
+ {
18027
+ "epoch": 0.13947328900842354,
18028
+ "grad_norm": 2.785881757736206,
18029
+ "learning_rate": 5.15410200668054e-07,
18030
+ "loss": 0.4198,
18031
+ "step": 2554
18032
+ },
18033
+ {
18034
+ "epoch": 0.13952789875353258,
18035
+ "grad_norm": 4.309914588928223,
18036
+ "learning_rate": 5.03364363435832e-07,
18037
+ "loss": 0.4567,
18038
+ "step": 2555
18039
+ },
18040
+ {
18041
+ "epoch": 0.1395825084986416,
18042
+ "grad_norm": 3.7101707458496094,
18043
+ "learning_rate": 4.914606042305426e-07,
18044
+ "loss": 0.4291,
18045
+ "step": 2556
18046
+ },
18047
+ {
18048
+ "epoch": 0.1396371182437506,
18049
+ "grad_norm": 4.047240734100342,
18050
+ "learning_rate": 4.796989400503771e-07,
18051
+ "loss": 0.4754,
18052
+ "step": 2557
18053
+ },
18054
+ {
18055
+ "epoch": 0.13969172798885962,
18056
+ "grad_norm": 4.014715671539307,
18057
+ "learning_rate": 4.680793876906009e-07,
18058
+ "loss": 0.4832,
18059
+ "step": 2558
18060
+ },
18061
+ {
18062
+ "epoch": 0.13974633773396863,
18063
+ "grad_norm": 3.6147289276123047,
18064
+ "learning_rate": 4.566019637435748e-07,
18065
+ "loss": 0.4284,
18066
+ "step": 2559
18067
+ },
18068
+ {
18069
+ "epoch": 0.13980094747907765,
18070
+ "grad_norm": 6.866396903991699,
18071
+ "learning_rate": 4.4526668459869967e-07,
18072
+ "loss": 0.6259,
18073
+ "step": 2560
18074
+ },
18075
+ {
18076
+ "epoch": 0.13985555722418666,
18077
+ "grad_norm": 4.688859939575195,
18078
+ "learning_rate": 4.340735664423834e-07,
18079
+ "loss": 0.6314,
18080
+ "step": 2561
18081
+ },
18082
+ {
18083
+ "epoch": 0.13991016696929567,
18084
+ "grad_norm": 5.6968488693237305,
18085
+ "learning_rate": 4.230226252580516e-07,
18086
+ "loss": 0.646,
18087
+ "step": 2562
18088
+ },
18089
+ {
18090
+ "epoch": 0.1399647767144047,
18091
+ "grad_norm": 4.044732093811035,
18092
+ "learning_rate": 4.1211387682609237e-07,
18093
+ "loss": 0.3599,
18094
+ "step": 2563
18095
+ },
18096
+ {
18097
+ "epoch": 0.1400193864595137,
18098
+ "grad_norm": 2.7242889404296875,
18099
+ "learning_rate": 4.013473367238452e-07,
18100
+ "loss": 0.2921,
18101
+ "step": 2564
18102
+ },
18103
+ {
18104
+ "epoch": 0.1400739962046227,
18105
+ "grad_norm": 4.420613765716553,
18106
+ "learning_rate": 3.907230203255896e-07,
18107
+ "loss": 0.4667,
18108
+ "step": 2565
18109
+ },
18110
+ {
18111
+ "epoch": 0.14012860594973173,
18112
+ "grad_norm": 5.145106792449951,
18113
+ "learning_rate": 3.802409428025233e-07,
18114
+ "loss": 0.5955,
18115
+ "step": 2566
18116
+ },
18117
+ {
18118
+ "epoch": 0.14018321569484074,
18119
+ "grad_norm": 3.0266265869140625,
18120
+ "learning_rate": 3.699011191227064e-07,
18121
+ "loss": 0.2157,
18122
+ "step": 2567
18123
+ },
18124
+ {
18125
+ "epoch": 0.14023782543994975,
18126
+ "grad_norm": 3.0053975582122803,
18127
+ "learning_rate": 3.5970356405107263e-07,
18128
+ "loss": 0.329,
18129
+ "step": 2568
18130
+ },
18131
+ {
18132
+ "epoch": 0.14029243518505877,
18133
+ "grad_norm": 3.0817947387695312,
18134
+ "learning_rate": 3.496482921494182e-07,
18135
+ "loss": 0.3323,
18136
+ "step": 2569
18137
+ },
18138
+ {
18139
+ "epoch": 0.14034704493016778,
18140
+ "grad_norm": 3.6538212299346924,
18141
+ "learning_rate": 3.3973531777634625e-07,
18142
+ "loss": 0.5624,
18143
+ "step": 2570
18144
+ },
18145
+ {
18146
+ "epoch": 0.1404016546752768,
18147
+ "grad_norm": 4.178391456604004,
18148
+ "learning_rate": 3.29964655087267e-07,
18149
+ "loss": 0.4504,
18150
+ "step": 2571
18151
+ },
18152
+ {
18153
+ "epoch": 0.1404562644203858,
18154
+ "grad_norm": 3.166633367538452,
18155
+ "learning_rate": 3.2033631803437546e-07,
18156
+ "loss": 0.3282,
18157
+ "step": 2572
18158
+ },
18159
+ {
18160
+ "epoch": 0.14051087416549482,
18161
+ "grad_norm": 5.640282154083252,
18162
+ "learning_rate": 3.108503203666402e-07,
18163
+ "loss": 0.642,
18164
+ "step": 2573
18165
+ },
18166
+ {
18167
+ "epoch": 0.14056548391060383,
18168
+ "grad_norm": 4.808424949645996,
18169
+ "learning_rate": 3.01506675629748e-07,
18170
+ "loss": 0.6067,
18171
+ "step": 2574
18172
+ },
18173
+ {
18174
+ "epoch": 0.14062009365571287,
18175
+ "grad_norm": 2.6138014793395996,
18176
+ "learning_rate": 2.9230539716613713e-07,
18177
+ "loss": 0.2736,
18178
+ "step": 2575
18179
+ },
18180
+ {
18181
+ "epoch": 0.1406747034008219,
18182
+ "grad_norm": 4.742761135101318,
18183
+ "learning_rate": 2.832464981149308e-07,
18184
+ "loss": 0.4355,
18185
+ "step": 2576
18186
+ },
18187
+ {
18188
+ "epoch": 0.1407293131459309,
18189
+ "grad_norm": 4.499370098114014,
18190
+ "learning_rate": 2.7432999141195904e-07,
18191
+ "loss": 0.5156,
18192
+ "step": 2577
18193
+ },
18194
+ {
18195
+ "epoch": 0.14078392289103991,
18196
+ "grad_norm": 4.269051551818848,
18197
+ "learning_rate": 2.655558897897037e-07,
18198
+ "loss": 0.4639,
18199
+ "step": 2578
18200
+ },
18201
+ {
18202
+ "epoch": 0.14083853263614893,
18203
+ "grad_norm": 2.7144224643707275,
18204
+ "learning_rate": 2.569242057773091e-07,
18205
+ "loss": 0.2879,
18206
+ "step": 2579
18207
+ },
18208
+ {
18209
+ "epoch": 0.14089314238125794,
18210
+ "grad_norm": 3.774793863296509,
18211
+ "learning_rate": 2.48434951700538e-07,
18212
+ "loss": 0.3831,
18213
+ "step": 2580
18214
+ },
18215
+ {
18216
+ "epoch": 0.14094775212636695,
18217
+ "grad_norm": 2.9699277877807617,
18218
+ "learning_rate": 2.4008813968177115e-07,
18219
+ "loss": 0.3127,
18220
+ "step": 2581
18221
+ },
18222
+ {
18223
+ "epoch": 0.14100236187147597,
18224
+ "grad_norm": 4.905210971832275,
18225
+ "learning_rate": 2.318837816399966e-07,
18226
+ "loss": 0.4349,
18227
+ "step": 2582
18228
+ },
18229
+ {
18230
+ "epoch": 0.14105697161658498,
18231
+ "grad_norm": 6.435173511505127,
18232
+ "learning_rate": 2.238218892907762e-07,
18233
+ "loss": 0.5555,
18234
+ "step": 2583
18235
+ },
18236
+ {
18237
+ "epoch": 0.141111581361694,
18238
+ "grad_norm": 2.4339380264282227,
18239
+ "learning_rate": 2.1590247414624566e-07,
18240
+ "loss": 0.2213,
18241
+ "step": 2584
18242
+ },
18243
+ {
18244
+ "epoch": 0.141166191106803,
18245
+ "grad_norm": 25.24824333190918,
18246
+ "learning_rate": 2.0812554751504788e-07,
18247
+ "loss": 1.1507,
18248
+ "step": 2585
18249
+ },
18250
+ {
18251
+ "epoch": 0.14122080085191202,
18252
+ "grad_norm": 2.5197904109954834,
18253
+ "learning_rate": 2.004911205024107e-07,
18254
+ "loss": 0.2165,
18255
+ "step": 2586
18256
+ },
18257
+ {
18258
+ "epoch": 0.14127541059702103,
18259
+ "grad_norm": 3.841722011566162,
18260
+ "learning_rate": 1.9299920401004702e-07,
18261
+ "loss": 0.4137,
18262
+ "step": 2587
18263
+ },
18264
+ {
18265
+ "epoch": 0.14133002034213005,
18266
+ "grad_norm": 3.005751609802246,
18267
+ "learning_rate": 1.8564980873618798e-07,
18268
+ "loss": 0.4004,
18269
+ "step": 2588
18270
+ },
18271
+ {
18272
+ "epoch": 0.14138463008723906,
18273
+ "grad_norm": 2.7246780395507812,
18274
+ "learning_rate": 1.784429451755054e-07,
18275
+ "loss": 0.3111,
18276
+ "step": 2589
18277
+ },
18278
+ {
18279
+ "epoch": 0.14143923983234807,
18280
+ "grad_norm": 5.2464494705200195,
18281
+ "learning_rate": 1.7137862361920054e-07,
18282
+ "loss": 0.5692,
18283
+ "step": 2590
18284
+ },
18285
+ {
18286
+ "epoch": 0.1414938495774571,
18287
+ "grad_norm": 5.011432647705078,
18288
+ "learning_rate": 1.6445685415488188e-07,
18289
+ "loss": 0.6385,
18290
+ "step": 2591
18291
+ },
18292
+ {
18293
+ "epoch": 0.1415484593225661,
18294
+ "grad_norm": 5.104406356811523,
18295
+ "learning_rate": 1.5767764666662078e-07,
18296
+ "loss": 0.452,
18297
+ "step": 2592
18298
+ },
18299
+ {
18300
+ "epoch": 0.1416030690676751,
18301
+ "grad_norm": 3.401982545852661,
18302
+ "learning_rate": 1.5104101083490695e-07,
18303
+ "loss": 0.5705,
18304
+ "step": 2593
18305
+ },
18306
+ {
18307
+ "epoch": 0.14165767881278415,
18308
+ "grad_norm": 3.839580535888672,
18309
+ "learning_rate": 1.445469561366486e-07,
18310
+ "loss": 0.5416,
18311
+ "step": 2594
18312
+ },
18313
+ {
18314
+ "epoch": 0.14171228855789317,
18315
+ "grad_norm": 6.060207366943359,
18316
+ "learning_rate": 1.3819549184516112e-07,
18317
+ "loss": 0.3357,
18318
+ "step": 2595
18319
+ },
18320
+ {
18321
+ "epoch": 0.14176689830300218,
18322
+ "grad_norm": 5.15747594833374,
18323
+ "learning_rate": 1.3198662703011178e-07,
18324
+ "loss": 0.7372,
18325
+ "step": 2596
18326
+ },
18327
+ {
18328
+ "epoch": 0.1418215080481112,
18329
+ "grad_norm": 5.06006383895874,
18330
+ "learning_rate": 1.2592037055756401e-07,
18331
+ "loss": 0.4465,
18332
+ "step": 2597
18333
+ },
18334
+ {
18335
+ "epoch": 0.1418761177932202,
18336
+ "grad_norm": 3.257328987121582,
18337
+ "learning_rate": 1.1999673108995523e-07,
18338
+ "loss": 0.3966,
18339
+ "step": 2598
18340
+ },
18341
+ {
18342
+ "epoch": 0.14193072753832922,
18343
+ "grad_norm": 5.779103755950928,
18344
+ "learning_rate": 1.142157170860414e-07,
18345
+ "loss": 0.5437,
18346
+ "step": 2599
18347
+ },
18348
+ {
18349
+ "epoch": 0.14198533728343823,
18350
+ "grad_norm": 4.808257579803467,
18351
+ "learning_rate": 1.0857733680093018e-07,
18352
+ "loss": 0.4866,
18353
+ "step": 2600
18354
+ },
18355
+ {
18356
+ "epoch": 0.14203994702854725,
18357
+ "grad_norm": 4.1833720207214355,
18358
+ "learning_rate": 1.030815982860478e-07,
18359
+ "loss": 0.4623,
18360
+ "step": 2601
18361
+ },
18362
+ {
18363
+ "epoch": 0.14209455677365626,
18364
+ "grad_norm": 3.7359843254089355,
18365
+ "learning_rate": 9.772850938913891e-08,
18366
+ "loss": 0.4209,
18367
+ "step": 2602
18368
+ },
18369
+ {
18370
+ "epoch": 0.14214916651876527,
18371
+ "grad_norm": 3.794407844543457,
18372
+ "learning_rate": 9.251807775423338e-08,
18373
+ "loss": 0.464,
18374
+ "step": 2603
18375
+ },
18376
+ {
18377
+ "epoch": 0.1422037762638743,
18378
+ "grad_norm": 4.367050647735596,
18379
+ "learning_rate": 8.745031082166843e-08,
18380
+ "loss": 0.592,
18381
+ "step": 2604
18382
+ },
18383
+ {
18384
+ "epoch": 0.1422583860089833,
18385
+ "grad_norm": 2.95585560798645,
18386
+ "learning_rate": 8.252521582805539e-08,
18387
+ "loss": 0.2558,
18388
+ "step": 2605
18389
+ },
18390
+ {
18391
+ "epoch": 0.14231299575409231,
18392
+ "grad_norm": 8.54871940612793,
18393
+ "learning_rate": 7.774279980626853e-08,
18394
+ "loss": 0.3912,
18395
+ "step": 2606
18396
+ },
18397
+ {
18398
+ "epoch": 0.14236760549920133,
18399
+ "grad_norm": 3.3426690101623535,
18400
+ "learning_rate": 7.31030695854451e-08,
18401
+ "loss": 0.3907,
18402
+ "step": 2607
18403
+ },
18404
+ {
18405
+ "epoch": 0.14242221524431034,
18406
+ "grad_norm": 3.3232293128967285,
18407
+ "learning_rate": 6.860603179098535e-08,
18408
+ "loss": 0.3101,
18409
+ "step": 2608
18410
+ },
18411
+ {
18412
+ "epoch": 0.14247682498941935,
18413
+ "grad_norm": 3.4219775199890137,
18414
+ "learning_rate": 6.425169284449695e-08,
18415
+ "loss": 0.4823,
18416
+ "step": 2609
18417
+ },
18418
+ {
18419
+ "epoch": 0.14253143473452837,
18420
+ "grad_norm": 3.4401795864105225,
18421
+ "learning_rate": 6.004005896385057e-08,
18422
+ "loss": 0.3489,
18423
+ "step": 2610
18424
+ },
18425
+ {
18426
+ "epoch": 0.14258604447963738,
18427
+ "grad_norm": 3.8011462688446045,
18428
+ "learning_rate": 5.597113616311322e-08,
18429
+ "loss": 0.4371,
18430
+ "step": 2611
18431
+ },
18432
+ {
18433
+ "epoch": 0.1426406542247464,
18434
+ "grad_norm": 3.8608498573303223,
18435
+ "learning_rate": 5.2044930252592714e-08,
18436
+ "loss": 0.3707,
18437
+ "step": 2612
18438
+ },
18439
+ {
18440
+ "epoch": 0.1426952639698554,
18441
+ "grad_norm": 4.046934604644775,
18442
+ "learning_rate": 4.826144683877098e-08,
18443
+ "loss": 0.4979,
18444
+ "step": 2613
18445
+ },
18446
+ {
18447
+ "epoch": 0.14274987371496445,
18448
+ "grad_norm": 3.4525563716888428,
18449
+ "learning_rate": 4.462069132434854e-08,
18450
+ "loss": 0.3075,
18451
+ "step": 2614
18452
+ },
18453
+ {
18454
+ "epoch": 0.14280448346007346,
18455
+ "grad_norm": 4.25659704208374,
18456
+ "learning_rate": 4.112266890821115e-08,
18457
+ "loss": 0.4598,
18458
+ "step": 2615
18459
+ },
18460
+ {
18461
+ "epoch": 0.14285909320518247,
18462
+ "grad_norm": 3.9870779514312744,
18463
+ "learning_rate": 3.776738458541873e-08,
18464
+ "loss": 0.4917,
18465
+ "step": 2616
18466
+ },
18467
+ {
18468
+ "epoch": 0.1429137029502915,
18469
+ "grad_norm": 4.058454990386963,
18470
+ "learning_rate": 3.4554843147216464e-08,
18471
+ "loss": 0.4187,
18472
+ "step": 2617
18473
+ },
18474
+ {
18475
+ "epoch": 0.1429683126954005,
18476
+ "grad_norm": 3.4139716625213623,
18477
+ "learning_rate": 3.148504918100148e-08,
18478
+ "loss": 0.3487,
18479
+ "step": 2618
18480
+ },
18481
+ {
18482
+ "epoch": 0.14302292244050951,
18483
+ "grad_norm": 4.22880744934082,
18484
+ "learning_rate": 2.855800707034506e-08,
18485
+ "loss": 0.3527,
18486
+ "step": 2619
18487
+ },
18488
+ {
18489
+ "epoch": 0.14307753218561853,
18490
+ "grad_norm": 6.186976432800293,
18491
+ "learning_rate": 2.5773720994981542e-08,
18492
+ "loss": 0.8495,
18493
+ "step": 2620
18494
+ },
18495
+ {
18496
+ "epoch": 0.14313214193072754,
18497
+ "grad_norm": 3.880368232727051,
18498
+ "learning_rate": 2.313219493077501e-08,
18499
+ "loss": 0.3824,
18500
+ "step": 2621
18501
+ },
18502
+ {
18503
+ "epoch": 0.14318675167583655,
18504
+ "grad_norm": 4.570279121398926,
18505
+ "learning_rate": 2.063343264973039e-08,
18506
+ "loss": 0.4636,
18507
+ "step": 2622
18508
+ },
18509
+ {
18510
+ "epoch": 0.14324136142094557,
18511
+ "grad_norm": 5.0569844245910645,
18512
+ "learning_rate": 1.8277437720015668e-08,
18513
+ "loss": 0.4485,
18514
+ "step": 2623
18515
+ },
18516
+ {
18517
+ "epoch": 0.14329597116605458,
18518
+ "grad_norm": 3.9018783569335938,
18519
+ "learning_rate": 1.606421350590637e-08,
18520
+ "loss": 0.3351,
18521
+ "step": 2624
18522
+ },
18523
+ {
18524
+ "epoch": 0.1433505809111636,
18525
+ "grad_norm": 5.237850189208984,
18526
+ "learning_rate": 1.3993763167818863e-08,
18527
+ "loss": 0.3547,
18528
+ "step": 2625
18529
+ },
18530
+ {
18531
+ "epoch": 0.1434051906562726,
18532
+ "grad_norm": 3.112959861755371,
18533
+ "learning_rate": 1.2066089662288172e-08,
18534
+ "loss": 0.3124,
18535
+ "step": 2626
18536
+ },
18537
+ {
18538
+ "epoch": 0.14345980040138162,
18539
+ "grad_norm": 3.759575366973877,
18540
+ "learning_rate": 1.0281195741979055e-08,
18541
+ "loss": 0.4726,
18542
+ "step": 2627
18543
+ },
18544
+ {
18545
+ "epoch": 0.14351441014649063,
18546
+ "grad_norm": 3.29337477684021,
18547
+ "learning_rate": 8.639083955663818e-09,
18548
+ "loss": 0.3879,
18549
+ "step": 2628
18550
+ },
18551
+ {
18552
+ "epoch": 0.14356901989159965,
18553
+ "grad_norm": 6.597833633422852,
18554
+ "learning_rate": 7.1397566482112045e-09,
18555
+ "loss": 0.648,
18556
+ "step": 2629
18557
+ },
18558
+ {
18559
+ "epoch": 0.14362362963670866,
18560
+ "grad_norm": 3.1305155754089355,
18561
+ "learning_rate": 5.783215960630806e-09,
18562
+ "loss": 0.2629,
18563
+ "step": 2630
18564
+ },
18565
+ {
18566
+ "epoch": 0.14367823938181767,
18567
+ "grad_norm": 2.792299270629883,
18568
+ "learning_rate": 4.569463830006448e-09,
18569
+ "loss": 0.3509,
18570
+ "step": 2631
18571
+ },
18572
+ {
18573
+ "epoch": 0.1437328491269267,
18574
+ "grad_norm": 5.20680046081543,
18575
+ "learning_rate": 3.498501989529501e-09,
18576
+ "loss": 0.4678,
18577
+ "step": 2632
18578
+ },
18579
+ {
18580
+ "epoch": 0.14378745887203573,
18581
+ "grad_norm": 4.746030330657959,
18582
+ "learning_rate": 2.5703319685321805e-09,
18583
+ "loss": 0.5117,
18584
+ "step": 2633
18585
+ },
18586
+ {
18587
+ "epoch": 0.14384206861714474,
18588
+ "grad_norm": 4.22279167175293,
18589
+ "learning_rate": 1.784955092376528e-09,
18590
+ "loss": 0.3752,
18591
+ "step": 2634
18592
+ },
18593
+ {
18594
+ "epoch": 0.14389667836225375,
18595
+ "grad_norm": 3.952853202819824,
18596
+ "learning_rate": 1.1423724825765369e-09,
18597
+ "loss": 0.3835,
18598
+ "step": 2635
18599
+ },
18600
+ {
18601
+ "epoch": 0.14395128810736277,
18602
+ "grad_norm": 7.198517322540283,
18603
+ "learning_rate": 6.425850567093328e-10,
18604
+ "loss": 0.8721,
18605
+ "step": 2636
18606
+ },
18607
+ {
18608
+ "epoch": 0.14400589785247178,
18609
+ "grad_norm": 4.09921407699585,
18610
+ "learning_rate": 2.8559352845958234e-10,
18611
+ "loss": 0.4584,
18612
+ "step": 2637
18613
+ },
18614
+ {
18615
+ "epoch": 0.1440605075975808,
18616
+ "grad_norm": 4.515547752380371,
18617
+ "learning_rate": 7.139840760839178e-11,
18618
+ "loss": 0.6557,
18619
+ "step": 2638
18620
+ },
18621
+ {
18622
+ "epoch": 0.1441151173426898,
18623
+ "grad_norm": 3.298574447631836,
18624
+ "learning_rate": 0.0,
18625
+ "loss": 0.3579,
18626
+ "step": 2639
18627
  }
18628
  ],
18629
  "logging_steps": 1,
 
18647
  "should_evaluate": false,
18648
  "should_log": false,
18649
  "should_save": true,
18650
+ "should_training_stop": true
18651
  },
18652
  "attributes": {}
18653
  }
18654
  },
18655
+ "total_flos": 1.8914216183794237e+18,
18656
  "train_batch_size": 2,
18657
  "trial_name": null,
18658
  "trial_params": null