schnell commited on
Commit
23dd097
1 Parent(s): 67ba99c

Training in progress, epoch 13

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1533d6bddf3c8d95f6ff4932752f745542fa6faede711ac4e5f78630fd44fdb
3
  size 236470789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf4bfff09a16707370556956675459f9d75876bfb14a22ccca62770b5a95bbc
3
  size 236470789
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e789cbff592c7abcbb70a0a1080069b2a642bdd41392da72c8a88aa5f570ae
3
  size 118243218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:612de748233d159437078bb7f4ee519273e837c5c8f59ae0d256c06a48bde03c
3
  size 118243218
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1555aa793e45869a50c0ea8fa30e8bcbcb3c7466ee570768b03040a237ce44ce
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa02c10b9cd4c99264361fef392b6d9708225ffc3e69c7351c226b900a6c196
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2004c6163970935b37a492dabc7d20258e92b44d334d59d43006d4c3533be13
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ba5415d56f47c80da2bd5540c9633f62d6ea0cd4624ff0eb7758d31be968154
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c877a115c1cbc8e43174987b4b039efb0e758136a482d45ffc9deb2c3305746a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52058bdd1f404d63cb045dcfb79f7694db4c45d09eda931f288ba1a544e087e7
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
- "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10116,11 +10116,854 @@
10116
  "eval_samples_per_second": 413.518,
10117
  "eval_steps_per_second": 25.845,
10118
  "step": 833676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
- "total_flos": 6.311350074383032e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.0,
5
+ "global_step": 903149,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10116
  "eval_samples_per_second": 413.518,
10117
  "eval_steps_per_second": 25.845,
10118
  "step": 833676
10119
+ },
10120
+ {
10121
+ "epoch": 12.0,
10122
+ "learning_rate": 1.4435945767710915e-05,
10123
+ "loss": 1.7129,
10124
+ "step": 834000
10125
+ },
10126
+ {
10127
+ "epoch": 12.01,
10128
+ "learning_rate": 1.4384019025958179e-05,
10129
+ "loss": 1.7078,
10130
+ "step": 834500
10131
+ },
10132
+ {
10133
+ "epoch": 12.02,
10134
+ "learning_rate": 1.4332092284205445e-05,
10135
+ "loss": 1.7095,
10136
+ "step": 835000
10137
+ },
10138
+ {
10139
+ "epoch": 12.03,
10140
+ "learning_rate": 1.4280165542452708e-05,
10141
+ "loss": 1.7136,
10142
+ "step": 835500
10143
+ },
10144
+ {
10145
+ "epoch": 12.03,
10146
+ "learning_rate": 1.422834265418348e-05,
10147
+ "loss": 1.7119,
10148
+ "step": 836000
10149
+ },
10150
+ {
10151
+ "epoch": 12.04,
10152
+ "learning_rate": 1.4176415912430743e-05,
10153
+ "loss": 1.7085,
10154
+ "step": 836500
10155
+ },
10156
+ {
10157
+ "epoch": 12.05,
10158
+ "learning_rate": 1.4124489170678006e-05,
10159
+ "loss": 1.7114,
10160
+ "step": 837000
10161
+ },
10162
+ {
10163
+ "epoch": 12.06,
10164
+ "learning_rate": 1.4072562428925273e-05,
10165
+ "loss": 1.7107,
10166
+ "step": 837500
10167
+ },
10168
+ {
10169
+ "epoch": 12.06,
10170
+ "learning_rate": 1.4020739540656044e-05,
10171
+ "loss": 1.7106,
10172
+ "step": 838000
10173
+ },
10174
+ {
10175
+ "epoch": 12.07,
10176
+ "learning_rate": 1.3968812798903307e-05,
10177
+ "loss": 1.7099,
10178
+ "step": 838500
10179
+ },
10180
+ {
10181
+ "epoch": 12.08,
10182
+ "learning_rate": 1.3916886057150572e-05,
10183
+ "loss": 1.7152,
10184
+ "step": 839000
10185
+ },
10186
+ {
10187
+ "epoch": 12.08,
10188
+ "learning_rate": 1.3864959315397839e-05,
10189
+ "loss": 1.7166,
10190
+ "step": 839500
10191
+ },
10192
+ {
10193
+ "epoch": 12.09,
10194
+ "learning_rate": 1.3813136427128608e-05,
10195
+ "loss": 1.7123,
10196
+ "step": 840000
10197
+ },
10198
+ {
10199
+ "epoch": 12.1,
10200
+ "learning_rate": 1.3761209685375873e-05,
10201
+ "loss": 1.7127,
10202
+ "step": 840500
10203
+ },
10204
+ {
10205
+ "epoch": 12.11,
10206
+ "learning_rate": 1.3709282943623137e-05,
10207
+ "loss": 1.7144,
10208
+ "step": 841000
10209
+ },
10210
+ {
10211
+ "epoch": 12.11,
10212
+ "learning_rate": 1.3657356201870403e-05,
10213
+ "loss": 1.7121,
10214
+ "step": 841500
10215
+ },
10216
+ {
10217
+ "epoch": 12.12,
10218
+ "learning_rate": 1.3605429460117666e-05,
10219
+ "loss": 1.714,
10220
+ "step": 842000
10221
+ },
10222
+ {
10223
+ "epoch": 12.13,
10224
+ "learning_rate": 1.3553502718364933e-05,
10225
+ "loss": 1.7112,
10226
+ "step": 842500
10227
+ },
10228
+ {
10229
+ "epoch": 12.13,
10230
+ "learning_rate": 1.3501679830095701e-05,
10231
+ "loss": 1.7121,
10232
+ "step": 843000
10233
+ },
10234
+ {
10235
+ "epoch": 12.14,
10236
+ "learning_rate": 1.3449753088342967e-05,
10237
+ "loss": 1.706,
10238
+ "step": 843500
10239
+ },
10240
+ {
10241
+ "epoch": 12.15,
10242
+ "learning_rate": 1.339782634659023e-05,
10243
+ "loss": 1.7121,
10244
+ "step": 844000
10245
+ },
10246
+ {
10247
+ "epoch": 12.16,
10248
+ "learning_rate": 1.3345899604837494e-05,
10249
+ "loss": 1.7075,
10250
+ "step": 844500
10251
+ },
10252
+ {
10253
+ "epoch": 12.16,
10254
+ "learning_rate": 1.3294076716568265e-05,
10255
+ "loss": 1.7105,
10256
+ "step": 845000
10257
+ },
10258
+ {
10259
+ "epoch": 12.17,
10260
+ "learning_rate": 1.3242149974815532e-05,
10261
+ "loss": 1.7086,
10262
+ "step": 845500
10263
+ },
10264
+ {
10265
+ "epoch": 12.18,
10266
+ "learning_rate": 1.3190223233062795e-05,
10267
+ "loss": 1.7103,
10268
+ "step": 846000
10269
+ },
10270
+ {
10271
+ "epoch": 12.18,
10272
+ "learning_rate": 1.313829649131006e-05,
10273
+ "loss": 1.7082,
10274
+ "step": 846500
10275
+ },
10276
+ {
10277
+ "epoch": 12.19,
10278
+ "learning_rate": 1.3086369749557325e-05,
10279
+ "loss": 1.7101,
10280
+ "step": 847000
10281
+ },
10282
+ {
10283
+ "epoch": 12.2,
10284
+ "learning_rate": 1.3034546861288096e-05,
10285
+ "loss": 1.7145,
10286
+ "step": 847500
10287
+ },
10288
+ {
10289
+ "epoch": 12.21,
10290
+ "learning_rate": 1.2982620119535361e-05,
10291
+ "loss": 1.7118,
10292
+ "step": 848000
10293
+ },
10294
+ {
10295
+ "epoch": 12.21,
10296
+ "learning_rate": 1.2930693377782624e-05,
10297
+ "loss": 1.7077,
10298
+ "step": 848500
10299
+ },
10300
+ {
10301
+ "epoch": 12.22,
10302
+ "learning_rate": 1.2878766636029891e-05,
10303
+ "loss": 1.7097,
10304
+ "step": 849000
10305
+ },
10306
+ {
10307
+ "epoch": 12.23,
10308
+ "learning_rate": 1.2826943747760659e-05,
10309
+ "loss": 1.7084,
10310
+ "step": 849500
10311
+ },
10312
+ {
10313
+ "epoch": 12.23,
10314
+ "learning_rate": 1.2775017006007925e-05,
10315
+ "loss": 1.7086,
10316
+ "step": 850000
10317
+ },
10318
+ {
10319
+ "epoch": 12.24,
10320
+ "learning_rate": 1.2723090264255189e-05,
10321
+ "loss": 1.7108,
10322
+ "step": 850500
10323
+ },
10324
+ {
10325
+ "epoch": 12.25,
10326
+ "learning_rate": 1.2671163522502455e-05,
10327
+ "loss": 1.7097,
10328
+ "step": 851000
10329
+ },
10330
+ {
10331
+ "epoch": 12.26,
10332
+ "learning_rate": 1.2619340634233223e-05,
10333
+ "loss": 1.7124,
10334
+ "step": 851500
10335
+ },
10336
+ {
10337
+ "epoch": 12.26,
10338
+ "learning_rate": 1.256741389248049e-05,
10339
+ "loss": 1.7127,
10340
+ "step": 852000
10341
+ },
10342
+ {
10343
+ "epoch": 12.27,
10344
+ "learning_rate": 1.2515487150727753e-05,
10345
+ "loss": 1.7104,
10346
+ "step": 852500
10347
+ },
10348
+ {
10349
+ "epoch": 12.28,
10350
+ "learning_rate": 1.2463560408975018e-05,
10351
+ "loss": 1.7132,
10352
+ "step": 853000
10353
+ },
10354
+ {
10355
+ "epoch": 12.29,
10356
+ "learning_rate": 1.2411633667222283e-05,
10357
+ "loss": 1.7116,
10358
+ "step": 853500
10359
+ },
10360
+ {
10361
+ "epoch": 12.29,
10362
+ "learning_rate": 1.2359810778953054e-05,
10363
+ "loss": 1.7118,
10364
+ "step": 854000
10365
+ },
10366
+ {
10367
+ "epoch": 12.3,
10368
+ "learning_rate": 1.2307884037200317e-05,
10369
+ "loss": 1.7152,
10370
+ "step": 854500
10371
+ },
10372
+ {
10373
+ "epoch": 12.31,
10374
+ "learning_rate": 1.2255957295447582e-05,
10375
+ "loss": 1.7078,
10376
+ "step": 855000
10377
+ },
10378
+ {
10379
+ "epoch": 12.31,
10380
+ "learning_rate": 1.2204030553694847e-05,
10381
+ "loss": 1.7121,
10382
+ "step": 855500
10383
+ },
10384
+ {
10385
+ "epoch": 12.32,
10386
+ "learning_rate": 1.2152207665425617e-05,
10387
+ "loss": 1.7093,
10388
+ "step": 856000
10389
+ },
10390
+ {
10391
+ "epoch": 12.33,
10392
+ "learning_rate": 1.2100280923672883e-05,
10393
+ "loss": 1.708,
10394
+ "step": 856500
10395
+ },
10396
+ {
10397
+ "epoch": 12.34,
10398
+ "learning_rate": 1.2048354181920148e-05,
10399
+ "loss": 1.7122,
10400
+ "step": 857000
10401
+ },
10402
+ {
10403
+ "epoch": 12.34,
10404
+ "learning_rate": 1.1996427440167413e-05,
10405
+ "loss": 1.7071,
10406
+ "step": 857500
10407
+ },
10408
+ {
10409
+ "epoch": 12.35,
10410
+ "learning_rate": 1.1944604551898183e-05,
10411
+ "loss": 1.7148,
10412
+ "step": 858000
10413
+ },
10414
+ {
10415
+ "epoch": 12.36,
10416
+ "learning_rate": 1.1892677810145448e-05,
10417
+ "loss": 1.7069,
10418
+ "step": 858500
10419
+ },
10420
+ {
10421
+ "epoch": 12.36,
10422
+ "learning_rate": 1.1840751068392713e-05,
10423
+ "loss": 1.7117,
10424
+ "step": 859000
10425
+ },
10426
+ {
10427
+ "epoch": 12.37,
10428
+ "learning_rate": 1.1788824326639977e-05,
10429
+ "loss": 1.7114,
10430
+ "step": 859500
10431
+ },
10432
+ {
10433
+ "epoch": 12.38,
10434
+ "learning_rate": 1.1737001438370747e-05,
10435
+ "loss": 1.7095,
10436
+ "step": 860000
10437
+ },
10438
+ {
10439
+ "epoch": 12.39,
10440
+ "learning_rate": 1.1685074696618012e-05,
10441
+ "loss": 1.7103,
10442
+ "step": 860500
10443
+ },
10444
+ {
10445
+ "epoch": 12.39,
10446
+ "learning_rate": 1.1633147954865277e-05,
10447
+ "loss": 1.7089,
10448
+ "step": 861000
10449
+ },
10450
+ {
10451
+ "epoch": 12.4,
10452
+ "learning_rate": 1.1581221213112542e-05,
10453
+ "loss": 1.709,
10454
+ "step": 861500
10455
+ },
10456
+ {
10457
+ "epoch": 12.41,
10458
+ "learning_rate": 1.1529294471359805e-05,
10459
+ "loss": 1.7089,
10460
+ "step": 862000
10461
+ },
10462
+ {
10463
+ "epoch": 12.41,
10464
+ "learning_rate": 1.147736772960707e-05,
10465
+ "loss": 1.7028,
10466
+ "step": 862500
10467
+ },
10468
+ {
10469
+ "epoch": 12.42,
10470
+ "learning_rate": 1.1425544841337841e-05,
10471
+ "loss": 1.7084,
10472
+ "step": 863000
10473
+ },
10474
+ {
10475
+ "epoch": 12.43,
10476
+ "learning_rate": 1.1373618099585104e-05,
10477
+ "loss": 1.7113,
10478
+ "step": 863500
10479
+ },
10480
+ {
10481
+ "epoch": 12.44,
10482
+ "learning_rate": 1.1321691357832371e-05,
10483
+ "loss": 1.7077,
10484
+ "step": 864000
10485
+ },
10486
+ {
10487
+ "epoch": 12.44,
10488
+ "learning_rate": 1.1269764616079636e-05,
10489
+ "loss": 1.7084,
10490
+ "step": 864500
10491
+ },
10492
+ {
10493
+ "epoch": 12.45,
10494
+ "learning_rate": 1.1217837874326901e-05,
10495
+ "loss": 1.7084,
10496
+ "step": 865000
10497
+ },
10498
+ {
10499
+ "epoch": 12.46,
10500
+ "learning_rate": 1.116601498605767e-05,
10501
+ "loss": 1.7087,
10502
+ "step": 865500
10503
+ },
10504
+ {
10505
+ "epoch": 12.47,
10506
+ "learning_rate": 1.1114088244304935e-05,
10507
+ "loss": 1.7086,
10508
+ "step": 866000
10509
+ },
10510
+ {
10511
+ "epoch": 12.47,
10512
+ "learning_rate": 1.10621615025522e-05,
10513
+ "loss": 1.7121,
10514
+ "step": 866500
10515
+ },
10516
+ {
10517
+ "epoch": 12.48,
10518
+ "learning_rate": 1.1010234760799465e-05,
10519
+ "loss": 1.7092,
10520
+ "step": 867000
10521
+ },
10522
+ {
10523
+ "epoch": 12.49,
10524
+ "learning_rate": 1.0958411872530235e-05,
10525
+ "loss": 1.7075,
10526
+ "step": 867500
10527
+ },
10528
+ {
10529
+ "epoch": 12.49,
10530
+ "learning_rate": 1.09064851307775e-05,
10531
+ "loss": 1.7053,
10532
+ "step": 868000
10533
+ },
10534
+ {
10535
+ "epoch": 12.5,
10536
+ "learning_rate": 1.0854558389024765e-05,
10537
+ "loss": 1.7118,
10538
+ "step": 868500
10539
+ },
10540
+ {
10541
+ "epoch": 12.51,
10542
+ "learning_rate": 1.080263164727203e-05,
10543
+ "loss": 1.7089,
10544
+ "step": 869000
10545
+ },
10546
+ {
10547
+ "epoch": 12.52,
10548
+ "learning_rate": 1.07508087590028e-05,
10549
+ "loss": 1.7085,
10550
+ "step": 869500
10551
+ },
10552
+ {
10553
+ "epoch": 12.52,
10554
+ "learning_rate": 1.0698882017250064e-05,
10555
+ "loss": 1.7098,
10556
+ "step": 870000
10557
+ },
10558
+ {
10559
+ "epoch": 12.53,
10560
+ "learning_rate": 1.0646955275497329e-05,
10561
+ "loss": 1.7075,
10562
+ "step": 870500
10563
+ },
10564
+ {
10565
+ "epoch": 12.54,
10566
+ "learning_rate": 1.0595028533744594e-05,
10567
+ "loss": 1.7067,
10568
+ "step": 871000
10569
+ },
10570
+ {
10571
+ "epoch": 12.54,
10572
+ "learning_rate": 1.0543101791991857e-05,
10573
+ "loss": 1.7055,
10574
+ "step": 871500
10575
+ },
10576
+ {
10577
+ "epoch": 12.55,
10578
+ "learning_rate": 1.0491175050239124e-05,
10579
+ "loss": 1.7062,
10580
+ "step": 872000
10581
+ },
10582
+ {
10583
+ "epoch": 12.56,
10584
+ "learning_rate": 1.0439352161969893e-05,
10585
+ "loss": 1.708,
10586
+ "step": 872500
10587
+ },
10588
+ {
10589
+ "epoch": 12.57,
10590
+ "learning_rate": 1.0387425420217158e-05,
10591
+ "loss": 1.7085,
10592
+ "step": 873000
10593
+ },
10594
+ {
10595
+ "epoch": 12.57,
10596
+ "learning_rate": 1.0335498678464423e-05,
10597
+ "loss": 1.7083,
10598
+ "step": 873500
10599
+ },
10600
+ {
10601
+ "epoch": 12.58,
10602
+ "learning_rate": 1.0283571936711688e-05,
10603
+ "loss": 1.7091,
10604
+ "step": 874000
10605
+ },
10606
+ {
10607
+ "epoch": 12.59,
10608
+ "learning_rate": 1.0231645194958953e-05,
10609
+ "loss": 1.707,
10610
+ "step": 874500
10611
+ },
10612
+ {
10613
+ "epoch": 12.59,
10614
+ "learning_rate": 1.0179822306689723e-05,
10615
+ "loss": 1.7085,
10616
+ "step": 875000
10617
+ },
10618
+ {
10619
+ "epoch": 12.6,
10620
+ "learning_rate": 1.0127895564936987e-05,
10621
+ "loss": 1.7052,
10622
+ "step": 875500
10623
+ },
10624
+ {
10625
+ "epoch": 12.61,
10626
+ "learning_rate": 1.0075968823184252e-05,
10627
+ "loss": 1.7092,
10628
+ "step": 876000
10629
+ },
10630
+ {
10631
+ "epoch": 12.62,
10632
+ "learning_rate": 1.0024042081431517e-05,
10633
+ "loss": 1.705,
10634
+ "step": 876500
10635
+ },
10636
+ {
10637
+ "epoch": 12.62,
10638
+ "learning_rate": 9.972219193162287e-06,
10639
+ "loss": 1.7111,
10640
+ "step": 877000
10641
+ },
10642
+ {
10643
+ "epoch": 12.63,
10644
+ "learning_rate": 9.920292451409552e-06,
10645
+ "loss": 1.712,
10646
+ "step": 877500
10647
+ },
10648
+ {
10649
+ "epoch": 12.64,
10650
+ "learning_rate": 9.868365709656817e-06,
10651
+ "loss": 1.7061,
10652
+ "step": 878000
10653
+ },
10654
+ {
10655
+ "epoch": 12.65,
10656
+ "learning_rate": 9.816438967904082e-06,
10657
+ "loss": 1.7059,
10658
+ "step": 878500
10659
+ },
10660
+ {
10661
+ "epoch": 12.65,
10662
+ "learning_rate": 9.764616079634851e-06,
10663
+ "loss": 1.7086,
10664
+ "step": 879000
10665
+ },
10666
+ {
10667
+ "epoch": 12.66,
10668
+ "learning_rate": 9.712689337882116e-06,
10669
+ "loss": 1.7065,
10670
+ "step": 879500
10671
+ },
10672
+ {
10673
+ "epoch": 12.67,
10674
+ "learning_rate": 9.660762596129381e-06,
10675
+ "loss": 1.703,
10676
+ "step": 880000
10677
+ },
10678
+ {
10679
+ "epoch": 12.67,
10680
+ "learning_rate": 9.608835854376646e-06,
10681
+ "loss": 1.7063,
10682
+ "step": 880500
10683
+ },
10684
+ {
10685
+ "epoch": 12.68,
10686
+ "learning_rate": 9.556909112623911e-06,
10687
+ "loss": 1.7087,
10688
+ "step": 881000
10689
+ },
10690
+ {
10691
+ "epoch": 12.69,
10692
+ "learning_rate": 9.504982370871176e-06,
10693
+ "loss": 1.7095,
10694
+ "step": 881500
10695
+ },
10696
+ {
10697
+ "epoch": 12.7,
10698
+ "learning_rate": 9.45305562911844e-06,
10699
+ "loss": 1.7099,
10700
+ "step": 882000
10701
+ },
10702
+ {
10703
+ "epoch": 12.7,
10704
+ "learning_rate": 9.40123274084921e-06,
10705
+ "loss": 1.709,
10706
+ "step": 882500
10707
+ },
10708
+ {
10709
+ "epoch": 12.71,
10710
+ "learning_rate": 9.349305999096475e-06,
10711
+ "loss": 1.7032,
10712
+ "step": 883000
10713
+ },
10714
+ {
10715
+ "epoch": 12.72,
10716
+ "learning_rate": 9.29737925734374e-06,
10717
+ "loss": 1.7104,
10718
+ "step": 883500
10719
+ },
10720
+ {
10721
+ "epoch": 12.72,
10722
+ "learning_rate": 9.245452515591005e-06,
10723
+ "loss": 1.706,
10724
+ "step": 884000
10725
+ },
10726
+ {
10727
+ "epoch": 12.73,
10728
+ "learning_rate": 9.19352577383827e-06,
10729
+ "loss": 1.7081,
10730
+ "step": 884500
10731
+ },
10732
+ {
10733
+ "epoch": 12.74,
10734
+ "learning_rate": 9.14170288556904e-06,
10735
+ "loss": 1.7088,
10736
+ "step": 885000
10737
+ },
10738
+ {
10739
+ "epoch": 12.75,
10740
+ "learning_rate": 9.089776143816304e-06,
10741
+ "loss": 1.7035,
10742
+ "step": 885500
10743
+ },
10744
+ {
10745
+ "epoch": 12.75,
10746
+ "learning_rate": 9.03784940206357e-06,
10747
+ "loss": 1.7031,
10748
+ "step": 886000
10749
+ },
10750
+ {
10751
+ "epoch": 12.76,
10752
+ "learning_rate": 8.985922660310834e-06,
10753
+ "loss": 1.7079,
10754
+ "step": 886500
10755
+ },
10756
+ {
10757
+ "epoch": 12.77,
10758
+ "learning_rate": 8.933995918558097e-06,
10759
+ "loss": 1.7058,
10760
+ "step": 887000
10761
+ },
10762
+ {
10763
+ "epoch": 12.77,
10764
+ "learning_rate": 8.882173030288869e-06,
10765
+ "loss": 1.7032,
10766
+ "step": 887500
10767
+ },
10768
+ {
10769
+ "epoch": 12.78,
10770
+ "learning_rate": 8.830246288536134e-06,
10771
+ "loss": 1.7037,
10772
+ "step": 888000
10773
+ },
10774
+ {
10775
+ "epoch": 12.79,
10776
+ "learning_rate": 8.778319546783399e-06,
10777
+ "loss": 1.703,
10778
+ "step": 888500
10779
+ },
10780
+ {
10781
+ "epoch": 12.8,
10782
+ "learning_rate": 8.726392805030663e-06,
10783
+ "loss": 1.7074,
10784
+ "step": 889000
10785
+ },
10786
+ {
10787
+ "epoch": 12.8,
10788
+ "learning_rate": 8.674466063277928e-06,
10789
+ "loss": 1.7061,
10790
+ "step": 889500
10791
+ },
10792
+ {
10793
+ "epoch": 12.81,
10794
+ "learning_rate": 8.622643175008698e-06,
10795
+ "loss": 1.7065,
10796
+ "step": 890000
10797
+ },
10798
+ {
10799
+ "epoch": 12.82,
10800
+ "learning_rate": 8.570716433255963e-06,
10801
+ "loss": 1.7058,
10802
+ "step": 890500
10803
+ },
10804
+ {
10805
+ "epoch": 12.83,
10806
+ "learning_rate": 8.518789691503228e-06,
10807
+ "loss": 1.7054,
10808
+ "step": 891000
10809
+ },
10810
+ {
10811
+ "epoch": 12.83,
10812
+ "learning_rate": 8.466862949750493e-06,
10813
+ "loss": 1.7071,
10814
+ "step": 891500
10815
+ },
10816
+ {
10817
+ "epoch": 12.84,
10818
+ "learning_rate": 8.415040061481262e-06,
10819
+ "loss": 1.703,
10820
+ "step": 892000
10821
+ },
10822
+ {
10823
+ "epoch": 12.85,
10824
+ "learning_rate": 8.363113319728527e-06,
10825
+ "loss": 1.7054,
10826
+ "step": 892500
10827
+ },
10828
+ {
10829
+ "epoch": 12.85,
10830
+ "learning_rate": 8.311186577975792e-06,
10831
+ "loss": 1.7059,
10832
+ "step": 893000
10833
+ },
10834
+ {
10835
+ "epoch": 12.86,
10836
+ "learning_rate": 8.259259836223057e-06,
10837
+ "loss": 1.7043,
10838
+ "step": 893500
10839
+ },
10840
+ {
10841
+ "epoch": 12.87,
10842
+ "learning_rate": 8.207436947953827e-06,
10843
+ "loss": 1.6995,
10844
+ "step": 894000
10845
+ },
10846
+ {
10847
+ "epoch": 12.88,
10848
+ "learning_rate": 8.155510206201092e-06,
10849
+ "loss": 1.7058,
10850
+ "step": 894500
10851
+ },
10852
+ {
10853
+ "epoch": 12.88,
10854
+ "learning_rate": 8.103583464448357e-06,
10855
+ "loss": 1.7018,
10856
+ "step": 895000
10857
+ },
10858
+ {
10859
+ "epoch": 12.89,
10860
+ "learning_rate": 8.051656722695621e-06,
10861
+ "loss": 1.7065,
10862
+ "step": 895500
10863
+ },
10864
+ {
10865
+ "epoch": 12.9,
10866
+ "learning_rate": 7.999729980942886e-06,
10867
+ "loss": 1.7049,
10868
+ "step": 896000
10869
+ },
10870
+ {
10871
+ "epoch": 12.9,
10872
+ "learning_rate": 7.947907092673656e-06,
10873
+ "loss": 1.7068,
10874
+ "step": 896500
10875
+ },
10876
+ {
10877
+ "epoch": 12.91,
10878
+ "learning_rate": 7.895980350920921e-06,
10879
+ "loss": 1.7001,
10880
+ "step": 897000
10881
+ },
10882
+ {
10883
+ "epoch": 12.92,
10884
+ "learning_rate": 7.844053609168186e-06,
10885
+ "loss": 1.7098,
10886
+ "step": 897500
10887
+ },
10888
+ {
10889
+ "epoch": 12.93,
10890
+ "learning_rate": 7.79212686741545e-06,
10891
+ "loss": 1.7014,
10892
+ "step": 898000
10893
+ },
10894
+ {
10895
+ "epoch": 12.93,
10896
+ "learning_rate": 7.740303979146222e-06,
10897
+ "loss": 1.7061,
10898
+ "step": 898500
10899
+ },
10900
+ {
10901
+ "epoch": 12.94,
10902
+ "learning_rate": 7.688377237393487e-06,
10903
+ "loss": 1.705,
10904
+ "step": 899000
10905
+ },
10906
+ {
10907
+ "epoch": 12.95,
10908
+ "learning_rate": 7.63645049564075e-06,
10909
+ "loss": 1.7037,
10910
+ "step": 899500
10911
+ },
10912
+ {
10913
+ "epoch": 12.95,
10914
+ "learning_rate": 7.584523753888015e-06,
10915
+ "loss": 1.7083,
10916
+ "step": 900000
10917
+ },
10918
+ {
10919
+ "epoch": 12.96,
10920
+ "learning_rate": 7.53259701213528e-06,
10921
+ "loss": 1.7029,
10922
+ "step": 900500
10923
+ },
10924
+ {
10925
+ "epoch": 12.97,
10926
+ "learning_rate": 7.4807741238660495e-06,
10927
+ "loss": 1.7047,
10928
+ "step": 901000
10929
+ },
10930
+ {
10931
+ "epoch": 12.98,
10932
+ "learning_rate": 7.4288473821133144e-06,
10933
+ "loss": 1.7049,
10934
+ "step": 901500
10935
+ },
10936
+ {
10937
+ "epoch": 12.98,
10938
+ "learning_rate": 7.376920640360579e-06,
10939
+ "loss": 1.7073,
10940
+ "step": 902000
10941
+ },
10942
+ {
10943
+ "epoch": 12.99,
10944
+ "learning_rate": 7.324993898607844e-06,
10945
+ "loss": 1.7036,
10946
+ "step": 902500
10947
+ },
10948
+ {
10949
+ "epoch": 13.0,
10950
+ "learning_rate": 7.273171010338614e-06,
10951
+ "loss": 1.7011,
10952
+ "step": 903000
10953
+ },
10954
+ {
10955
+ "epoch": 13.0,
10956
+ "eval_accuracy": 0.6677293660710026,
10957
+ "eval_loss": 1.5736616849899292,
10958
+ "eval_runtime": 1292.8595,
10959
+ "eval_samples_per_second": 416.857,
10960
+ "eval_steps_per_second": 26.054,
10961
+ "step": 903149
10962
  }
10963
  ],
10964
  "max_steps": 972622,
10965
  "num_train_epochs": 14,
10966
+ "total_flos": 6.837275613018931e+18,
10967
  "trial_name": null,
10968
  "trial_params": null
10969
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e789cbff592c7abcbb70a0a1080069b2a642bdd41392da72c8a88aa5f570ae
3
  size 118243218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:612de748233d159437078bb7f4ee519273e837c5c8f59ae0d256c06a48bde03c
3
  size 118243218
runs/Feb22_09-35-52_ubuntu-2004/events.out.tfevents.1677026198.ubuntu-2004.1870487.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54427ce9472c28a9802ca844bd62a2af3e4bc1d184b1bcde76bbe915e3c41367
3
- size 274328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e26da7adb61568c20149e3a3b8d6acebeecb52046b416908a25f52f3d232dca1
3
+ size 296897