schnell committed on
Commit acdfbd0
1 Parent(s): a9e5457

Training in progress, epoch 13

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee64dad6d55d6511beb6ac0b9d53d67463b43f31b3c562840789a31fb7f3ab68
+ oid sha256:abf9b77ab18c1d965eda50968a3847cc6106b58462d4356ff7de7ae0d63608a8
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
+ oid sha256:63317fdc9bd1e5fb626b521461e4e06505472bbc1f4c4073a239d29dd5ffa2fe
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da377c1b16e0291f1d5c1d80cc85b79a3d05b723665175d2784787456eb041f1
+ oid sha256:333da71000447e43f702bfc9998f6ad6067449f86dbeb5930f75e8dcb65fe68a
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:82531253b364cd83aa3acd17616fa6b687da8d3637e43c753c627947983b45a3
+ oid sha256:2a0b12574d897bb564b80ea26065589ab67e680d05dd345d6160de20ec922697
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2bfe7c18952ddc5f5995dc7a21d9ebb622f29d27e920708e7de9b94f5fbe46f3
+ oid sha256:5512b0b1df1194ea96bc608e5a07fb71625f2b0a54742e7e5c4dc9f057af07fa
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e8373a1388c2e6c546588d79c969bd60bd5ee55c12004454fea447aefb01b11a
+ oid sha256:a30aea1e37e8569b5ef68b5a8eaf2f8b65c53373e99d320d5195a1cc1d7e588e
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:31d6c65356079d9822a9fd489b5c7372ee32f1df23966bba618ffdc93860f84c
+ oid sha256:298f7652523db2ed0fc2a4e1bcef4e45b30cbf7cc2fa3479838e7cd2bddbc4b3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
- "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10116,11 +10116,854 @@
10116
  "eval_samples_per_second": 832.478,
10117
  "eval_steps_per_second": 34.687,
10118
  "step": 833676
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
- "total_flos": 5.832190387963298e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.0,
5
+ "global_step": 903149,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10116
  "eval_samples_per_second": 832.478,
10117
  "eval_steps_per_second": 34.687,
10118
  "step": 833676
10119
+ },
10120
+ {
10121
+ "epoch": 12.0,
10122
+ "learning_rate": 1.443418025849132e-05,
10123
+ "loss": 1.8307,
10124
+ "step": 834000
10125
+ },
10126
+ {
10127
+ "epoch": 12.01,
10128
+ "learning_rate": 1.4382253516738586e-05,
10129
+ "loss": 1.8256,
10130
+ "step": 834500
10131
+ },
10132
+ {
10133
+ "epoch": 12.02,
10134
+ "learning_rate": 1.4330430628469358e-05,
10135
+ "loss": 1.829,
10136
+ "step": 835000
10137
+ },
10138
+ {
10139
+ "epoch": 12.03,
10140
+ "learning_rate": 1.4278503886716621e-05,
10141
+ "loss": 1.8268,
10142
+ "step": 835500
10143
+ },
10144
+ {
10145
+ "epoch": 12.03,
10146
+ "learning_rate": 1.4226577144963884e-05,
10147
+ "loss": 1.8277,
10148
+ "step": 836000
10149
+ },
10150
+ {
10151
+ "epoch": 12.04,
10152
+ "learning_rate": 1.417465040321115e-05,
10153
+ "loss": 1.8258,
10154
+ "step": 836500
10155
+ },
10156
+ {
10157
+ "epoch": 12.05,
10158
+ "learning_rate": 1.4122723661458414e-05,
10159
+ "loss": 1.8224,
10160
+ "step": 837000
10161
+ },
10162
+ {
10163
+ "epoch": 12.06,
10164
+ "learning_rate": 1.4070900773189185e-05,
10165
+ "loss": 1.827,
10166
+ "step": 837500
10167
+ },
10168
+ {
10169
+ "epoch": 12.06,
10170
+ "learning_rate": 1.401897403143645e-05,
10171
+ "loss": 1.8239,
10172
+ "step": 838000
10173
+ },
10174
+ {
10175
+ "epoch": 12.07,
10176
+ "learning_rate": 1.3967047289683715e-05,
10177
+ "loss": 1.8255,
10178
+ "step": 838500
10179
+ },
10180
+ {
10181
+ "epoch": 12.08,
10182
+ "learning_rate": 1.391512054793098e-05,
10183
+ "loss": 1.8282,
10184
+ "step": 839000
10185
+ },
10186
+ {
10187
+ "epoch": 12.08,
10188
+ "learning_rate": 1.3863193806178247e-05,
10189
+ "loss": 1.8262,
10190
+ "step": 839500
10191
+ },
10192
+ {
10193
+ "epoch": 12.09,
10194
+ "learning_rate": 1.3811370917909014e-05,
10195
+ "loss": 1.8251,
10196
+ "step": 840000
10197
+ },
10198
+ {
10199
+ "epoch": 12.1,
10200
+ "learning_rate": 1.3759444176156281e-05,
10201
+ "loss": 1.8257,
10202
+ "step": 840500
10203
+ },
10204
+ {
10205
+ "epoch": 12.11,
10206
+ "learning_rate": 1.3707517434403544e-05,
10207
+ "loss": 1.825,
10208
+ "step": 841000
10209
+ },
10210
+ {
10211
+ "epoch": 12.11,
10212
+ "learning_rate": 1.3655590692650807e-05,
10213
+ "loss": 1.8269,
10214
+ "step": 841500
10215
+ },
10216
+ {
10217
+ "epoch": 12.12,
10218
+ "learning_rate": 1.3603767804381579e-05,
10219
+ "loss": 1.8266,
10220
+ "step": 842000
10221
+ },
10222
+ {
10223
+ "epoch": 12.13,
10224
+ "learning_rate": 1.3551841062628845e-05,
10225
+ "loss": 1.8275,
10226
+ "step": 842500
10227
+ },
10228
+ {
10229
+ "epoch": 12.13,
10230
+ "learning_rate": 1.3499914320876109e-05,
10231
+ "loss": 1.8266,
10232
+ "step": 843000
10233
+ },
10234
+ {
10235
+ "epoch": 12.14,
10236
+ "learning_rate": 1.3447987579123372e-05,
10237
+ "loss": 1.8256,
10238
+ "step": 843500
10239
+ },
10240
+ {
10241
+ "epoch": 12.15,
10242
+ "learning_rate": 1.3396164690854143e-05,
10243
+ "loss": 1.8266,
10244
+ "step": 844000
10245
+ },
10246
+ {
10247
+ "epoch": 12.16,
10248
+ "learning_rate": 1.334423794910141e-05,
10249
+ "loss": 1.8231,
10250
+ "step": 844500
10251
+ },
10252
+ {
10253
+ "epoch": 12.16,
10254
+ "learning_rate": 1.3292311207348673e-05,
10255
+ "loss": 1.8236,
10256
+ "step": 845000
10257
+ },
10258
+ {
10259
+ "epoch": 12.17,
10260
+ "learning_rate": 1.3240384465595936e-05,
10261
+ "loss": 1.8239,
10262
+ "step": 845500
10263
+ },
10264
+ {
10265
+ "epoch": 12.18,
10266
+ "learning_rate": 1.3188561577326707e-05,
10267
+ "loss": 1.8254,
10268
+ "step": 846000
10269
+ },
10270
+ {
10271
+ "epoch": 12.18,
10272
+ "learning_rate": 1.3136634835573972e-05,
10273
+ "loss": 1.8244,
10274
+ "step": 846500
10275
+ },
10276
+ {
10277
+ "epoch": 12.19,
10278
+ "learning_rate": 1.3084708093821237e-05,
10279
+ "loss": 1.8238,
10280
+ "step": 847000
10281
+ },
10282
+ {
10283
+ "epoch": 12.2,
10284
+ "learning_rate": 1.3032781352068502e-05,
10285
+ "loss": 1.823,
10286
+ "step": 847500
10287
+ },
10288
+ {
10289
+ "epoch": 12.21,
10290
+ "learning_rate": 1.2980854610315769e-05,
10291
+ "loss": 1.8253,
10292
+ "step": 848000
10293
+ },
10294
+ {
10295
+ "epoch": 12.21,
10296
+ "learning_rate": 1.2929031722046537e-05,
10297
+ "loss": 1.8226,
10298
+ "step": 848500
10299
+ },
10300
+ {
10301
+ "epoch": 12.22,
10302
+ "learning_rate": 1.2877104980293803e-05,
10303
+ "loss": 1.8251,
10304
+ "step": 849000
10305
+ },
10306
+ {
10307
+ "epoch": 12.23,
10308
+ "learning_rate": 1.2825178238541067e-05,
10309
+ "loss": 1.8232,
10310
+ "step": 849500
10311
+ },
10312
+ {
10313
+ "epoch": 12.23,
10314
+ "learning_rate": 1.2773251496788333e-05,
10315
+ "loss": 1.8279,
10316
+ "step": 850000
10317
+ },
10318
+ {
10319
+ "epoch": 12.24,
10320
+ "learning_rate": 1.2721324755035596e-05,
10321
+ "loss": 1.8273,
10322
+ "step": 850500
10323
+ },
10324
+ {
10325
+ "epoch": 12.25,
10326
+ "learning_rate": 1.2669501866766368e-05,
10327
+ "loss": 1.8245,
10328
+ "step": 851000
10329
+ },
10330
+ {
10331
+ "epoch": 12.26,
10332
+ "learning_rate": 1.2617575125013631e-05,
10333
+ "loss": 1.8226,
10334
+ "step": 851500
10335
+ },
10336
+ {
10337
+ "epoch": 12.26,
10338
+ "learning_rate": 1.2565648383260897e-05,
10339
+ "loss": 1.8262,
10340
+ "step": 852000
10341
+ },
10342
+ {
10343
+ "epoch": 12.27,
10344
+ "learning_rate": 1.251372164150816e-05,
10345
+ "loss": 1.8248,
10346
+ "step": 852500
10347
+ },
10348
+ {
10349
+ "epoch": 12.28,
10350
+ "learning_rate": 1.246189875323893e-05,
10351
+ "loss": 1.8203,
10352
+ "step": 853000
10353
+ },
10354
+ {
10355
+ "epoch": 12.29,
10356
+ "learning_rate": 1.2409972011486195e-05,
10357
+ "loss": 1.8257,
10358
+ "step": 853500
10359
+ },
10360
+ {
10361
+ "epoch": 12.29,
10362
+ "learning_rate": 1.235804526973346e-05,
10363
+ "loss": 1.8219,
10364
+ "step": 854000
10365
+ },
10366
+ {
10367
+ "epoch": 12.3,
10368
+ "learning_rate": 1.2306118527980725e-05,
10369
+ "loss": 1.8203,
10370
+ "step": 854500
10371
+ },
10372
+ {
10373
+ "epoch": 12.31,
10374
+ "learning_rate": 1.2254295639711495e-05,
10375
+ "loss": 1.8241,
10376
+ "step": 855000
10377
+ },
10378
+ {
10379
+ "epoch": 12.31,
10380
+ "learning_rate": 1.220236889795876e-05,
10381
+ "loss": 1.8251,
10382
+ "step": 855500
10383
+ },
10384
+ {
10385
+ "epoch": 12.32,
10386
+ "learning_rate": 1.2150442156206024e-05,
10387
+ "loss": 1.8244,
10388
+ "step": 856000
10389
+ },
10390
+ {
10391
+ "epoch": 12.33,
10392
+ "learning_rate": 1.2098515414453291e-05,
10393
+ "loss": 1.8227,
10394
+ "step": 856500
10395
+ },
10396
+ {
10397
+ "epoch": 12.34,
10398
+ "learning_rate": 1.204669252618406e-05,
10399
+ "loss": 1.8229,
10400
+ "step": 857000
10401
+ },
10402
+ {
10403
+ "epoch": 12.34,
10404
+ "learning_rate": 1.1994765784431326e-05,
10405
+ "loss": 1.8256,
10406
+ "step": 857500
10407
+ },
10408
+ {
10409
+ "epoch": 12.35,
10410
+ "learning_rate": 1.194283904267859e-05,
10411
+ "loss": 1.8224,
10412
+ "step": 858000
10413
+ },
10414
+ {
10415
+ "epoch": 12.36,
10416
+ "learning_rate": 1.1890912300925855e-05,
10417
+ "loss": 1.8221,
10418
+ "step": 858500
10419
+ },
10420
+ {
10421
+ "epoch": 12.36,
10422
+ "learning_rate": 1.1838985559173119e-05,
10423
+ "loss": 1.826,
10424
+ "step": 859000
10425
+ },
10426
+ {
10427
+ "epoch": 12.37,
10428
+ "learning_rate": 1.178716267090389e-05,
10429
+ "loss": 1.8251,
10430
+ "step": 859500
10431
+ },
10432
+ {
10433
+ "epoch": 12.38,
10434
+ "learning_rate": 1.1735235929151155e-05,
10435
+ "loss": 1.8236,
10436
+ "step": 860000
10437
+ },
10438
+ {
10439
+ "epoch": 12.39,
10440
+ "learning_rate": 1.1683309187398418e-05,
10441
+ "loss": 1.82,
10442
+ "step": 860500
10443
+ },
10444
+ {
10445
+ "epoch": 12.39,
10446
+ "learning_rate": 1.1631382445645683e-05,
10447
+ "loss": 1.8277,
10448
+ "step": 861000
10449
+ },
10450
+ {
10451
+ "epoch": 12.4,
10452
+ "learning_rate": 1.1579455703892948e-05,
10453
+ "loss": 1.8275,
10454
+ "step": 861500
10455
+ },
10456
+ {
10457
+ "epoch": 12.41,
10458
+ "learning_rate": 1.152763281562372e-05,
10459
+ "loss": 1.8221,
10460
+ "step": 862000
10461
+ },
10462
+ {
10463
+ "epoch": 12.41,
10464
+ "learning_rate": 1.1475706073870982e-05,
10465
+ "loss": 1.8203,
10466
+ "step": 862500
10467
+ },
10468
+ {
10469
+ "epoch": 12.42,
10470
+ "learning_rate": 1.1423779332118247e-05,
10471
+ "loss": 1.8198,
10472
+ "step": 863000
10473
+ },
10474
+ {
10475
+ "epoch": 12.43,
10476
+ "learning_rate": 1.1371852590365512e-05,
10477
+ "loss": 1.8251,
10478
+ "step": 863500
10479
+ },
10480
+ {
10481
+ "epoch": 12.44,
10482
+ "learning_rate": 1.1319925848612779e-05,
10483
+ "loss": 1.8221,
10484
+ "step": 864000
10485
+ },
10486
+ {
10487
+ "epoch": 12.44,
10488
+ "learning_rate": 1.1268102960343548e-05,
10489
+ "loss": 1.8245,
10490
+ "step": 864500
10491
+ },
10492
+ {
10493
+ "epoch": 12.45,
10494
+ "learning_rate": 1.1216176218590813e-05,
10495
+ "loss": 1.8263,
10496
+ "step": 865000
10497
+ },
10498
+ {
10499
+ "epoch": 12.46,
10500
+ "learning_rate": 1.1164249476838078e-05,
10501
+ "loss": 1.8232,
10502
+ "step": 865500
10503
+ },
10504
+ {
10505
+ "epoch": 12.47,
10506
+ "learning_rate": 1.1112322735085343e-05,
10507
+ "loss": 1.821,
10508
+ "step": 866000
10509
+ },
10510
+ {
10511
+ "epoch": 12.47,
10512
+ "learning_rate": 1.1060499846816113e-05,
10513
+ "loss": 1.8223,
10514
+ "step": 866500
10515
+ },
10516
+ {
10517
+ "epoch": 12.48,
10518
+ "learning_rate": 1.1008573105063378e-05,
10519
+ "loss": 1.8272,
10520
+ "step": 867000
10521
+ },
10522
+ {
10523
+ "epoch": 12.49,
10524
+ "learning_rate": 1.0956646363310643e-05,
10525
+ "loss": 1.8216,
10526
+ "step": 867500
10527
+ },
10528
+ {
10529
+ "epoch": 12.49,
10530
+ "learning_rate": 1.0904719621557907e-05,
10531
+ "loss": 1.823,
10532
+ "step": 868000
10533
+ },
10534
+ {
10535
+ "epoch": 12.5,
10536
+ "learning_rate": 1.085279287980517e-05,
10537
+ "loss": 1.8194,
10538
+ "step": 868500
10539
+ },
10540
+ {
10541
+ "epoch": 12.51,
10542
+ "learning_rate": 1.0800969991535942e-05,
10543
+ "loss": 1.8213,
10544
+ "step": 869000
10545
+ },
10546
+ {
10547
+ "epoch": 12.52,
10548
+ "learning_rate": 1.0749043249783207e-05,
10549
+ "loss": 1.8196,
10550
+ "step": 869500
10551
+ },
10552
+ {
10553
+ "epoch": 12.52,
10554
+ "learning_rate": 1.069711650803047e-05,
10555
+ "loss": 1.8231,
10556
+ "step": 870000
10557
+ },
10558
+ {
10559
+ "epoch": 12.53,
10560
+ "learning_rate": 1.0645189766277735e-05,
10561
+ "loss": 1.8158,
10562
+ "step": 870500
10563
+ },
10564
+ {
10565
+ "epoch": 12.54,
10566
+ "learning_rate": 1.0593366878008506e-05,
10567
+ "loss": 1.8195,
10568
+ "step": 871000
10569
+ },
10570
+ {
10571
+ "epoch": 12.54,
10572
+ "learning_rate": 1.054144013625577e-05,
10573
+ "loss": 1.8227,
10574
+ "step": 871500
10575
+ },
10576
+ {
10577
+ "epoch": 12.55,
10578
+ "learning_rate": 1.0489513394503034e-05,
10579
+ "loss": 1.8157,
10580
+ "step": 872000
10581
+ },
10582
+ {
10583
+ "epoch": 12.56,
10584
+ "learning_rate": 1.0437586652750301e-05,
10585
+ "loss": 1.8217,
10586
+ "step": 872500
10587
+ },
10588
+ {
10589
+ "epoch": 12.57,
10590
+ "learning_rate": 1.0385659910997566e-05,
10591
+ "loss": 1.8213,
10592
+ "step": 873000
10593
+ },
10594
+ {
10595
+ "epoch": 12.57,
10596
+ "learning_rate": 1.0333837022728336e-05,
10597
+ "loss": 1.8229,
10598
+ "step": 873500
10599
+ },
10600
+ {
10601
+ "epoch": 12.58,
10602
+ "learning_rate": 1.02819102809756e-05,
10603
+ "loss": 1.8199,
10604
+ "step": 874000
10605
+ },
10606
+ {
10607
+ "epoch": 12.59,
10608
+ "learning_rate": 1.0229983539222865e-05,
10609
+ "loss": 1.8235,
10610
+ "step": 874500
10611
+ },
10612
+ {
10613
+ "epoch": 12.59,
10614
+ "learning_rate": 1.017805679747013e-05,
10615
+ "loss": 1.8226,
10616
+ "step": 875000
10617
+ },
10618
+ {
10619
+ "epoch": 12.6,
10620
+ "learning_rate": 1.0126130055717395e-05,
10621
+ "loss": 1.8207,
10622
+ "step": 875500
10623
+ },
10624
+ {
10625
+ "epoch": 12.61,
10626
+ "learning_rate": 1.0074307167448165e-05,
10627
+ "loss": 1.8171,
10628
+ "step": 876000
10629
+ },
10630
+ {
10631
+ "epoch": 12.62,
10632
+ "learning_rate": 1.002238042569543e-05,
10633
+ "loss": 1.8231,
10634
+ "step": 876500
10635
+ },
10636
+ {
10637
+ "epoch": 12.62,
10638
+ "learning_rate": 9.970453683942695e-06,
10639
+ "loss": 1.8192,
10640
+ "step": 877000
10641
+ },
10642
+ {
10643
+ "epoch": 12.63,
10644
+ "learning_rate": 9.918526942189958e-06,
10645
+ "loss": 1.8194,
10646
+ "step": 877500
10647
+ },
10648
+ {
10649
+ "epoch": 12.64,
10650
+ "learning_rate": 9.866600200437223e-06,
10651
+ "loss": 1.8203,
10652
+ "step": 878000
10653
+ },
10654
+ {
10655
+ "epoch": 12.65,
10656
+ "learning_rate": 9.814673458684488e-06,
10657
+ "loss": 1.8184,
10658
+ "step": 878500
10659
+ },
10660
+ {
10661
+ "epoch": 12.65,
10662
+ "learning_rate": 9.762850570415259e-06,
10663
+ "loss": 1.8189,
10664
+ "step": 879000
10665
+ },
10666
+ {
10667
+ "epoch": 12.66,
10668
+ "learning_rate": 9.710923828662522e-06,
10669
+ "loss": 1.8217,
10670
+ "step": 879500
10671
+ },
10672
+ {
10673
+ "epoch": 12.67,
10674
+ "learning_rate": 9.658997086909787e-06,
10675
+ "loss": 1.8219,
10676
+ "step": 880000
10677
+ },
10678
+ {
10679
+ "epoch": 12.67,
10680
+ "learning_rate": 9.607070345157054e-06,
10681
+ "loss": 1.8226,
10682
+ "step": 880500
10683
+ },
10684
+ {
10685
+ "epoch": 12.68,
10686
+ "learning_rate": 9.555247456887823e-06,
10687
+ "loss": 1.8191,
10688
+ "step": 881000
10689
+ },
10690
+ {
10691
+ "epoch": 12.69,
10692
+ "learning_rate": 9.503320715135088e-06,
10693
+ "loss": 1.8191,
10694
+ "step": 881500
10695
+ },
10696
+ {
10697
+ "epoch": 12.7,
10698
+ "learning_rate": 9.451393973382353e-06,
10699
+ "loss": 1.821,
10700
+ "step": 882000
10701
+ },
10702
+ {
10703
+ "epoch": 12.7,
10704
+ "learning_rate": 9.399467231629618e-06,
10705
+ "loss": 1.8177,
10706
+ "step": 882500
10707
+ },
10708
+ {
10709
+ "epoch": 12.71,
10710
+ "learning_rate": 9.347644343360388e-06,
10711
+ "loss": 1.8254,
10712
+ "step": 883000
10713
+ },
10714
+ {
10715
+ "epoch": 12.72,
10716
+ "learning_rate": 9.295717601607653e-06,
10717
+ "loss": 1.8205,
10718
+ "step": 883500
10719
+ },
10720
+ {
10721
+ "epoch": 12.72,
10722
+ "learning_rate": 9.243790859854917e-06,
10723
+ "loss": 1.8196,
10724
+ "step": 884000
10725
+ },
10726
+ {
10727
+ "epoch": 12.73,
10728
+ "learning_rate": 9.191864118102182e-06,
10729
+ "loss": 1.8206,
10730
+ "step": 884500
10731
+ },
10732
+ {
10733
+ "epoch": 12.74,
10734
+ "learning_rate": 9.139937376349447e-06,
10735
+ "loss": 1.8201,
10736
+ "step": 885000
10737
+ },
10738
+ {
10739
+ "epoch": 12.75,
10740
+ "learning_rate": 9.088114488080217e-06,
10741
+ "loss": 1.8182,
10742
+ "step": 885500
10743
+ },
10744
+ {
10745
+ "epoch": 12.75,
10746
+ "learning_rate": 9.036187746327482e-06,
10747
+ "loss": 1.8198,
10748
+ "step": 886000
10749
+ },
10750
+ {
10751
+ "epoch": 12.76,
10752
+ "learning_rate": 8.984261004574747e-06,
10753
+ "loss": 1.8238,
10754
+ "step": 886500
10755
+ },
10756
+ {
10757
+ "epoch": 12.77,
10758
+ "learning_rate": 8.93233426282201e-06,
10759
+ "loss": 1.8186,
10760
+ "step": 887000
10761
+ },
10762
+ {
10763
+ "epoch": 12.77,
10764
+ "learning_rate": 8.880407521069275e-06,
10765
+ "loss": 1.8159,
10766
+ "step": 887500
10767
+ },
10768
+ {
10769
+ "epoch": 12.78,
10770
+ "learning_rate": 8.828584632800046e-06,
10771
+ "loss": 1.818,
10772
+ "step": 888000
10773
+ },
10774
+ {
10775
+ "epoch": 12.79,
10776
+ "learning_rate": 8.776657891047311e-06,
10777
+ "loss": 1.8221,
10778
+ "step": 888500
10779
+ },
10780
+ {
10781
+ "epoch": 12.8,
10782
+ "learning_rate": 8.724731149294576e-06,
10783
+ "loss": 1.8165,
10784
+ "step": 889000
10785
+ },
10786
+ {
10787
+ "epoch": 12.8,
10788
+ "learning_rate": 8.67280440754184e-06,
10789
+ "loss": 1.8205,
10790
+ "step": 889500
10791
+ },
10792
+ {
10793
+ "epoch": 12.81,
10794
+ "learning_rate": 8.62098151927261e-06,
10795
+ "loss": 1.8192,
10796
+ "step": 890000
10797
+ },
10798
+ {
10799
+ "epoch": 12.82,
10800
+ "learning_rate": 8.569054777519875e-06,
10801
+ "loss": 1.8155,
10802
+ "step": 890500
10803
+ },
10804
+ {
10805
+ "epoch": 12.83,
10806
+ "learning_rate": 8.51712803576714e-06,
10807
+ "loss": 1.8241,
10808
+ "step": 891000
10809
+ },
10810
+ {
10811
+ "epoch": 12.83,
10812
+ "learning_rate": 8.465201294014405e-06,
10813
+ "loss": 1.8203,
10814
+ "step": 891500
10815
+ },
10816
+ {
10817
+ "epoch": 12.84,
10818
+ "learning_rate": 8.413378405745175e-06,
10819
+ "loss": 1.8153,
10820
+ "step": 892000
10821
+ },
10822
+ {
10823
+ "epoch": 12.85,
10824
+ "learning_rate": 8.36145166399244e-06,
10825
+ "loss": 1.8188,
10826
+ "step": 892500
10827
+ },
10828
+ {
10829
+ "epoch": 12.85,
10830
+ "learning_rate": 8.309524922239705e-06,
10831
+ "loss": 1.8233,
10832
+ "step": 893000
10833
+ },
10834
+ {
10835
+ "epoch": 12.86,
10836
+ "learning_rate": 8.25759818048697e-06,
10837
+ "loss": 1.82,
10838
+ "step": 893500
10839
+ },
10840
+ {
10841
+ "epoch": 12.87,
10842
+ "learning_rate": 8.205775292217739e-06,
10843
+ "loss": 1.8259,
10844
+ "step": 894000
10845
+ },
10846
+ {
10847
+ "epoch": 12.88,
10848
+ "learning_rate": 8.153848550465004e-06,
10849
+ "loss": 1.8141,
10850
+ "step": 894500
10851
+ },
10852
+ {
10853
+ "epoch": 12.88,
10854
+ "learning_rate": 8.101921808712269e-06,
10855
+ "loss": 1.8185,
10856
+ "step": 895000
10857
+ },
10858
+ {
10859
+ "epoch": 12.89,
10860
+ "learning_rate": 8.049995066959534e-06,
10861
+ "loss": 1.8158,
10862
+ "step": 895500
10863
+ },
10864
+ {
10865
+ "epoch": 12.9,
10866
+ "learning_rate": 7.998172178690303e-06,
10867
+ "loss": 1.8182,
10868
+ "step": 896000
10869
+ },
10870
+ {
10871
+ "epoch": 12.9,
10872
+ "learning_rate": 7.946245436937568e-06,
10873
+ "loss": 1.8226,
10874
+ "step": 896500
10875
+ },
10876
+ {
10877
+ "epoch": 12.91,
10878
+ "learning_rate": 7.894318695184833e-06,
10879
+ "loss": 1.82,
10880
+ "step": 897000
10881
+ },
10882
+ {
10883
+ "epoch": 12.92,
10884
+ "learning_rate": 7.842391953432098e-06,
10885
+ "loss": 1.8245,
10886
+ "step": 897500
10887
+ },
10888
+ {
10889
+ "epoch": 12.93,
10890
+ "learning_rate": 7.790465211679363e-06,
10891
+ "loss": 1.8168,
10892
+ "step": 898000
10893
+ },
10894
+ {
10895
+ "epoch": 12.93,
10896
+ "learning_rate": 7.738538469926628e-06,
10897
+ "loss": 1.8208,
10898
+ "step": 898500
10899
+ },
10900
+ {
10901
+ "epoch": 12.94,
10902
+ "learning_rate": 7.686611728173893e-06,
10903
+ "loss": 1.8174,
10904
+ "step": 899000
10905
+ },
10906
+ {
10907
+ "epoch": 12.95,
10908
+ "learning_rate": 7.634788839904664e-06,
10909
+ "loss": 1.8105,
10910
+ "step": 899500
10911
+ },
10912
+ {
10913
+ "epoch": 12.95,
10914
+ "learning_rate": 7.5828620981519274e-06,
10915
+ "loss": 1.8165,
10916
+ "step": 900000
10917
+ },
10918
+ {
10919
+ "epoch": 12.96,
10920
+ "learning_rate": 7.530935356399192e-06,
10921
+ "loss": 1.8201,
10922
+ "step": 900500
10923
+ },
10924
+ {
10925
+ "epoch": 12.97,
10926
+ "learning_rate": 7.479008614646457e-06,
10927
+ "loss": 1.8207,
10928
+ "step": 901000
10929
+ },
10930
+ {
10931
+ "epoch": 12.98,
10932
+ "learning_rate": 7.427185726377227e-06,
10933
+ "loss": 1.8191,
10934
+ "step": 901500
10935
+ },
10936
+ {
10937
+ "epoch": 12.98,
10938
+ "learning_rate": 7.375258984624492e-06,
10939
+ "loss": 1.8161,
10940
+ "step": 902000
10941
+ },
10942
+ {
10943
+ "epoch": 12.99,
10944
+ "learning_rate": 7.323332242871757e-06,
10945
+ "loss": 1.8179,
10946
+ "step": 902500
10947
+ },
10948
+ {
10949
+ "epoch": 13.0,
10950
+ "learning_rate": 7.271405501119022e-06,
10951
+ "loss": 1.8191,
10952
+ "step": 903000
10953
+ },
10954
+ {
10955
+ "epoch": 13.0,
10956
+ "eval_accuracy": 0.651487792294714,
10957
+ "eval_loss": 1.6829075813293457,
10958
+ "eval_runtime": 646.4178,
10959
+ "eval_samples_per_second": 833.729,
10960
+ "eval_steps_per_second": 34.739,
10961
+ "step": 903149
10962
  }
10963
  ],
10964
  "max_steps": 972622,
10965
  "num_train_epochs": 14,
10966
+ "total_flos": 6.3182107514629e+18,
10967
  "trial_name": null,
10968
  "trial_params": null
10969
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
+ oid sha256:63317fdc9bd1e5fb626b521461e4e06505472bbc1f4c4073a239d29dd5ffa2fe
  size 118242180
runs/Aug02_00-40-05_user-SYS-5049A-TR/events.out.tfevents.1659368419.user-SYS-5049A-TR.4008140.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:af05549d74148c78a30fded743641173f0fcc77b72e6135712a94988aef723ba
- size 273998
+ oid sha256:ef9a4232c9659b4d86de167e19a1c2898a08e3e74d5884eed5b93f3a8a6cd35d
+ size 296567