schnell committed on
Commit
8a7ecca
‒
1 Parent(s): 1c20b5f

Training in progress, epoch 13

Browse files
last-checkpoint/{global_step833676 → global_step903149}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:177844d079c3a878e1a1593c72eded78b25e288ac4a03b30814cb9af2602867c
3
  size 59134503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b775166fd1a64aee6f469dc8b8d1b61c93aed94e05c851b01afffc78a7f8d824
3
  size 59134503
last-checkpoint/{global_step833676 → global_step903149}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82a5653e5b267c422ac12dc076f785e897bdcb3a16edb6b006982fda69b3bb51
3
  size 118216675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8375c93a445d2ea278db9232bd27e3755b4edde55288673ceb9724ccb4e419d3
3
  size 118216675
last-checkpoint/{global_step833676 → global_step903149}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0e6b8661967a017dad54b703862739dc5d807281993dff1b620b1ebabb8014e
3
  size 118217955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4ae64a54a2aa06bef2a3f313601c1fd1df2ce3a54cce5edabb996b80a0e115
3
  size 118217955
last-checkpoint/{global_step833676 → global_step903149}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:386d65eba5dca298a14c422909c05240ef26c77aab6fbf33f674e302119829b8
3
  size 118221091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b31082d5254a709950a22034d6b3ec40a727801696f8d64d8f6198111d36c5c0
3
  size 118221091
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step833676
 
1
+ global_step903149
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c08ed382f6136d0ce1cf9f9ff6dc9d51facd3c3c7bfb6310570568f912f9d3
3
  size 59121639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e34d647e6884e69d4c0ac0c471739081db7d9613d859e3063a44fbef38b3071
3
  size 59121639
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c3d8f4685f0bf12e340e437d68a0eebc0588823560567bdc5e8f25644fa6143
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b793718eabc9fd1cf6b885c3c367bf56530f86fa9da8f52709d8af564fcd1d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60f0df4f8af1db9f9ba3e4b0b5704affca4062e2190a322a2bfdc4a71360bf6f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec01a93b58d3f364e28d168f16fd7aa97504e4518958fc5c638c2c55c7950435
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f17bb43e86bdfad29d8cd5e13a50207b01c9027b96171395039f166c10973a3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926bd923a4da604535e54c1d36be4420e219d20073027592194a07474a632c8d
3
  size 14503
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
- "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10116,11 +10116,854 @@
10116
  "eval_samples_per_second": 759.827,
10117
  "eval_steps_per_second": 31.66,
10118
  "step": 833676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
- "total_flos": 6.201401539576201e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.0,
5
+ "global_step": 903149,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10116
  "eval_samples_per_second": 759.827,
10117
  "eval_steps_per_second": 31.66,
10118
  "step": 833676
10119
+ },
10120
+ {
10121
+ "epoch": 12.0,
10122
+ "learning_rate": 1.4569501347498949e-05,
10123
+ "loss": 1.4253,
10124
+ "step": 834000
10125
+ },
10126
+ {
10127
+ "epoch": 12.01,
10128
+ "learning_rate": 1.4517574605746216e-05,
10129
+ "loss": 1.4285,
10130
+ "step": 834500
10131
+ },
10132
+ {
10133
+ "epoch": 12.02,
10134
+ "learning_rate": 1.446585557096049e-05,
10135
+ "loss": 1.4269,
10136
+ "step": 835000
10137
+ },
10138
+ {
10139
+ "epoch": 12.03,
10140
+ "learning_rate": 1.4413928829207755e-05,
10141
+ "loss": 1.428,
10142
+ "step": 835500
10143
+ },
10144
+ {
10145
+ "epoch": 12.03,
10146
+ "learning_rate": 1.4362209794422029e-05,
10147
+ "loss": 1.4239,
10148
+ "step": 836000
10149
+ },
10150
+ {
10151
+ "epoch": 12.04,
10152
+ "learning_rate": 1.4310283052669296e-05,
10153
+ "loss": 1.4252,
10154
+ "step": 836500
10155
+ },
10156
+ {
10157
+ "epoch": 12.05,
10158
+ "learning_rate": 1.4258564017883572e-05,
10159
+ "loss": 1.4251,
10160
+ "step": 837000
10161
+ },
10162
+ {
10163
+ "epoch": 12.06,
10164
+ "learning_rate": 1.4206637276130835e-05,
10165
+ "loss": 1.425,
10166
+ "step": 837500
10167
+ },
10168
+ {
10169
+ "epoch": 12.06,
10170
+ "learning_rate": 1.4154918241345111e-05,
10171
+ "loss": 1.4267,
10172
+ "step": 838000
10173
+ },
10174
+ {
10175
+ "epoch": 12.07,
10176
+ "learning_rate": 1.4102991499592374e-05,
10177
+ "loss": 1.4217,
10178
+ "step": 838500
10179
+ },
10180
+ {
10181
+ "epoch": 12.08,
10182
+ "learning_rate": 1.405127246480665e-05,
10183
+ "loss": 1.4223,
10184
+ "step": 839000
10185
+ },
10186
+ {
10187
+ "epoch": 12.08,
10188
+ "learning_rate": 1.3999345723053917e-05,
10189
+ "loss": 1.4215,
10190
+ "step": 839500
10191
+ },
10192
+ {
10193
+ "epoch": 12.09,
10194
+ "learning_rate": 1.3947626688268193e-05,
10195
+ "loss": 1.427,
10196
+ "step": 840000
10197
+ },
10198
+ {
10199
+ "epoch": 12.1,
10200
+ "learning_rate": 1.3895699946515456e-05,
10201
+ "loss": 1.4286,
10202
+ "step": 840500
10203
+ },
10204
+ {
10205
+ "epoch": 12.11,
10206
+ "learning_rate": 1.3843980911729732e-05,
10207
+ "loss": 1.423,
10208
+ "step": 841000
10209
+ },
10210
+ {
10211
+ "epoch": 12.11,
10212
+ "learning_rate": 1.3792054169976997e-05,
10213
+ "loss": 1.4245,
10214
+ "step": 841500
10215
+ },
10216
+ {
10217
+ "epoch": 12.12,
10218
+ "learning_rate": 1.3740335135191273e-05,
10219
+ "loss": 1.4242,
10220
+ "step": 842000
10221
+ },
10222
+ {
10223
+ "epoch": 12.13,
10224
+ "learning_rate": 1.3688408393438536e-05,
10225
+ "loss": 1.4258,
10226
+ "step": 842500
10227
+ },
10228
+ {
10229
+ "epoch": 12.13,
10230
+ "learning_rate": 1.3636689358652812e-05,
10231
+ "loss": 1.4273,
10232
+ "step": 843000
10233
+ },
10234
+ {
10235
+ "epoch": 12.14,
10236
+ "learning_rate": 1.3584762616900079e-05,
10237
+ "loss": 1.4241,
10238
+ "step": 843500
10239
+ },
10240
+ {
10241
+ "epoch": 12.15,
10242
+ "learning_rate": 1.3533043582114355e-05,
10243
+ "loss": 1.4265,
10244
+ "step": 844000
10245
+ },
10246
+ {
10247
+ "epoch": 12.16,
10248
+ "learning_rate": 1.3481116840361618e-05,
10249
+ "loss": 1.4246,
10250
+ "step": 844500
10251
+ },
10252
+ {
10253
+ "epoch": 12.16,
10254
+ "learning_rate": 1.3429397805575894e-05,
10255
+ "loss": 1.4265,
10256
+ "step": 845000
10257
+ },
10258
+ {
10259
+ "epoch": 12.17,
10260
+ "learning_rate": 1.3377471063823158e-05,
10261
+ "loss": 1.4272,
10262
+ "step": 845500
10263
+ },
10264
+ {
10265
+ "epoch": 12.18,
10266
+ "learning_rate": 1.3325752029037434e-05,
10267
+ "loss": 1.4265,
10268
+ "step": 846000
10269
+ },
10270
+ {
10271
+ "epoch": 12.18,
10272
+ "learning_rate": 1.32738252872847e-05,
10273
+ "loss": 1.4232,
10274
+ "step": 846500
10275
+ },
10276
+ {
10277
+ "epoch": 12.19,
10278
+ "learning_rate": 1.3222106252498975e-05,
10279
+ "loss": 1.4247,
10280
+ "step": 847000
10281
+ },
10282
+ {
10283
+ "epoch": 12.2,
10284
+ "learning_rate": 1.317017951074624e-05,
10285
+ "loss": 1.4239,
10286
+ "step": 847500
10287
+ },
10288
+ {
10289
+ "epoch": 12.21,
10290
+ "learning_rate": 1.3118460475960514e-05,
10291
+ "loss": 1.4239,
10292
+ "step": 848000
10293
+ },
10294
+ {
10295
+ "epoch": 12.21,
10296
+ "learning_rate": 1.306653373420778e-05,
10297
+ "loss": 1.4255,
10298
+ "step": 848500
10299
+ },
10300
+ {
10301
+ "epoch": 12.22,
10302
+ "learning_rate": 1.3014814699422057e-05,
10303
+ "loss": 1.4233,
10304
+ "step": 849000
10305
+ },
10306
+ {
10307
+ "epoch": 12.23,
10308
+ "learning_rate": 1.296288795766932e-05,
10309
+ "loss": 1.4269,
10310
+ "step": 849500
10311
+ },
10312
+ {
10313
+ "epoch": 12.23,
10314
+ "learning_rate": 1.2911168922883596e-05,
10315
+ "loss": 1.4271,
10316
+ "step": 850000
10317
+ },
10318
+ {
10319
+ "epoch": 12.24,
10320
+ "learning_rate": 1.2859242181130862e-05,
10321
+ "loss": 1.4217,
10322
+ "step": 850500
10323
+ },
10324
+ {
10325
+ "epoch": 12.25,
10326
+ "learning_rate": 1.2807523146345138e-05,
10327
+ "loss": 1.4218,
10328
+ "step": 851000
10329
+ },
10330
+ {
10331
+ "epoch": 12.26,
10332
+ "learning_rate": 1.2755596404592402e-05,
10333
+ "loss": 1.4209,
10334
+ "step": 851500
10335
+ },
10336
+ {
10337
+ "epoch": 12.26,
10338
+ "learning_rate": 1.2703877369806678e-05,
10339
+ "loss": 1.4242,
10340
+ "step": 852000
10341
+ },
10342
+ {
10343
+ "epoch": 12.27,
10344
+ "learning_rate": 1.2651950628053943e-05,
10345
+ "loss": 1.4204,
10346
+ "step": 852500
10347
+ },
10348
+ {
10349
+ "epoch": 12.28,
10350
+ "learning_rate": 1.2600231593268219e-05,
10351
+ "loss": 1.4215,
10352
+ "step": 853000
10353
+ },
10354
+ {
10355
+ "epoch": 12.29,
10356
+ "learning_rate": 1.2548304851515482e-05,
10357
+ "loss": 1.4262,
10358
+ "step": 853500
10359
+ },
10360
+ {
10361
+ "epoch": 12.29,
10362
+ "learning_rate": 1.2496585816729758e-05,
10363
+ "loss": 1.4251,
10364
+ "step": 854000
10365
+ },
10366
+ {
10367
+ "epoch": 12.3,
10368
+ "learning_rate": 1.2444659074977023e-05,
10369
+ "loss": 1.422,
10370
+ "step": 854500
10371
+ },
10372
+ {
10373
+ "epoch": 12.31,
10374
+ "learning_rate": 1.2392940040191299e-05,
10375
+ "loss": 1.4225,
10376
+ "step": 855000
10377
+ },
10378
+ {
10379
+ "epoch": 12.31,
10380
+ "learning_rate": 1.2341013298438564e-05,
10381
+ "loss": 1.4243,
10382
+ "step": 855500
10383
+ },
10384
+ {
10385
+ "epoch": 12.32,
10386
+ "learning_rate": 1.228929426365284e-05,
10387
+ "loss": 1.4233,
10388
+ "step": 856000
10389
+ },
10390
+ {
10391
+ "epoch": 12.33,
10392
+ "learning_rate": 1.2237367521900103e-05,
10393
+ "loss": 1.423,
10394
+ "step": 856500
10395
+ },
10396
+ {
10397
+ "epoch": 12.34,
10398
+ "learning_rate": 1.2185648487114381e-05,
10399
+ "loss": 1.422,
10400
+ "step": 857000
10401
+ },
10402
+ {
10403
+ "epoch": 12.34,
10404
+ "learning_rate": 1.2133721745361644e-05,
10405
+ "loss": 1.4213,
10406
+ "step": 857500
10407
+ },
10408
+ {
10409
+ "epoch": 12.35,
10410
+ "learning_rate": 1.208200271057592e-05,
10411
+ "loss": 1.4262,
10412
+ "step": 858000
10413
+ },
10414
+ {
10415
+ "epoch": 12.36,
10416
+ "learning_rate": 1.2030075968823185e-05,
10417
+ "loss": 1.4227,
10418
+ "step": 858500
10419
+ },
10420
+ {
10421
+ "epoch": 12.36,
10422
+ "learning_rate": 1.197835693403746e-05,
10423
+ "loss": 1.4223,
10424
+ "step": 859000
10425
+ },
10426
+ {
10427
+ "epoch": 12.37,
10428
+ "learning_rate": 1.1926430192284726e-05,
10429
+ "loss": 1.4242,
10430
+ "step": 859500
10431
+ },
10432
+ {
10433
+ "epoch": 12.38,
10434
+ "learning_rate": 1.1874711157499e-05,
10435
+ "loss": 1.4193,
10436
+ "step": 860000
10437
+ },
10438
+ {
10439
+ "epoch": 12.39,
10440
+ "learning_rate": 1.1822784415746265e-05,
10441
+ "loss": 1.4243,
10442
+ "step": 860500
10443
+ },
10444
+ {
10445
+ "epoch": 12.39,
10446
+ "learning_rate": 1.1771065380960541e-05,
10447
+ "loss": 1.4233,
10448
+ "step": 861000
10449
+ },
10450
+ {
10451
+ "epoch": 12.4,
10452
+ "learning_rate": 1.1719138639207806e-05,
10453
+ "loss": 1.4219,
10454
+ "step": 861500
10455
+ },
10456
+ {
10457
+ "epoch": 12.41,
10458
+ "learning_rate": 1.1667419604422082e-05,
10459
+ "loss": 1.4217,
10460
+ "step": 862000
10461
+ },
10462
+ {
10463
+ "epoch": 12.41,
10464
+ "learning_rate": 1.1615492862669347e-05,
10465
+ "loss": 1.4228,
10466
+ "step": 862500
10467
+ },
10468
+ {
10469
+ "epoch": 12.42,
10470
+ "learning_rate": 1.1563773827883623e-05,
10471
+ "loss": 1.4225,
10472
+ "step": 863000
10473
+ },
10474
+ {
10475
+ "epoch": 12.43,
10476
+ "learning_rate": 1.1511847086130888e-05,
10477
+ "loss": 1.4227,
10478
+ "step": 863500
10479
+ },
10480
+ {
10481
+ "epoch": 12.44,
10482
+ "learning_rate": 1.1460128051345164e-05,
10483
+ "loss": 1.4224,
10484
+ "step": 864000
10485
+ },
10486
+ {
10487
+ "epoch": 12.44,
10488
+ "learning_rate": 1.1408201309592427e-05,
10489
+ "loss": 1.4211,
10490
+ "step": 864500
10491
+ },
10492
+ {
10493
+ "epoch": 12.45,
10494
+ "learning_rate": 1.1356482274806702e-05,
10495
+ "loss": 1.4255,
10496
+ "step": 865000
10497
+ },
10498
+ {
10499
+ "epoch": 12.46,
10500
+ "learning_rate": 1.1304555533053968e-05,
10501
+ "loss": 1.4211,
10502
+ "step": 865500
10503
+ },
10504
+ {
10505
+ "epoch": 12.47,
10506
+ "learning_rate": 1.1252836498268243e-05,
10507
+ "loss": 1.423,
10508
+ "step": 866000
10509
+ },
10510
+ {
10511
+ "epoch": 12.47,
10512
+ "learning_rate": 1.1200909756515508e-05,
10513
+ "loss": 1.4213,
10514
+ "step": 866500
10515
+ },
10516
+ {
10517
+ "epoch": 12.48,
10518
+ "learning_rate": 1.1149190721729784e-05,
10519
+ "loss": 1.4229,
10520
+ "step": 867000
10521
+ },
10522
+ {
10523
+ "epoch": 12.49,
10524
+ "learning_rate": 1.1097263979977049e-05,
10525
+ "loss": 1.4249,
10526
+ "step": 867500
10527
+ },
10528
+ {
10529
+ "epoch": 12.49,
10530
+ "learning_rate": 1.1045544945191325e-05,
10531
+ "loss": 1.4228,
10532
+ "step": 868000
10533
+ },
10534
+ {
10535
+ "epoch": 12.5,
10536
+ "learning_rate": 1.099361820343859e-05,
10537
+ "loss": 1.423,
10538
+ "step": 868500
10539
+ },
10540
+ {
10541
+ "epoch": 12.51,
10542
+ "learning_rate": 1.0941899168652866e-05,
10543
+ "loss": 1.4263,
10544
+ "step": 869000
10545
+ },
10546
+ {
10547
+ "epoch": 12.52,
10548
+ "learning_rate": 1.088997242690013e-05,
10549
+ "loss": 1.419,
10550
+ "step": 869500
10551
+ },
10552
+ {
10553
+ "epoch": 12.52,
10554
+ "learning_rate": 1.0838253392114407e-05,
10555
+ "loss": 1.4227,
10556
+ "step": 870000
10557
+ },
10558
+ {
10559
+ "epoch": 12.53,
10560
+ "learning_rate": 1.0786326650361671e-05,
10561
+ "loss": 1.4233,
10562
+ "step": 870500
10563
+ },
10564
+ {
10565
+ "epoch": 12.54,
10566
+ "learning_rate": 1.0734607615575946e-05,
10567
+ "loss": 1.4183,
10568
+ "step": 871000
10569
+ },
10570
+ {
10571
+ "epoch": 12.54,
10572
+ "learning_rate": 1.068268087382321e-05,
10573
+ "loss": 1.4224,
10574
+ "step": 871500
10575
+ },
10576
+ {
10577
+ "epoch": 12.55,
10578
+ "learning_rate": 1.0630961839037487e-05,
10579
+ "loss": 1.424,
10580
+ "step": 872000
10581
+ },
10582
+ {
10583
+ "epoch": 12.56,
10584
+ "learning_rate": 1.057903509728475e-05,
10585
+ "loss": 1.4203,
10586
+ "step": 872500
10587
+ },
10588
+ {
10589
+ "epoch": 12.57,
10590
+ "learning_rate": 1.0527316062499026e-05,
10591
+ "loss": 1.423,
10592
+ "step": 873000
10593
+ },
10594
+ {
10595
+ "epoch": 12.57,
10596
+ "learning_rate": 1.0475389320746291e-05,
10597
+ "loss": 1.4204,
10598
+ "step": 873500
10599
+ },
10600
+ {
10601
+ "epoch": 12.58,
10602
+ "learning_rate": 1.0423670285960567e-05,
10603
+ "loss": 1.4206,
10604
+ "step": 874000
10605
+ },
10606
+ {
10607
+ "epoch": 12.59,
10608
+ "learning_rate": 1.0371743544207832e-05,
10609
+ "loss": 1.423,
10610
+ "step": 874500
10611
+ },
10612
+ {
10613
+ "epoch": 12.59,
10614
+ "learning_rate": 1.0320024509422108e-05,
10615
+ "loss": 1.4221,
10616
+ "step": 875000
10617
+ },
10618
+ {
10619
+ "epoch": 12.6,
10620
+ "learning_rate": 1.0268097767669373e-05,
10621
+ "loss": 1.4183,
10622
+ "step": 875500
10623
+ },
10624
+ {
10625
+ "epoch": 12.61,
10626
+ "learning_rate": 1.0216378732883649e-05,
10627
+ "loss": 1.4245,
10628
+ "step": 876000
10629
+ },
10630
+ {
10631
+ "epoch": 12.62,
10632
+ "learning_rate": 1.0164451991130914e-05,
10633
+ "loss": 1.4192,
10634
+ "step": 876500
10635
+ },
10636
+ {
10637
+ "epoch": 12.62,
10638
+ "learning_rate": 1.0112732956345188e-05,
10639
+ "loss": 1.4228,
10640
+ "step": 877000
10641
+ },
10642
+ {
10643
+ "epoch": 12.63,
10644
+ "learning_rate": 1.0060806214592453e-05,
10645
+ "loss": 1.4195,
10646
+ "step": 877500
10647
+ },
10648
+ {
10649
+ "epoch": 12.64,
10650
+ "learning_rate": 1.000908717980673e-05,
10651
+ "loss": 1.4188,
10652
+ "step": 878000
10653
+ },
10654
+ {
10655
+ "epoch": 12.65,
10656
+ "learning_rate": 9.957160438053994e-06,
10657
+ "loss": 1.4201,
10658
+ "step": 878500
10659
+ },
10660
+ {
10661
+ "epoch": 12.65,
10662
+ "learning_rate": 9.90544140326827e-06,
10663
+ "loss": 1.4185,
10664
+ "step": 879000
10665
+ },
10666
+ {
10667
+ "epoch": 12.66,
10668
+ "learning_rate": 9.853514661515533e-06,
10669
+ "loss": 1.4197,
10670
+ "step": 879500
10671
+ },
10672
+ {
10673
+ "epoch": 12.67,
10674
+ "learning_rate": 9.80179562672981e-06,
10675
+ "loss": 1.4229,
10676
+ "step": 880000
10677
+ },
10678
+ {
10679
+ "epoch": 12.67,
10680
+ "learning_rate": 9.749868884977074e-06,
10681
+ "loss": 1.424,
10682
+ "step": 880500
10683
+ },
10684
+ {
10685
+ "epoch": 12.68,
10686
+ "learning_rate": 9.69814985019135e-06,
10687
+ "loss": 1.4222,
10688
+ "step": 881000
10689
+ },
10690
+ {
10691
+ "epoch": 12.69,
10692
+ "learning_rate": 9.646223108438615e-06,
10693
+ "loss": 1.4197,
10694
+ "step": 881500
10695
+ },
10696
+ {
10697
+ "epoch": 12.7,
10698
+ "learning_rate": 9.594504073652891e-06,
10699
+ "loss": 1.423,
10700
+ "step": 882000
10701
+ },
10702
+ {
10703
+ "epoch": 12.7,
10704
+ "learning_rate": 9.542577331900156e-06,
10705
+ "loss": 1.4203,
10706
+ "step": 882500
10707
+ },
10708
+ {
10709
+ "epoch": 12.71,
10710
+ "learning_rate": 9.49085829711443e-06,
10711
+ "loss": 1.42,
10712
+ "step": 883000
10713
+ },
10714
+ {
10715
+ "epoch": 12.72,
10716
+ "learning_rate": 9.438931555361697e-06,
10717
+ "loss": 1.4189,
10718
+ "step": 883500
10719
+ },
10720
+ {
10721
+ "epoch": 12.72,
10722
+ "learning_rate": 9.387212520575972e-06,
10723
+ "loss": 1.4213,
10724
+ "step": 884000
10725
+ },
10726
+ {
10727
+ "epoch": 12.73,
10728
+ "learning_rate": 9.335285778823236e-06,
10729
+ "loss": 1.4191,
10730
+ "step": 884500
10731
+ },
10732
+ {
10733
+ "epoch": 12.74,
10734
+ "learning_rate": 9.283566744037513e-06,
10735
+ "loss": 1.4198,
10736
+ "step": 885000
10737
+ },
10738
+ {
10739
+ "epoch": 12.75,
10740
+ "learning_rate": 9.231640002284777e-06,
10741
+ "loss": 1.4223,
10742
+ "step": 885500
10743
+ },
10744
+ {
10745
+ "epoch": 12.75,
10746
+ "learning_rate": 9.179920967499054e-06,
10747
+ "loss": 1.4225,
10748
+ "step": 886000
10749
+ },
10750
+ {
10751
+ "epoch": 12.76,
10752
+ "learning_rate": 9.127994225746317e-06,
10753
+ "loss": 1.4208,
10754
+ "step": 886500
10755
+ },
10756
+ {
10757
+ "epoch": 12.77,
10758
+ "learning_rate": 9.076275190960594e-06,
10759
+ "loss": 1.4222,
10760
+ "step": 887000
10761
+ },
10762
+ {
10763
+ "epoch": 12.77,
10764
+ "learning_rate": 9.024348449207858e-06,
10765
+ "loss": 1.4223,
10766
+ "step": 887500
10767
+ },
10768
+ {
10769
+ "epoch": 12.78,
10770
+ "learning_rate": 8.972629414422134e-06,
10771
+ "loss": 1.4195,
10772
+ "step": 888000
10773
+ },
10774
+ {
10775
+ "epoch": 12.79,
10776
+ "learning_rate": 8.920702672669399e-06,
10777
+ "loss": 1.4184,
10778
+ "step": 888500
10779
+ },
10780
+ {
10781
+ "epoch": 12.8,
10782
+ "learning_rate": 8.868983637883673e-06,
10783
+ "loss": 1.4199,
10784
+ "step": 889000
10785
+ },
10786
+ {
10787
+ "epoch": 12.8,
10788
+ "learning_rate": 8.81705689613094e-06,
10789
+ "loss": 1.4214,
10790
+ "step": 889500
10791
+ },
10792
+ {
10793
+ "epoch": 12.81,
10794
+ "learning_rate": 8.765337861345214e-06,
10795
+ "loss": 1.4199,
10796
+ "step": 890000
10797
+ },
10798
+ {
10799
+ "epoch": 12.82,
10800
+ "learning_rate": 8.713411119592479e-06,
10801
+ "loss": 1.4183,
10802
+ "step": 890500
10803
+ },
10804
+ {
10805
+ "epoch": 12.83,
10806
+ "learning_rate": 8.661692084806755e-06,
10807
+ "loss": 1.4208,
10808
+ "step": 891000
10809
+ },
10810
+ {
10811
+ "epoch": 12.83,
10812
+ "learning_rate": 8.60976534305402e-06,
10813
+ "loss": 1.4193,
10814
+ "step": 891500
10815
+ },
10816
+ {
10817
+ "epoch": 12.84,
10818
+ "learning_rate": 8.558046308268296e-06,
10819
+ "loss": 1.421,
10820
+ "step": 892000
10821
+ },
10822
+ {
10823
+ "epoch": 12.85,
10824
+ "learning_rate": 8.50611956651556e-06,
10825
+ "loss": 1.4179,
10826
+ "step": 892500
10827
+ },
10828
+ {
10829
+ "epoch": 12.85,
10830
+ "learning_rate": 8.454400531729837e-06,
10831
+ "loss": 1.4223,
10832
+ "step": 893000
10833
+ },
10834
+ {
10835
+ "epoch": 12.86,
10836
+ "learning_rate": 8.402473789977102e-06,
10837
+ "loss": 1.4201,
10838
+ "step": 893500
10839
+ },
10840
+ {
10841
+ "epoch": 12.87,
10842
+ "learning_rate": 8.350754755191378e-06,
10843
+ "loss": 1.4173,
10844
+ "step": 894000
10845
+ },
10846
+ {
10847
+ "epoch": 12.88,
10848
+ "learning_rate": 8.298828013438641e-06,
10849
+ "loss": 1.4206,
10850
+ "step": 894500
10851
+ },
10852
+ {
10853
+ "epoch": 12.88,
10854
+ "learning_rate": 8.247108978652917e-06,
10855
+ "loss": 1.4188,
10856
+ "step": 895000
10857
+ },
10858
+ {
10859
+ "epoch": 12.89,
10860
+ "learning_rate": 8.195182236900182e-06,
10861
+ "loss": 1.422,
10862
+ "step": 895500
10863
+ },
10864
+ {
10865
+ "epoch": 12.9,
10866
+ "learning_rate": 8.143463202114456e-06,
10867
+ "loss": 1.4188,
10868
+ "step": 896000
10869
+ },
10870
+ {
10871
+ "epoch": 12.9,
10872
+ "learning_rate": 8.091536460361721e-06,
10873
+ "loss": 1.4173,
10874
+ "step": 896500
10875
+ },
10876
+ {
10877
+ "epoch": 12.91,
10878
+ "learning_rate": 8.039817425575997e-06,
10879
+ "loss": 1.4191,
10880
+ "step": 897000
10881
+ },
10882
+ {
10883
+ "epoch": 12.92,
10884
+ "learning_rate": 7.987890683823262e-06,
10885
+ "loss": 1.4212,
10886
+ "step": 897500
10887
+ },
10888
+ {
10889
+ "epoch": 12.93,
10890
+ "learning_rate": 7.936171649037538e-06,
10891
+ "loss": 1.419,
10892
+ "step": 898000
10893
+ },
10894
+ {
10895
+ "epoch": 12.93,
10896
+ "learning_rate": 7.884244907284803e-06,
10897
+ "loss": 1.4188,
10898
+ "step": 898500
10899
+ },
10900
+ {
10901
+ "epoch": 12.94,
10902
+ "learning_rate": 7.83252587249908e-06,
10903
+ "loss": 1.4202,
10904
+ "step": 899000
10905
+ },
10906
+ {
10907
+ "epoch": 12.95,
10908
+ "learning_rate": 7.780599130746344e-06,
10909
+ "loss": 1.4191,
10910
+ "step": 899500
10911
+ },
10912
+ {
10913
+ "epoch": 12.95,
10914
+ "learning_rate": 7.72888009596062e-06,
10915
+ "loss": 1.419,
10916
+ "step": 900000
10917
+ },
10918
+ {
10919
+ "epoch": 12.96,
10920
+ "learning_rate": 7.676953354207885e-06,
10921
+ "loss": 1.418,
10922
+ "step": 900500
10923
+ },
10924
+ {
10925
+ "epoch": 12.97,
10926
+ "learning_rate": 7.62523431942216e-06,
10927
+ "loss": 1.4206,
10928
+ "step": 901000
10929
+ },
10930
+ {
10931
+ "epoch": 12.98,
10932
+ "learning_rate": 7.5733075776694235e-06,
10933
+ "loss": 1.4203,
10934
+ "step": 901500
10935
+ },
10936
+ {
10937
+ "epoch": 12.98,
10938
+ "learning_rate": 7.5215885428837e-06,
10939
+ "loss": 1.4196,
10940
+ "step": 902000
10941
+ },
10942
+ {
10943
+ "epoch": 12.99,
10944
+ "learning_rate": 7.4696618011309645e-06,
10945
+ "loss": 1.4209,
10946
+ "step": 902500
10947
+ },
10948
+ {
10949
+ "epoch": 13.0,
10950
+ "learning_rate": 7.41794276634524e-06,
10951
+ "loss": 1.4191,
10952
+ "step": 903000
10953
+ },
10954
+ {
10955
+ "epoch": 13.0,
10956
+ "eval_accuracy": 0.7223704973673399,
10957
+ "eval_loss": 1.2763671875,
10958
+ "eval_runtime": 708.7365,
10959
+ "eval_samples_per_second": 760.419,
10960
+ "eval_steps_per_second": 31.685,
10961
+ "step": 903149
10962
  }
10963
  ],
10964
  "max_steps": 972622,
10965
  "num_train_epochs": 14,
10966
+ "total_flos": 6.718256910631436e+18,
10967
  "trial_name": null,
10968
  "trial_params": null
10969
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c08ed382f6136d0ce1cf9f9ff6dc9d51facd3c3c7bfb6310570568f912f9d3
3
  size 59121639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e34d647e6884e69d4c0ac0c471739081db7d9613d859e3063a44fbef38b3071
3
  size 59121639
runs/Feb25_11-43-56_user-SYS-5049A-TR/events.out.tfevents.1677293058.user-SYS-5049A-TR.2588949.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83a43a49ddd36ab07608143fe3a530b2c38e16ae89f73d24d4dea0787d6bb273
3
- size 274029
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559e3fd862e8bed3f284246e670ec50c1202e2832b887ae3ddbb381dd1080735
3
+ size 296598