schnell committed on
Commit
00c9cdd
•
1 parent: 12f80a6

Training in progress, epoch 12

last-checkpoint/{global_step764203 → global_step833676}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6d3a944c02da8cab63211ea344457352b1e867c143dd4ba032e13f4d7b053b2
+oid sha256:a10bd8e1bdaf252f74bea2291d5cca8f62eb7c148658b26368a2d6b0ba7c8fa0
 size 59134503
last-checkpoint/{global_step764203 → global_step833676}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f520dd24ba55333384a2e7fe4dbdc72fa22c7573bfd053e0fa6575cc741e005
+oid sha256:d5357b0ddb95f9e6f7bf04d666fc26a1073ddfb37c1db9f1fa161e5b0aa236f0
 size 118216675
last-checkpoint/{global_step764203 → global_step833676}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4870ea75c2c258ed8e3db5b3cacc8389cf85168a775c7b4690e9975010158a97
+oid sha256:9897318fe2bb26a0e6506e150b5359f333188a81a5641fb093924b256734118f
 size 118217955
last-checkpoint/{global_step764203 → global_step833676}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36068e46a55b8e33a89495d15b2c365a6c60a4c5ee94f19d4609ec5ef70f7d16
+oid sha256:edddfd234d5e9f057274e50758e3fad9d9059344c52a509d6c1acb72db8dc867
 size 118221091
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step764203
+global_step833676
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3591f92a3ea75a18befe35c7e5f724aba35f75e5ccf0ac219f5bedffe6eba571
+oid sha256:cc2ca3b5ddf381486e7d313cce4d992c3c21fc5091bd64a28c07dff9fdb4bb4a
 size 59121639
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71e6a469a517553086b640b944dcb307d20c868a8e32c376051d5052c24a23ac
+oid sha256:0bc140d0e95ef9cbc743cac8a7f9d0435dd673e76216c4b818af7bf560901baa
 size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05945fd01c6901c187b7c0caf344165a99a350858b2df6ff012ac2c468bfef10
+oid sha256:c3943c64e1378ed4c3abc5aebafe6c70e5628fb6a86eb5487b1b382b95394539
 size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89b1d92bcb0d80499f8a76592b2f192a99e1349bf695c6b8deeedc4e0f591b5a
+oid sha256:ac632b7a956eded5feca7f7c4414c5ae7b352c7f8180c312ce5d70adc950be46
 size 14503
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.0,
5
- "global_step": 764203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9273,11 +9273,854 @@
9273
  "eval_samples_per_second": 723.769,
9274
  "eval_steps_per_second": 30.157,
9275
  "step": 764203
9276
  }
9277
  ],
9278
  "max_steps": 972622,
9279
  "num_train_epochs": 14,
9280
- "total_flos": 5.771928932257366e+18,
9281
  "trial_name": null,
9282
  "trial_params": null
9283
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9273
  "eval_samples_per_second": 723.769,
9274
  "eval_steps_per_second": 30.157,
9275
  "step": 764203
9276
+ },
9277
+ {
9278
+ "epoch": 11.0,
9279
+ "learning_rate": 2.177277896343838e-05,
9280
+ "loss": 1.275,
9281
+ "step": 764500
9282
+ },
9283
+ {
9284
+ "epoch": 11.01,
9285
+ "learning_rate": 2.1721059928652658e-05,
9286
+ "loss": 1.2762,
9287
+ "step": 765000
9288
+ },
9289
+ {
9290
+ "epoch": 11.02,
9291
+ "learning_rate": 2.1669133186899923e-05,
9292
+ "loss": 1.2754,
9293
+ "step": 765500
9294
+ },
9295
+ {
9296
+ "epoch": 11.03,
9297
+ "learning_rate": 2.1617414152114197e-05,
9298
+ "loss": 1.2784,
9299
+ "step": 766000
9300
+ },
9301
+ {
9302
+ "epoch": 11.03,
9303
+ "learning_rate": 2.1565487410361462e-05,
9304
+ "loss": 1.2729,
9305
+ "step": 766500
9306
+ },
9307
+ {
9308
+ "epoch": 11.04,
9309
+ "learning_rate": 2.1513768375575737e-05,
9310
+ "loss": 1.2745,
9311
+ "step": 767000
9312
+ },
9313
+ {
9314
+ "epoch": 11.05,
9315
+ "learning_rate": 2.1461841633823005e-05,
9316
+ "loss": 1.2754,
9317
+ "step": 767500
9318
+ },
9319
+ {
9320
+ "epoch": 11.05,
9321
+ "learning_rate": 2.141012259903728e-05,
9322
+ "loss": 1.2776,
9323
+ "step": 768000
9324
+ },
9325
+ {
9326
+ "epoch": 11.06,
9327
+ "learning_rate": 2.1358195857284544e-05,
9328
+ "loss": 1.2776,
9329
+ "step": 768500
9330
+ },
9331
+ {
9332
+ "epoch": 11.07,
9333
+ "learning_rate": 2.130647682249882e-05,
9334
+ "loss": 1.2742,
9335
+ "step": 769000
9336
+ },
9337
+ {
9338
+ "epoch": 11.08,
9339
+ "learning_rate": 2.1254550080746087e-05,
9340
+ "loss": 1.2744,
9341
+ "step": 769500
9342
+ },
9343
+ {
9344
+ "epoch": 11.08,
9345
+ "learning_rate": 2.120283104596036e-05,
9346
+ "loss": 1.2783,
9347
+ "step": 770000
9348
+ },
9349
+ {
9350
+ "epoch": 11.09,
9351
+ "learning_rate": 2.1150904304207626e-05,
9352
+ "loss": 1.2766,
9353
+ "step": 770500
9354
+ },
9355
+ {
9356
+ "epoch": 11.1,
9357
+ "learning_rate": 2.10991852694219e-05,
9358
+ "loss": 1.2728,
9359
+ "step": 771000
9360
+ },
9361
+ {
9362
+ "epoch": 11.11,
9363
+ "learning_rate": 2.1047258527669165e-05,
9364
+ "loss": 1.2722,
9365
+ "step": 771500
9366
+ },
9367
+ {
9368
+ "epoch": 11.11,
9369
+ "learning_rate": 2.099553949288344e-05,
9370
+ "loss": 1.2745,
9371
+ "step": 772000
9372
+ },
9373
+ {
9374
+ "epoch": 11.12,
9375
+ "learning_rate": 2.0943612751130705e-05,
9376
+ "loss": 1.2762,
9377
+ "step": 772500
9378
+ },
9379
+ {
9380
+ "epoch": 11.13,
9381
+ "learning_rate": 2.0891893716344982e-05,
9382
+ "loss": 1.2718,
9383
+ "step": 773000
9384
+ },
9385
+ {
9386
+ "epoch": 11.13,
9387
+ "learning_rate": 2.0839966974592244e-05,
9388
+ "loss": 1.2739,
9389
+ "step": 773500
9390
+ },
9391
+ {
9392
+ "epoch": 11.14,
9393
+ "learning_rate": 2.0788247939806522e-05,
9394
+ "loss": 1.2741,
9395
+ "step": 774000
9396
+ },
9397
+ {
9398
+ "epoch": 11.15,
9399
+ "learning_rate": 2.0736321198053787e-05,
9400
+ "loss": 1.2727,
9401
+ "step": 774500
9402
+ },
9403
+ {
9404
+ "epoch": 11.16,
9405
+ "learning_rate": 2.0684602163268064e-05,
9406
+ "loss": 1.2749,
9407
+ "step": 775000
9408
+ },
9409
+ {
9410
+ "epoch": 11.16,
9411
+ "learning_rate": 2.0632675421515326e-05,
9412
+ "loss": 1.2738,
9413
+ "step": 775500
9414
+ },
9415
+ {
9416
+ "epoch": 11.17,
9417
+ "learning_rate": 2.0580956386729604e-05,
9418
+ "loss": 1.276,
9419
+ "step": 776000
9420
+ },
9421
+ {
9422
+ "epoch": 11.18,
9423
+ "learning_rate": 2.052902964497687e-05,
9424
+ "loss": 1.2733,
9425
+ "step": 776500
9426
+ },
9427
+ {
9428
+ "epoch": 11.18,
9429
+ "learning_rate": 2.0477310610191143e-05,
9430
+ "loss": 1.2752,
9431
+ "step": 777000
9432
+ },
9433
+ {
9434
+ "epoch": 11.19,
9435
+ "learning_rate": 2.0425383868438408e-05,
9436
+ "loss": 1.2737,
9437
+ "step": 777500
9438
+ },
9439
+ {
9440
+ "epoch": 11.2,
9441
+ "learning_rate": 2.0373664833652682e-05,
9442
+ "loss": 1.2759,
9443
+ "step": 778000
9444
+ },
9445
+ {
9446
+ "epoch": 11.21,
9447
+ "learning_rate": 2.0321738091899947e-05,
9448
+ "loss": 1.2737,
9449
+ "step": 778500
9450
+ },
9451
+ {
9452
+ "epoch": 11.21,
9453
+ "learning_rate": 2.027001905711422e-05,
9454
+ "loss": 1.2762,
9455
+ "step": 779000
9456
+ },
9457
+ {
9458
+ "epoch": 11.22,
9459
+ "learning_rate": 2.021809231536149e-05,
9460
+ "loss": 1.2741,
9461
+ "step": 779500
9462
+ },
9463
+ {
9464
+ "epoch": 11.23,
9465
+ "learning_rate": 2.0166373280575764e-05,
9466
+ "loss": 1.2751,
9467
+ "step": 780000
9468
+ },
9469
+ {
9470
+ "epoch": 11.23,
9471
+ "learning_rate": 2.011444653882303e-05,
9472
+ "loss": 1.2703,
9473
+ "step": 780500
9474
+ },
9475
+ {
9476
+ "epoch": 11.24,
9477
+ "learning_rate": 2.0062727504037303e-05,
9478
+ "loss": 1.2712,
9479
+ "step": 781000
9480
+ },
9481
+ {
9482
+ "epoch": 11.25,
9483
+ "learning_rate": 2.001080076228457e-05,
9484
+ "loss": 1.2725,
9485
+ "step": 781500
9486
+ },
9487
+ {
9488
+ "epoch": 11.26,
9489
+ "learning_rate": 1.9959081727498846e-05,
9490
+ "loss": 1.2758,
9491
+ "step": 782000
9492
+ },
9493
+ {
9494
+ "epoch": 11.26,
9495
+ "learning_rate": 1.990715498574611e-05,
9496
+ "loss": 1.2721,
9497
+ "step": 782500
9498
+ },
9499
+ {
9500
+ "epoch": 11.27,
9501
+ "learning_rate": 1.9855435950960385e-05,
9502
+ "loss": 1.2733,
9503
+ "step": 783000
9504
+ },
9505
+ {
9506
+ "epoch": 11.28,
9507
+ "learning_rate": 1.980350920920765e-05,
9508
+ "loss": 1.2716,
9509
+ "step": 783500
9510
+ },
9511
+ {
9512
+ "epoch": 11.28,
9513
+ "learning_rate": 1.9751790174421928e-05,
9514
+ "loss": 1.2729,
9515
+ "step": 784000
9516
+ },
9517
+ {
9518
+ "epoch": 11.29,
9519
+ "learning_rate": 1.969986343266919e-05,
9520
+ "loss": 1.2715,
9521
+ "step": 784500
9522
+ },
9523
+ {
9524
+ "epoch": 11.3,
9525
+ "learning_rate": 1.9648144397883467e-05,
9526
+ "loss": 1.2722,
9527
+ "step": 785000
9528
+ },
9529
+ {
9530
+ "epoch": 11.31,
9531
+ "learning_rate": 1.959621765613073e-05,
9532
+ "loss": 1.2745,
9533
+ "step": 785500
9534
+ },
9535
+ {
9536
+ "epoch": 11.31,
9537
+ "learning_rate": 1.954449862134501e-05,
9538
+ "loss": 1.2719,
9539
+ "step": 786000
9540
+ },
9541
+ {
9542
+ "epoch": 11.32,
9543
+ "learning_rate": 1.949257187959227e-05,
9544
+ "loss": 1.2755,
9545
+ "step": 786500
9546
+ },
9547
+ {
9548
+ "epoch": 11.33,
9549
+ "learning_rate": 1.944085284480655e-05,
9550
+ "loss": 1.2732,
9551
+ "step": 787000
9552
+ },
9553
+ {
9554
+ "epoch": 11.34,
9555
+ "learning_rate": 1.938892610305381e-05,
9556
+ "loss": 1.2701,
9557
+ "step": 787500
9558
+ },
9559
+ {
9560
+ "epoch": 11.34,
9561
+ "learning_rate": 1.933720706826809e-05,
9562
+ "loss": 1.2736,
9563
+ "step": 788000
9564
+ },
9565
+ {
9566
+ "epoch": 11.35,
9567
+ "learning_rate": 1.9285280326515353e-05,
9568
+ "loss": 1.2753,
9569
+ "step": 788500
9570
+ },
9571
+ {
9572
+ "epoch": 11.36,
9573
+ "learning_rate": 1.9233561291729628e-05,
9574
+ "loss": 1.2701,
9575
+ "step": 789000
9576
+ },
9577
+ {
9578
+ "epoch": 11.36,
9579
+ "learning_rate": 1.9181634549976893e-05,
9580
+ "loss": 1.2725,
9581
+ "step": 789500
9582
+ },
9583
+ {
9584
+ "epoch": 11.37,
9585
+ "learning_rate": 1.9129915515191167e-05,
9586
+ "loss": 1.2725,
9587
+ "step": 790000
9588
+ },
9589
+ {
9590
+ "epoch": 11.38,
9591
+ "learning_rate": 1.9077988773438435e-05,
9592
+ "loss": 1.2705,
9593
+ "step": 790500
9594
+ },
9595
+ {
9596
+ "epoch": 11.39,
9597
+ "learning_rate": 1.902626973865271e-05,
9598
+ "loss": 1.2739,
9599
+ "step": 791000
9600
+ },
9601
+ {
9602
+ "epoch": 11.39,
9603
+ "learning_rate": 1.8974342996899974e-05,
9604
+ "loss": 1.2696,
9605
+ "step": 791500
9606
+ },
9607
+ {
9608
+ "epoch": 11.4,
9609
+ "learning_rate": 1.892262396211425e-05,
9610
+ "loss": 1.2732,
9611
+ "step": 792000
9612
+ },
9613
+ {
9614
+ "epoch": 11.41,
9615
+ "learning_rate": 1.8870697220361517e-05,
9616
+ "loss": 1.2682,
9617
+ "step": 792500
9618
+ },
9619
+ {
9620
+ "epoch": 11.41,
9621
+ "learning_rate": 1.881897818557579e-05,
9622
+ "loss": 1.2717,
9623
+ "step": 793000
9624
+ },
9625
+ {
9626
+ "epoch": 11.42,
9627
+ "learning_rate": 1.8767051443823056e-05,
9628
+ "loss": 1.2715,
9629
+ "step": 793500
9630
+ },
9631
+ {
9632
+ "epoch": 11.43,
9633
+ "learning_rate": 1.871533240903733e-05,
9634
+ "loss": 1.2696,
9635
+ "step": 794000
9636
+ },
9637
+ {
9638
+ "epoch": 11.44,
9639
+ "learning_rate": 1.8663405667284596e-05,
9640
+ "loss": 1.2723,
9641
+ "step": 794500
9642
+ },
9643
+ {
9644
+ "epoch": 11.44,
9645
+ "learning_rate": 1.861168663249887e-05,
9646
+ "loss": 1.274,
9647
+ "step": 795000
9648
+ },
9649
+ {
9650
+ "epoch": 11.45,
9651
+ "learning_rate": 1.855975989074614e-05,
9652
+ "loss": 1.2716,
9653
+ "step": 795500
9654
+ },
9655
+ {
9656
+ "epoch": 11.46,
9657
+ "learning_rate": 1.8508040855960413e-05,
9658
+ "loss": 1.2727,
9659
+ "step": 796000
9660
+ },
9661
+ {
9662
+ "epoch": 11.46,
9663
+ "learning_rate": 1.8456114114207678e-05,
9664
+ "loss": 1.2708,
9665
+ "step": 796500
9666
+ },
9667
+ {
9668
+ "epoch": 11.47,
9669
+ "learning_rate": 1.8404395079421952e-05,
9670
+ "loss": 1.271,
9671
+ "step": 797000
9672
+ },
9673
+ {
9674
+ "epoch": 11.48,
9675
+ "learning_rate": 1.8352468337669217e-05,
9676
+ "loss": 1.2711,
9677
+ "step": 797500
9678
+ },
9679
+ {
9680
+ "epoch": 11.49,
9681
+ "learning_rate": 1.8300749302883495e-05,
9682
+ "loss": 1.2709,
9683
+ "step": 798000
9684
+ },
9685
+ {
9686
+ "epoch": 11.49,
9687
+ "learning_rate": 1.8248822561130756e-05,
9688
+ "loss": 1.2735,
9689
+ "step": 798500
9690
+ },
9691
+ {
9692
+ "epoch": 11.5,
9693
+ "learning_rate": 1.8197103526345034e-05,
9694
+ "loss": 1.2738,
9695
+ "step": 799000
9696
+ },
9697
+ {
9698
+ "epoch": 11.51,
9699
+ "learning_rate": 1.81451767845923e-05,
9700
+ "loss": 1.2711,
9701
+ "step": 799500
9702
+ },
9703
+ {
9704
+ "epoch": 11.52,
9705
+ "learning_rate": 1.8093457749806577e-05,
9706
+ "loss": 1.2698,
9707
+ "step": 800000
9708
+ },
9709
+ {
9710
+ "epoch": 11.52,
9711
+ "learning_rate": 1.8041531008053838e-05,
9712
+ "loss": 1.2724,
9713
+ "step": 800500
9714
+ },
9715
+ {
9716
+ "epoch": 11.53,
9717
+ "learning_rate": 1.7989811973268116e-05,
9718
+ "loss": 1.2722,
9719
+ "step": 801000
9720
+ },
9721
+ {
9722
+ "epoch": 11.54,
9723
+ "learning_rate": 1.7937885231515377e-05,
9724
+ "loss": 1.2702,
9725
+ "step": 801500
9726
+ },
9727
+ {
9728
+ "epoch": 11.54,
9729
+ "learning_rate": 1.7886166196729655e-05,
9730
+ "loss": 1.2709,
9731
+ "step": 802000
9732
+ },
9733
+ {
9734
+ "epoch": 11.55,
9735
+ "learning_rate": 1.783423945497692e-05,
9736
+ "loss": 1.2713,
9737
+ "step": 802500
9738
+ },
9739
+ {
9740
+ "epoch": 11.56,
9741
+ "learning_rate": 1.7782520420191194e-05,
9742
+ "loss": 1.2681,
9743
+ "step": 803000
9744
+ },
9745
+ {
9746
+ "epoch": 11.57,
9747
+ "learning_rate": 1.773059367843846e-05,
9748
+ "loss": 1.2699,
9749
+ "step": 803500
9750
+ },
9751
+ {
9752
+ "epoch": 11.57,
9753
+ "learning_rate": 1.7678874643652734e-05,
9754
+ "loss": 1.2735,
9755
+ "step": 804000
9756
+ },
9757
+ {
9758
+ "epoch": 11.58,
9759
+ "learning_rate": 1.7626947901900002e-05,
9760
+ "loss": 1.2735,
9761
+ "step": 804500
9762
+ },
9763
+ {
9764
+ "epoch": 11.59,
9765
+ "learning_rate": 1.7575228867114276e-05,
9766
+ "loss": 1.2681,
9767
+ "step": 805000
9768
+ },
9769
+ {
9770
+ "epoch": 11.59,
9771
+ "learning_rate": 1.752330212536154e-05,
9772
+ "loss": 1.2676,
9773
+ "step": 805500
9774
+ },
9775
+ {
9776
+ "epoch": 11.6,
9777
+ "learning_rate": 1.7471583090575816e-05,
9778
+ "loss": 1.2735,
9779
+ "step": 806000
9780
+ },
9781
+ {
9782
+ "epoch": 11.61,
9783
+ "learning_rate": 1.7419656348823084e-05,
9784
+ "loss": 1.2731,
9785
+ "step": 806500
9786
+ },
9787
+ {
9788
+ "epoch": 11.62,
9789
+ "learning_rate": 1.7367937314037358e-05,
9790
+ "loss": 1.2707,
9791
+ "step": 807000
9792
+ },
9793
+ {
9794
+ "epoch": 11.62,
9795
+ "learning_rate": 1.7316010572284623e-05,
9796
+ "loss": 1.2702,
9797
+ "step": 807500
9798
+ },
9799
+ {
9800
+ "epoch": 11.63,
9801
+ "learning_rate": 1.7264291537498897e-05,
9802
+ "loss": 1.2704,
9803
+ "step": 808000
9804
+ },
9805
+ {
9806
+ "epoch": 11.64,
9807
+ "learning_rate": 1.7212364795746162e-05,
9808
+ "loss": 1.2714,
9809
+ "step": 808500
9810
+ },
9811
+ {
9812
+ "epoch": 11.64,
9813
+ "learning_rate": 1.7160645760960437e-05,
9814
+ "loss": 1.2696,
9815
+ "step": 809000
9816
+ },
9817
+ {
9818
+ "epoch": 11.65,
9819
+ "learning_rate": 1.71087190192077e-05,
9820
+ "loss": 1.2687,
9821
+ "step": 809500
9822
+ },
9823
+ {
9824
+ "epoch": 11.66,
9825
+ "learning_rate": 1.705699998442198e-05,
9826
+ "loss": 1.2699,
9827
+ "step": 810000
9828
+ },
9829
+ {
9830
+ "epoch": 11.67,
9831
+ "learning_rate": 1.700507324266924e-05,
9832
+ "loss": 1.2696,
9833
+ "step": 810500
9834
+ },
9835
+ {
9836
+ "epoch": 11.67,
9837
+ "learning_rate": 1.695335420788352e-05,
9838
+ "loss": 1.2726,
9839
+ "step": 811000
9840
+ },
9841
+ {
9842
+ "epoch": 11.68,
9843
+ "learning_rate": 1.6901427466130784e-05,
9844
+ "loss": 1.2691,
9845
+ "step": 811500
9846
+ },
9847
+ {
9848
+ "epoch": 11.69,
9849
+ "learning_rate": 1.684970843134506e-05,
9850
+ "loss": 1.2685,
9851
+ "step": 812000
9852
+ },
9853
+ {
9854
+ "epoch": 11.7,
9855
+ "learning_rate": 1.6797781689592323e-05,
9856
+ "loss": 1.2709,
9857
+ "step": 812500
9858
+ },
9859
+ {
9860
+ "epoch": 11.7,
9861
+ "learning_rate": 1.67460626548066e-05,
9862
+ "loss": 1.2718,
9863
+ "step": 813000
9864
+ },
9865
+ {
9866
+ "epoch": 11.71,
9867
+ "learning_rate": 1.6694135913053865e-05,
9868
+ "loss": 1.269,
9869
+ "step": 813500
9870
+ },
9871
+ {
9872
+ "epoch": 11.72,
9873
+ "learning_rate": 1.664241687826814e-05,
9874
+ "loss": 1.2669,
9875
+ "step": 814000
9876
+ },
9877
+ {
9878
+ "epoch": 11.72,
9879
+ "learning_rate": 1.6590490136515405e-05,
9880
+ "loss": 1.2679,
9881
+ "step": 814500
9882
+ },
9883
+ {
9884
+ "epoch": 11.73,
9885
+ "learning_rate": 1.653877110172968e-05,
9886
+ "loss": 1.2695,
9887
+ "step": 815000
9888
+ },
9889
+ {
9890
+ "epoch": 11.74,
9891
+ "learning_rate": 1.6486844359976944e-05,
9892
+ "loss": 1.2757,
9893
+ "step": 815500
9894
+ },
9895
+ {
9896
+ "epoch": 11.75,
9897
+ "learning_rate": 1.6435125325191222e-05,
9898
+ "loss": 1.2692,
9899
+ "step": 816000
9900
+ },
9901
+ {
9902
+ "epoch": 11.75,
9903
+ "learning_rate": 1.6383198583438487e-05,
9904
+ "loss": 1.2675,
9905
+ "step": 816500
9906
+ },
9907
+ {
9908
+ "epoch": 11.76,
9909
+ "learning_rate": 1.633147954865276e-05,
9910
+ "loss": 1.2689,
9911
+ "step": 817000
9912
+ },
9913
+ {
9914
+ "epoch": 11.77,
9915
+ "learning_rate": 1.6279552806900026e-05,
9916
+ "loss": 1.2712,
9917
+ "step": 817500
9918
+ },
9919
+ {
9920
+ "epoch": 11.77,
9921
+ "learning_rate": 1.62278337721143e-05,
9922
+ "loss": 1.269,
9923
+ "step": 818000
9924
+ },
9925
+ {
9926
+ "epoch": 11.78,
9927
+ "learning_rate": 1.617590703036157e-05,
9928
+ "loss": 1.2699,
9929
+ "step": 818500
9930
+ },
9931
+ {
9932
+ "epoch": 11.79,
9933
+ "learning_rate": 1.6124187995575843e-05,
9934
+ "loss": 1.27,
9935
+ "step": 819000
9936
+ },
9937
+ {
9938
+ "epoch": 11.8,
9939
+ "learning_rate": 1.6072261253823108e-05,
9940
+ "loss": 1.2709,
9941
+ "step": 819500
9942
+ },
9943
+ {
9944
+ "epoch": 11.8,
9945
+ "learning_rate": 1.6020542219037382e-05,
9946
+ "loss": 1.2704,
9947
+ "step": 820000
9948
+ },
9949
+ {
9950
+ "epoch": 11.81,
9951
+ "learning_rate": 1.5968615477284647e-05,
9952
+ "loss": 1.2693,
9953
+ "step": 820500
9954
+ },
9955
+ {
9956
+ "epoch": 11.82,
9957
+ "learning_rate": 1.5916896442498925e-05,
9958
+ "loss": 1.2687,
9959
+ "step": 821000
9960
+ },
9961
+ {
9962
+ "epoch": 11.82,
9963
+ "learning_rate": 1.5864969700746186e-05,
9964
+ "loss": 1.272,
9965
+ "step": 821500
9966
+ },
9967
+ {
9968
+ "epoch": 11.83,
9969
+ "learning_rate": 1.5813250665960464e-05,
9970
+ "loss": 1.2649,
9971
+ "step": 822000
9972
+ },
9973
+ {
9974
+ "epoch": 11.84,
9975
+ "learning_rate": 1.576132392420773e-05,
9976
+ "loss": 1.2684,
9977
+ "step": 822500
9978
+ },
9979
+ {
9980
+ "epoch": 11.85,
9981
+ "learning_rate": 1.5709604889422007e-05,
9982
+ "loss": 1.2676,
9983
+ "step": 823000
9984
+ },
9985
+ {
9986
+ "epoch": 11.85,
9987
+ "learning_rate": 1.565767814766927e-05,
9988
+ "loss": 1.2697,
9989
+ "step": 823500
9990
+ },
9991
+ {
9992
+ "epoch": 11.86,
9993
+ "learning_rate": 1.5605959112883546e-05,
9994
+ "loss": 1.2692,
9995
+ "step": 824000
9996
+ },
9997
+ {
9998
+ "epoch": 11.87,
9999
+ "learning_rate": 1.5554032371130808e-05,
10000
+ "loss": 1.2675,
10001
+ "step": 824500
10002
+ },
10003
+ {
10004
+ "epoch": 11.88,
10005
+ "learning_rate": 1.5502313336345085e-05,
10006
+ "loss": 1.2674,
10007
+ "step": 825000
10008
+ },
10009
+ {
10010
+ "epoch": 11.88,
10011
+ "learning_rate": 1.545038659459235e-05,
10012
+ "loss": 1.2704,
10013
+ "step": 825500
10014
+ },
10015
+ {
10016
+ "epoch": 11.89,
10017
+ "learning_rate": 1.5398667559806625e-05,
10018
+ "loss": 1.266,
10019
+ "step": 826000
10020
+ },
10021
+ {
10022
+ "epoch": 11.9,
10023
+ "learning_rate": 1.534674081805389e-05,
10024
+ "loss": 1.2701,
10025
+ "step": 826500
10026
+ },
10027
+ {
10028
+ "epoch": 11.9,
10029
+ "learning_rate": 1.5295021783268164e-05,
10030
+ "loss": 1.2666,
10031
+ "step": 827000
10032
+ },
10033
+ {
10034
+ "epoch": 11.91,
10035
+ "learning_rate": 1.5243095041515432e-05,
10036
+ "loss": 1.2715,
10037
+ "step": 827500
10038
+ },
10039
+ {
10040
+ "epoch": 11.92,
10041
+ "learning_rate": 1.5191376006729707e-05,
10042
+ "loss": 1.2691,
10043
+ "step": 828000
10044
+ },
10045
+ {
10046
+ "epoch": 11.93,
10047
+ "learning_rate": 1.5139449264976971e-05,
10048
+ "loss": 1.2685,
10049
+ "step": 828500
10050
+ },
10051
+ {
10052
+ "epoch": 11.93,
10053
+ "learning_rate": 1.5087730230191246e-05,
10054
+ "loss": 1.2707,
10055
+ "step": 829000
10056
+ },
10057
+ {
10058
+ "epoch": 11.94,
10059
+ "learning_rate": 1.5035803488438512e-05,
10060
+ "loss": 1.2687,
10061
+ "step": 829500
10062
+ },
10063
+ {
10064
+ "epoch": 11.95,
10065
+ "learning_rate": 1.4984084453652788e-05,
10066
+ "loss": 1.2697,
10067
+ "step": 830000
10068
+ },
10069
+ {
10070
+ "epoch": 11.95,
10071
+ "learning_rate": 1.4932157711900052e-05,
10072
+ "loss": 1.2665,
10073
+ "step": 830500
10074
+ },
10075
+ {
10076
+ "epoch": 11.96,
10077
+ "learning_rate": 1.4880438677114328e-05,
10078
+ "loss": 1.2661,
10079
+ "step": 831000
10080
+ },
10081
+ {
10082
+ "epoch": 11.97,
10083
+ "learning_rate": 1.4828511935361591e-05,
10084
+ "loss": 1.2696,
10085
+ "step": 831500
10086
+ },
10087
+ {
10088
+ "epoch": 11.98,
10089
+ "learning_rate": 1.4776792900575867e-05,
10090
+ "loss": 1.2692,
10091
+ "step": 832000
10092
+ },
10093
+ {
10094
+ "epoch": 11.98,
10095
+ "learning_rate": 1.4724866158823134e-05,
10096
+ "loss": 1.2665,
10097
+ "step": 832500
10098
+ },
10099
+ {
10100
+ "epoch": 11.99,
10101
+ "learning_rate": 1.467314712403741e-05,
10102
+ "loss": 1.2682,
10103
+ "step": 833000
10104
+ },
10105
+ {
10106
+ "epoch": 12.0,
10107
+ "learning_rate": 1.4621220382284673e-05,
10108
+ "loss": 1.2694,
10109
+ "step": 833500
10110
+ },
10111
+ {
10112
+ "epoch": 12.0,
10113
+ "eval_accuracy": 0.7431053375139824,
10114
+ "eval_loss": 1.1376953125,
10115
+ "eval_runtime": 747.4572,
10116
+ "eval_samples_per_second": 721.029,
10117
+ "eval_steps_per_second": 30.043,
10118
+ "step": 833676
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
+ "total_flos": 6.296629362397544e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3591f92a3ea75a18befe35c7e5f724aba35f75e5ccf0ac219f5bedffe6eba571
+oid sha256:cc2ca3b5ddf381486e7d313cce4d992c3c21fc5091bd64a28c07dff9fdb4bb4a
 size 59121639
runs/May29_03-16-06_user-SYS-5049A-TR/events.out.tfevents.1685297788.user-SYS-5049A-TR.557399.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76aeba89d69d5e31aa1c4be4066b72619fc2267f0a0d042de6208cf72948822f
-size 251456
+oid sha256:c999829c90a5cf8554f15412e37f8e4a6d962f8db875b9ed57de0c61a7b22a46
+size 274025