schnell commited on
Commit
0aef3ce
β€’
1 Parent(s): fc753b8

Training in progress, epoch 11

Browse files
last-checkpoint/{global_step694730 β†’ global_step764203}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ee392d88dc868b5890ca7996e6ec339b30f237efa1839e0c64b08dad6289206
3
  size 59134503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957ee02a261b6bfc99fc73ea67bd87b9a19090647bf92442249930ffd0120616
3
  size 59134503
last-checkpoint/{global_step694730 β†’ global_step764203}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e34fc2de8e79bc5058ddaa66e2519ea27f2eddd3f2befc76e9facec35c2db3d3
3
  size 118216675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27d7eb472bc3f4e910186c454447d51ae61ae3e5bc5705aed01117f21e4f439
3
  size 118216675
last-checkpoint/{global_step694730 β†’ global_step764203}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b0d019db7add3c46c5493d8c9d59782d5f412e96cc488af612f2a31efeeb00f
3
  size 118217955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6b6fed03b1341c0bb208a6f9ae54cc64bd7ebea666b859caf3c7f9bcf96c16
3
  size 118217955
last-checkpoint/{global_step694730 β†’ global_step764203}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b13f04c23581271f6328860b9b30785657dd8b4bac3a373cc0aa0343cc20eeb
3
  size 118221091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:799b982154df0374c7243ebd969031c761789dd5cb606eebc1903009c444c1be
3
  size 118221091
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step694730
 
1
+ global_step764203
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4be24da6f40113ade8fa6ddcbe34900b97a89bd592240ef9bd732243443d0b1
3
  size 59121639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda519537384a51d41a4a58b603f16ba211d4cdc516840c5fe985d5f26ea19e0
3
  size 59121639
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6827913d5b5009c4f490af52b5e712248b2c107280cef2a76e7ccaee6a2becfd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5326b7d97861f628b803fe440b0a6b1644e36579b2582f5db7027e77542c648c
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcb3cb336976e2993737c4475a2ad97626d3405ddc90ee58e8b5bb6ffdd6c303
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd99ee4ba032a3620b579bb7224c1482125e732862d1ed96ef21acfe546d242c
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f3e369128165ef5470802131180ef18043cf7f665664dfc6f3fd47e5bc966a9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af18f0195f365b86261ebeb1b8c35c2ae70733f2059a39025bf1bd0e07744a7c
3
  size 14503
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 694730,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8430,11 +8430,854 @@
8430
  "eval_samples_per_second": 823.146,
8431
  "eval_steps_per_second": 34.298,
8432
  "step": 694730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8433
  }
8434
  ],
8435
  "max_steps": 972622,
8436
  "num_train_epochs": 14,
8437
- "total_flos": 4.979440517872878e+18,
8438
  "trial_name": null,
8439
  "trial_params": null
8440
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.0,
5
+ "global_step": 764203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8430
  "eval_samples_per_second": 823.146,
8431
  "eval_steps_per_second": 34.298,
8432
  "step": 694730
8433
+ },
8434
+ {
8435
+ "epoch": 10.0,
8436
+ "learning_rate": 2.8976264286344828e-05,
8437
+ "loss": 1.7335,
8438
+ "step": 695000
8439
+ },
8440
+ {
8441
+ "epoch": 10.01,
8442
+ "learning_rate": 2.8924545251559106e-05,
8443
+ "loss": 1.729,
8444
+ "step": 695500
8445
+ },
8446
+ {
8447
+ "epoch": 10.02,
8448
+ "learning_rate": 2.8872618509806364e-05,
8449
+ "loss": 1.7314,
8450
+ "step": 696000
8451
+ },
8452
+ {
8453
+ "epoch": 10.03,
8454
+ "learning_rate": 2.8820899475020642e-05,
8455
+ "loss": 1.7345,
8456
+ "step": 696500
8457
+ },
8458
+ {
8459
+ "epoch": 10.03,
8460
+ "learning_rate": 2.8768972733267907e-05,
8461
+ "loss": 1.7342,
8462
+ "step": 697000
8463
+ },
8464
+ {
8465
+ "epoch": 10.04,
8466
+ "learning_rate": 2.8717253698482184e-05,
8467
+ "loss": 1.7326,
8468
+ "step": 697500
8469
+ },
8470
+ {
8471
+ "epoch": 10.05,
8472
+ "learning_rate": 2.866532695672945e-05,
8473
+ "loss": 1.7301,
8474
+ "step": 698000
8475
+ },
8476
+ {
8477
+ "epoch": 10.05,
8478
+ "learning_rate": 2.8613607921943724e-05,
8479
+ "loss": 1.7292,
8480
+ "step": 698500
8481
+ },
8482
+ {
8483
+ "epoch": 10.06,
8484
+ "learning_rate": 2.8561681180190985e-05,
8485
+ "loss": 1.7291,
8486
+ "step": 699000
8487
+ },
8488
+ {
8489
+ "epoch": 10.07,
8490
+ "learning_rate": 2.8509962145405263e-05,
8491
+ "loss": 1.7321,
8492
+ "step": 699500
8493
+ },
8494
+ {
8495
+ "epoch": 10.08,
8496
+ "learning_rate": 2.8458035403652528e-05,
8497
+ "loss": 1.7314,
8498
+ "step": 700000
8499
+ },
8500
+ {
8501
+ "epoch": 10.08,
8502
+ "learning_rate": 2.8406316368866802e-05,
8503
+ "loss": 1.7333,
8504
+ "step": 700500
8505
+ },
8506
+ {
8507
+ "epoch": 10.09,
8508
+ "learning_rate": 2.835438962711407e-05,
8509
+ "loss": 1.7329,
8510
+ "step": 701000
8511
+ },
8512
+ {
8513
+ "epoch": 10.1,
8514
+ "learning_rate": 2.8302670592328345e-05,
8515
+ "loss": 1.7337,
8516
+ "step": 701500
8517
+ },
8518
+ {
8519
+ "epoch": 10.1,
8520
+ "learning_rate": 2.8250743850575613e-05,
8521
+ "loss": 1.7346,
8522
+ "step": 702000
8523
+ },
8524
+ {
8525
+ "epoch": 10.11,
8526
+ "learning_rate": 2.8199024815789887e-05,
8527
+ "loss": 1.734,
8528
+ "step": 702500
8529
+ },
8530
+ {
8531
+ "epoch": 10.12,
8532
+ "learning_rate": 2.814709807403715e-05,
8533
+ "loss": 1.736,
8534
+ "step": 703000
8535
+ },
8536
+ {
8537
+ "epoch": 10.13,
8538
+ "learning_rate": 2.8095379039251423e-05,
8539
+ "loss": 1.7305,
8540
+ "step": 703500
8541
+ },
8542
+ {
8543
+ "epoch": 10.13,
8544
+ "learning_rate": 2.804345229749869e-05,
8545
+ "loss": 1.7273,
8546
+ "step": 704000
8547
+ },
8548
+ {
8549
+ "epoch": 10.14,
8550
+ "learning_rate": 2.7991733262712966e-05,
8551
+ "loss": 1.734,
8552
+ "step": 704500
8553
+ },
8554
+ {
8555
+ "epoch": 10.15,
8556
+ "learning_rate": 2.793980652096023e-05,
8557
+ "loss": 1.7322,
8558
+ "step": 705000
8559
+ },
8560
+ {
8561
+ "epoch": 10.16,
8562
+ "learning_rate": 2.788808748617451e-05,
8563
+ "loss": 1.7312,
8564
+ "step": 705500
8565
+ },
8566
+ {
8567
+ "epoch": 10.16,
8568
+ "learning_rate": 2.783616074442177e-05,
8569
+ "loss": 1.7309,
8570
+ "step": 706000
8571
+ },
8572
+ {
8573
+ "epoch": 10.17,
8574
+ "learning_rate": 2.7784441709636045e-05,
8575
+ "loss": 1.7292,
8576
+ "step": 706500
8577
+ },
8578
+ {
8579
+ "epoch": 10.18,
8580
+ "learning_rate": 2.773251496788331e-05,
8581
+ "loss": 1.7313,
8582
+ "step": 707000
8583
+ },
8584
+ {
8585
+ "epoch": 10.18,
8586
+ "learning_rate": 2.7680795933097587e-05,
8587
+ "loss": 1.7328,
8588
+ "step": 707500
8589
+ },
8590
+ {
8591
+ "epoch": 10.19,
8592
+ "learning_rate": 2.7628869191344852e-05,
8593
+ "loss": 1.7298,
8594
+ "step": 708000
8595
+ },
8596
+ {
8597
+ "epoch": 10.2,
8598
+ "learning_rate": 2.757715015655913e-05,
8599
+ "loss": 1.73,
8600
+ "step": 708500
8601
+ },
8602
+ {
8603
+ "epoch": 10.21,
8604
+ "learning_rate": 2.7525223414806395e-05,
8605
+ "loss": 1.7338,
8606
+ "step": 709000
8607
+ },
8608
+ {
8609
+ "epoch": 10.21,
8610
+ "learning_rate": 2.747350438002067e-05,
8611
+ "loss": 1.7327,
8612
+ "step": 709500
8613
+ },
8614
+ {
8615
+ "epoch": 10.22,
8616
+ "learning_rate": 2.742157763826793e-05,
8617
+ "loss": 1.7283,
8618
+ "step": 710000
8619
+ },
8620
+ {
8621
+ "epoch": 10.23,
8622
+ "learning_rate": 2.736985860348221e-05,
8623
+ "loss": 1.7319,
8624
+ "step": 710500
8625
+ },
8626
+ {
8627
+ "epoch": 10.23,
8628
+ "learning_rate": 2.7317931861729473e-05,
8629
+ "loss": 1.729,
8630
+ "step": 711000
8631
+ },
8632
+ {
8633
+ "epoch": 10.24,
8634
+ "learning_rate": 2.7266212826943748e-05,
8635
+ "loss": 1.7318,
8636
+ "step": 711500
8637
+ },
8638
+ {
8639
+ "epoch": 10.25,
8640
+ "learning_rate": 2.7214286085191016e-05,
8641
+ "loss": 1.7307,
8642
+ "step": 712000
8643
+ },
8644
+ {
8645
+ "epoch": 10.26,
8646
+ "learning_rate": 2.716256705040529e-05,
8647
+ "loss": 1.7321,
8648
+ "step": 712500
8649
+ },
8650
+ {
8651
+ "epoch": 10.26,
8652
+ "learning_rate": 2.7110640308652552e-05,
8653
+ "loss": 1.7313,
8654
+ "step": 713000
8655
+ },
8656
+ {
8657
+ "epoch": 10.27,
8658
+ "learning_rate": 2.7058921273866826e-05,
8659
+ "loss": 1.7283,
8660
+ "step": 713500
8661
+ },
8662
+ {
8663
+ "epoch": 10.28,
8664
+ "learning_rate": 2.7006994532114095e-05,
8665
+ "loss": 1.7248,
8666
+ "step": 714000
8667
+ },
8668
+ {
8669
+ "epoch": 10.28,
8670
+ "learning_rate": 2.695527549732837e-05,
8671
+ "loss": 1.7267,
8672
+ "step": 714500
8673
+ },
8674
+ {
8675
+ "epoch": 10.29,
8676
+ "learning_rate": 2.6903348755575637e-05,
8677
+ "loss": 1.7297,
8678
+ "step": 715000
8679
+ },
8680
+ {
8681
+ "epoch": 10.3,
8682
+ "learning_rate": 2.685162972078991e-05,
8683
+ "loss": 1.7297,
8684
+ "step": 715500
8685
+ },
8686
+ {
8687
+ "epoch": 10.31,
8688
+ "learning_rate": 2.6799702979037176e-05,
8689
+ "loss": 1.7281,
8690
+ "step": 716000
8691
+ },
8692
+ {
8693
+ "epoch": 10.31,
8694
+ "learning_rate": 2.6747983944251454e-05,
8695
+ "loss": 1.7306,
8696
+ "step": 716500
8697
+ },
8698
+ {
8699
+ "epoch": 10.32,
8700
+ "learning_rate": 2.6696057202498716e-05,
8701
+ "loss": 1.7297,
8702
+ "step": 717000
8703
+ },
8704
+ {
8705
+ "epoch": 10.33,
8706
+ "learning_rate": 2.664433816771299e-05,
8707
+ "loss": 1.7319,
8708
+ "step": 717500
8709
+ },
8710
+ {
8711
+ "epoch": 10.33,
8712
+ "learning_rate": 2.6592411425960255e-05,
8713
+ "loss": 1.7312,
8714
+ "step": 718000
8715
+ },
8716
+ {
8717
+ "epoch": 10.34,
8718
+ "learning_rate": 2.6540692391174533e-05,
8719
+ "loss": 1.7296,
8720
+ "step": 718500
8721
+ },
8722
+ {
8723
+ "epoch": 10.35,
8724
+ "learning_rate": 2.6488765649421798e-05,
8725
+ "loss": 1.7301,
8726
+ "step": 719000
8727
+ },
8728
+ {
8729
+ "epoch": 10.36,
8730
+ "learning_rate": 2.6437046614636075e-05,
8731
+ "loss": 1.7252,
8732
+ "step": 719500
8733
+ },
8734
+ {
8735
+ "epoch": 10.36,
8736
+ "learning_rate": 2.6385119872883333e-05,
8737
+ "loss": 1.7303,
8738
+ "step": 720000
8739
+ },
8740
+ {
8741
+ "epoch": 10.37,
8742
+ "learning_rate": 2.633340083809761e-05,
8743
+ "loss": 1.7276,
8744
+ "step": 720500
8745
+ },
8746
+ {
8747
+ "epoch": 10.38,
8748
+ "learning_rate": 2.6281474096344876e-05,
8749
+ "loss": 1.7308,
8750
+ "step": 721000
8751
+ },
8752
+ {
8753
+ "epoch": 10.39,
8754
+ "learning_rate": 2.6229755061559154e-05,
8755
+ "loss": 1.7275,
8756
+ "step": 721500
8757
+ },
8758
+ {
8759
+ "epoch": 10.39,
8760
+ "learning_rate": 2.617782831980642e-05,
8761
+ "loss": 1.7294,
8762
+ "step": 722000
8763
+ },
8764
+ {
8765
+ "epoch": 10.4,
8766
+ "learning_rate": 2.6126109285020693e-05,
8767
+ "loss": 1.7296,
8768
+ "step": 722500
8769
+ },
8770
+ {
8771
+ "epoch": 10.41,
8772
+ "learning_rate": 2.607418254326796e-05,
8773
+ "loss": 1.7331,
8774
+ "step": 723000
8775
+ },
8776
+ {
8777
+ "epoch": 10.41,
8778
+ "learning_rate": 2.6022463508482236e-05,
8779
+ "loss": 1.7316,
8780
+ "step": 723500
8781
+ },
8782
+ {
8783
+ "epoch": 10.42,
8784
+ "learning_rate": 2.5970536766729497e-05,
8785
+ "loss": 1.7348,
8786
+ "step": 724000
8787
+ },
8788
+ {
8789
+ "epoch": 10.43,
8790
+ "learning_rate": 2.5918817731943772e-05,
8791
+ "loss": 1.7302,
8792
+ "step": 724500
8793
+ },
8794
+ {
8795
+ "epoch": 10.44,
8796
+ "learning_rate": 2.586689099019104e-05,
8797
+ "loss": 1.7292,
8798
+ "step": 725000
8799
+ },
8800
+ {
8801
+ "epoch": 10.44,
8802
+ "learning_rate": 2.5815171955405314e-05,
8803
+ "loss": 1.7273,
8804
+ "step": 725500
8805
+ },
8806
+ {
8807
+ "epoch": 10.45,
8808
+ "learning_rate": 2.5763245213652583e-05,
8809
+ "loss": 1.7286,
8810
+ "step": 726000
8811
+ },
8812
+ {
8813
+ "epoch": 10.46,
8814
+ "learning_rate": 2.5711526178866857e-05,
8815
+ "loss": 1.7255,
8816
+ "step": 726500
8817
+ },
8818
+ {
8819
+ "epoch": 10.46,
8820
+ "learning_rate": 2.565959943711412e-05,
8821
+ "loss": 1.7281,
8822
+ "step": 727000
8823
+ },
8824
+ {
8825
+ "epoch": 10.47,
8826
+ "learning_rate": 2.5607880402328393e-05,
8827
+ "loss": 1.7314,
8828
+ "step": 727500
8829
+ },
8830
+ {
8831
+ "epoch": 10.48,
8832
+ "learning_rate": 2.555595366057566e-05,
8833
+ "loss": 1.7246,
8834
+ "step": 728000
8835
+ },
8836
+ {
8837
+ "epoch": 10.49,
8838
+ "learning_rate": 2.5504234625789936e-05,
8839
+ "loss": 1.729,
8840
+ "step": 728500
8841
+ },
8842
+ {
8843
+ "epoch": 10.49,
8844
+ "learning_rate": 2.54523078840372e-05,
8845
+ "loss": 1.7246,
8846
+ "step": 729000
8847
+ },
8848
+ {
8849
+ "epoch": 10.5,
8850
+ "learning_rate": 2.5400588849251478e-05,
8851
+ "loss": 1.73,
8852
+ "step": 729500
8853
+ },
8854
+ {
8855
+ "epoch": 10.51,
8856
+ "learning_rate": 2.5348662107498743e-05,
8857
+ "loss": 1.7267,
8858
+ "step": 730000
8859
+ },
8860
+ {
8861
+ "epoch": 10.51,
8862
+ "learning_rate": 2.529694307271302e-05,
8863
+ "loss": 1.7278,
8864
+ "step": 730500
8865
+ },
8866
+ {
8867
+ "epoch": 10.52,
8868
+ "learning_rate": 2.524501633096028e-05,
8869
+ "loss": 1.7264,
8870
+ "step": 731000
8871
+ },
8872
+ {
8873
+ "epoch": 10.53,
8874
+ "learning_rate": 2.5193297296174557e-05,
8875
+ "loss": 1.7308,
8876
+ "step": 731500
8877
+ },
8878
+ {
8879
+ "epoch": 10.54,
8880
+ "learning_rate": 2.514137055442182e-05,
8881
+ "loss": 1.7262,
8882
+ "step": 732000
8883
+ },
8884
+ {
8885
+ "epoch": 10.54,
8886
+ "learning_rate": 2.50896515196361e-05,
8887
+ "loss": 1.7284,
8888
+ "step": 732500
8889
+ },
8890
+ {
8891
+ "epoch": 10.55,
8892
+ "learning_rate": 2.5037724777883364e-05,
8893
+ "loss": 1.727,
8894
+ "step": 733000
8895
+ },
8896
+ {
8897
+ "epoch": 10.56,
8898
+ "learning_rate": 2.498600574309764e-05,
8899
+ "loss": 1.7257,
8900
+ "step": 733500
8901
+ },
8902
+ {
8903
+ "epoch": 10.57,
8904
+ "learning_rate": 2.4934079001344904e-05,
8905
+ "loss": 1.7264,
8906
+ "step": 734000
8907
+ },
8908
+ {
8909
+ "epoch": 10.57,
8910
+ "learning_rate": 2.4882359966559178e-05,
8911
+ "loss": 1.7271,
8912
+ "step": 734500
8913
+ },
8914
+ {
8915
+ "epoch": 10.58,
8916
+ "learning_rate": 2.4830433224806443e-05,
8917
+ "loss": 1.7238,
8918
+ "step": 735000
8919
+ },
8920
+ {
8921
+ "epoch": 10.59,
8922
+ "learning_rate": 2.4778714190020717e-05,
8923
+ "loss": 1.7282,
8924
+ "step": 735500
8925
+ },
8926
+ {
8927
+ "epoch": 10.59,
8928
+ "learning_rate": 2.4726787448267985e-05,
8929
+ "loss": 1.728,
8930
+ "step": 736000
8931
+ },
8932
+ {
8933
+ "epoch": 10.6,
8934
+ "learning_rate": 2.467506841348226e-05,
8935
+ "loss": 1.7236,
8936
+ "step": 736500
8937
+ },
8938
+ {
8939
+ "epoch": 10.61,
8940
+ "learning_rate": 2.4623141671729525e-05,
8941
+ "loss": 1.7285,
8942
+ "step": 737000
8943
+ },
8944
+ {
8945
+ "epoch": 10.62,
8946
+ "learning_rate": 2.45714226369438e-05,
8947
+ "loss": 1.7228,
8948
+ "step": 737500
8949
+ },
8950
+ {
8951
+ "epoch": 10.62,
8952
+ "learning_rate": 2.4519495895191067e-05,
8953
+ "loss": 1.7276,
8954
+ "step": 738000
8955
+ },
8956
+ {
8957
+ "epoch": 10.63,
8958
+ "learning_rate": 2.4467776860405342e-05,
8959
+ "loss": 1.7287,
8960
+ "step": 738500
8961
+ },
8962
+ {
8963
+ "epoch": 10.64,
8964
+ "learning_rate": 2.4415850118652607e-05,
8965
+ "loss": 1.7278,
8966
+ "step": 739000
8967
+ },
8968
+ {
8969
+ "epoch": 10.64,
8970
+ "learning_rate": 2.436413108386688e-05,
8971
+ "loss": 1.728,
8972
+ "step": 739500
8973
+ },
8974
+ {
8975
+ "epoch": 10.65,
8976
+ "learning_rate": 2.431220434211415e-05,
8977
+ "loss": 1.7278,
8978
+ "step": 740000
8979
+ },
8980
+ {
8981
+ "epoch": 10.66,
8982
+ "learning_rate": 2.4260485307328424e-05,
8983
+ "loss": 1.7244,
8984
+ "step": 740500
8985
+ },
8986
+ {
8987
+ "epoch": 10.67,
8988
+ "learning_rate": 2.4208558565575685e-05,
8989
+ "loss": 1.7253,
8990
+ "step": 741000
8991
+ },
8992
+ {
8993
+ "epoch": 10.67,
8994
+ "learning_rate": 2.4156839530789963e-05,
8995
+ "loss": 1.7291,
8996
+ "step": 741500
8997
+ },
8998
+ {
8999
+ "epoch": 10.68,
9000
+ "learning_rate": 2.4104912789037224e-05,
9001
+ "loss": 1.7234,
9002
+ "step": 742000
9003
+ },
9004
+ {
9005
+ "epoch": 10.69,
9006
+ "learning_rate": 2.4053193754251502e-05,
9007
+ "loss": 1.7239,
9008
+ "step": 742500
9009
+ },
9010
+ {
9011
+ "epoch": 10.69,
9012
+ "learning_rate": 2.4001267012498767e-05,
9013
+ "loss": 1.726,
9014
+ "step": 743000
9015
+ },
9016
+ {
9017
+ "epoch": 10.7,
9018
+ "learning_rate": 2.3949547977713045e-05,
9019
+ "loss": 1.729,
9020
+ "step": 743500
9021
+ },
9022
+ {
9023
+ "epoch": 10.71,
9024
+ "learning_rate": 2.3897621235960306e-05,
9025
+ "loss": 1.7244,
9026
+ "step": 744000
9027
+ },
9028
+ {
9029
+ "epoch": 10.72,
9030
+ "learning_rate": 2.3845902201174584e-05,
9031
+ "loss": 1.724,
9032
+ "step": 744500
9033
+ },
9034
+ {
9035
+ "epoch": 10.72,
9036
+ "learning_rate": 2.379397545942185e-05,
9037
+ "loss": 1.7243,
9038
+ "step": 745000
9039
+ },
9040
+ {
9041
+ "epoch": 10.73,
9042
+ "learning_rate": 2.3742256424636123e-05,
9043
+ "loss": 1.7262,
9044
+ "step": 745500
9045
+ },
9046
+ {
9047
+ "epoch": 10.74,
9048
+ "learning_rate": 2.369032968288339e-05,
9049
+ "loss": 1.7221,
9050
+ "step": 746000
9051
+ },
9052
+ {
9053
+ "epoch": 10.75,
9054
+ "learning_rate": 2.3638610648097663e-05,
9055
+ "loss": 1.7231,
9056
+ "step": 746500
9057
+ },
9058
+ {
9059
+ "epoch": 10.75,
9060
+ "learning_rate": 2.358668390634493e-05,
9061
+ "loss": 1.7281,
9062
+ "step": 747000
9063
+ },
9064
+ {
9065
+ "epoch": 10.76,
9066
+ "learning_rate": 2.3534964871559205e-05,
9067
+ "loss": 1.728,
9068
+ "step": 747500
9069
+ },
9070
+ {
9071
+ "epoch": 10.77,
9072
+ "learning_rate": 2.348303812980647e-05,
9073
+ "loss": 1.7238,
9074
+ "step": 748000
9075
+ },
9076
+ {
9077
+ "epoch": 10.77,
9078
+ "learning_rate": 2.3431319095020745e-05,
9079
+ "loss": 1.7245,
9080
+ "step": 748500
9081
+ },
9082
+ {
9083
+ "epoch": 10.78,
9084
+ "learning_rate": 2.337939235326801e-05,
9085
+ "loss": 1.7276,
9086
+ "step": 749000
9087
+ },
9088
+ {
9089
+ "epoch": 10.79,
9090
+ "learning_rate": 2.3327673318482287e-05,
9091
+ "loss": 1.7243,
9092
+ "step": 749500
9093
+ },
9094
+ {
9095
+ "epoch": 10.8,
9096
+ "learning_rate": 2.3275746576729552e-05,
9097
+ "loss": 1.7242,
9098
+ "step": 750000
9099
+ },
9100
+ {
9101
+ "epoch": 10.8,
9102
+ "learning_rate": 2.3224027541943827e-05,
9103
+ "loss": 1.7268,
9104
+ "step": 750500
9105
+ },
9106
+ {
9107
+ "epoch": 10.81,
9108
+ "learning_rate": 2.317210080019109e-05,
9109
+ "loss": 1.7224,
9110
+ "step": 751000
9111
+ },
9112
+ {
9113
+ "epoch": 10.82,
9114
+ "learning_rate": 2.3120174058438356e-05,
9115
+ "loss": 1.7241,
9116
+ "step": 751500
9117
+ },
9118
+ {
9119
+ "epoch": 10.82,
9120
+ "learning_rate": 2.3068455023652634e-05,
9121
+ "loss": 1.7226,
9122
+ "step": 752000
9123
+ },
9124
+ {
9125
+ "epoch": 10.83,
9126
+ "learning_rate": 2.3016528281899896e-05,
9127
+ "loss": 1.7251,
9128
+ "step": 752500
9129
+ },
9130
+ {
9131
+ "epoch": 10.84,
9132
+ "learning_rate": 2.2964809247114173e-05,
9133
+ "loss": 1.7182,
9134
+ "step": 753000
9135
+ },
9136
+ {
9137
+ "epoch": 10.85,
9138
+ "learning_rate": 2.2912882505361438e-05,
9139
+ "loss": 1.7232,
9140
+ "step": 753500
9141
+ },
9142
+ {
9143
+ "epoch": 10.85,
9144
+ "learning_rate": 2.2861163470575713e-05,
9145
+ "loss": 1.7256,
9146
+ "step": 754000
9147
+ },
9148
+ {
9149
+ "epoch": 10.86,
9150
+ "learning_rate": 2.2809236728822978e-05,
9151
+ "loss": 1.7254,
9152
+ "step": 754500
9153
+ },
9154
+ {
9155
+ "epoch": 10.87,
9156
+ "learning_rate": 2.2757517694037252e-05,
9157
+ "loss": 1.7269,
9158
+ "step": 755000
9159
+ },
9160
+ {
9161
+ "epoch": 10.87,
9162
+ "learning_rate": 2.2705590952284517e-05,
9163
+ "loss": 1.7254,
9164
+ "step": 755500
9165
+ },
9166
+ {
9167
+ "epoch": 10.88,
9168
+ "learning_rate": 2.2653871917498795e-05,
9169
+ "loss": 1.7247,
9170
+ "step": 756000
9171
+ },
9172
+ {
9173
+ "epoch": 10.89,
9174
+ "learning_rate": 2.260194517574606e-05,
9175
+ "loss": 1.7232,
9176
+ "step": 756500
9177
+ },
9178
+ {
9179
+ "epoch": 10.9,
9180
+ "learning_rate": 2.2550226140960334e-05,
9181
+ "loss": 1.7252,
9182
+ "step": 757000
9183
+ },
9184
+ {
9185
+ "epoch": 10.9,
9186
+ "learning_rate": 2.24982993992076e-05,
9187
+ "loss": 1.7227,
9188
+ "step": 757500
9189
+ },
9190
+ {
9191
+ "epoch": 10.91,
9192
+ "learning_rate": 2.2446580364421873e-05,
9193
+ "loss": 1.7207,
9194
+ "step": 758000
9195
+ },
9196
+ {
9197
+ "epoch": 10.92,
9198
+ "learning_rate": 2.239465362266914e-05,
9199
+ "loss": 1.7222,
9200
+ "step": 758500
9201
+ },
9202
+ {
9203
+ "epoch": 10.93,
9204
+ "learning_rate": 2.2342934587883416e-05,
9205
+ "loss": 1.7244,
9206
+ "step": 759000
9207
+ },
9208
+ {
9209
+ "epoch": 10.93,
9210
+ "learning_rate": 2.229100784613068e-05,
9211
+ "loss": 1.7237,
9212
+ "step": 759500
9213
+ },
9214
+ {
9215
+ "epoch": 10.94,
9216
+ "learning_rate": 2.2239288811344955e-05,
9217
+ "loss": 1.7208,
9218
+ "step": 760000
9219
+ },
9220
+ {
9221
+ "epoch": 10.95,
9222
+ "learning_rate": 2.218736206959222e-05,
9223
+ "loss": 1.7214,
9224
+ "step": 760500
9225
+ },
9226
+ {
9227
+ "epoch": 10.95,
9228
+ "learning_rate": 2.2135643034806498e-05,
9229
+ "loss": 1.7169,
9230
+ "step": 761000
9231
+ },
9232
+ {
9233
+ "epoch": 10.96,
9234
+ "learning_rate": 2.208371629305376e-05,
9235
+ "loss": 1.7241,
9236
+ "step": 761500
9237
+ },
9238
+ {
9239
+ "epoch": 10.97,
9240
+ "learning_rate": 2.2031997258268037e-05,
9241
+ "loss": 1.7228,
9242
+ "step": 762000
9243
+ },
9244
+ {
9245
+ "epoch": 10.98,
9246
+ "learning_rate": 2.1980070516515302e-05,
9247
+ "loss": 1.72,
9248
+ "step": 762500
9249
+ },
9250
+ {
9251
+ "epoch": 10.98,
9252
+ "learning_rate": 2.192835148172958e-05,
9253
+ "loss": 1.7202,
9254
+ "step": 763000
9255
+ },
9256
+ {
9257
+ "epoch": 10.99,
9258
+ "learning_rate": 2.187642473997684e-05,
9259
+ "loss": 1.7232,
9260
+ "step": 763500
9261
+ },
9262
+ {
9263
+ "epoch": 11.0,
9264
+ "learning_rate": 2.182470570519112e-05,
9265
+ "loss": 1.7225,
9266
+ "step": 764000
9267
+ },
9268
+ {
9269
+ "epoch": 11.0,
9270
+ "eval_accuracy": 0.6647001699746609,
9271
+ "eval_loss": 1.5908203125,
9272
+ "eval_runtime": 653.6993,
9273
+ "eval_samples_per_second": 824.442,
9274
+ "eval_steps_per_second": 34.352,
9275
+ "step": 764203
9276
  }
9277
  ],
9278
  "max_steps": 972622,
9279
  "num_train_epochs": 14,
9280
+ "total_flos": 5.477391803378303e+18,
9281
  "trial_name": null,
9282
  "trial_params": null
9283
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4be24da6f40113ade8fa6ddcbe34900b97a89bd592240ef9bd732243443d0b1
3
  size 59121639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda519537384a51d41a4a58b603f16ba211d4cdc516840c5fe985d5f26ea19e0
3
  size 59121639
runs/Feb22_11-16-27_user-SYS-5049A-TR/events.out.tfevents.1677032209.user-SYS-5049A-TR.55703.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58a578e0e123afd69e485b9ed663db404d26dff4ee2ea75c65ec0061592c4fd8
3
- size 228875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bdf33703fbf3b6aa16fbab3def8fe9ed924d0ad3c9b789ba3b9b80a46cf39cc
3
+ size 251444