Training in progress, step 678, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +558 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:798495212ffbe2d661f45c58b4dde2b470a5d9405f2ca836ceffeb3519f87b9f
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c5e63de907b19c7910db799e3b78a42b2e9f9179854322d92c712e5afde4b89
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e527d5c564e6baad9c3626c20566561c3f3796e8bd110aee53f3262f7443ad6b
 size 37965684

 version https://git-lfs.github.com/spec/v1
+oid sha256:a14e009833d53fbe9c82d489fe0e9b4ad66c19f8ea8adae05db1259a8aef663d
 size 37965684

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d50938b0c074c46e5ebfab86bc5496d4da624a1aed174c37e59cd548461c1665
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6b4a91354537b8cb9fcab809682ea0aa63e50d33b8c1b98a541a3219c1729c7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afbdecfe8917cda87c4dc409742f9fdbc81109fe8f3de28716a1b9d03463baff
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f12e5684e3c4181284222b5bfd802e16a9d019f6d061509f1ae13f7ad0785a15
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.048067808151245,
   "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 1.7679558011049723,
   "eval_steps": 50,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4311,6 +4311,560 @@
       "eval_samples_per_second": 34.057,
       "eval_steps_per_second": 8.514,
       "step": 600
     }
   ],
   "logging_steps": 1,
@@ -4334,12 +4888,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.136841328025928e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.048067808151245,
   "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 1.9977900552486187,
   "eval_steps": 50,
+  "global_step": 678,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 34.057,
       "eval_steps_per_second": 8.514,
       "step": 600
+    },
+    {
+      "epoch": 1.770902394106814,
+      "grad_norm": 0.3590349853038788,
+      "learning_rate": 6.485553382910026e-06,
+      "loss": 1.9617,
+      "step": 601
+    },
+    {
+      "epoch": 1.7738489871086556,
+      "grad_norm": 0.35265564918518066,
+      "learning_rate": 6.319977191187232e-06,
+      "loss": 2.0291,
+      "step": 602
+    },
+    {
+      "epoch": 1.7767955801104973,
+      "grad_norm": 0.34210672974586487,
+      "learning_rate": 6.156473015002029e-06,
+      "loss": 1.9461,
+      "step": 603
+    },
+    {
+      "epoch": 1.7797421731123388,
+      "grad_norm": 0.36943700909614563,
+      "learning_rate": 5.995044470741151e-06,
+      "loss": 1.9437,
+      "step": 604
+    },
+    {
+      "epoch": 1.7826887661141804,
+      "grad_norm": 0.3616660237312317,
+      "learning_rate": 5.835695128882513e-06,
+      "loss": 1.9987,
+      "step": 605
+    },
+    {
+      "epoch": 1.7856353591160221,
+      "grad_norm": 0.35595205426216125,
+      "learning_rate": 5.678428513916212e-06,
+      "loss": 2.0232,
+      "step": 606
+    },
+    {
+      "epoch": 1.7885819521178639,
+      "grad_norm": 0.3314264416694641,
+      "learning_rate": 5.5232481042665764e-06,
+      "loss": 1.7963,
+      "step": 607
+    },
+    {
+      "epoch": 1.7915285451197054,
+      "grad_norm": 0.3526301681995392,
+      "learning_rate": 5.370157332215209e-06,
+      "loss": 2.0898,
+      "step": 608
+    },
+    {
+      "epoch": 1.794475138121547,
+      "grad_norm": 0.3451070785522461,
+      "learning_rate": 5.219159583825106e-06,
+      "loss": 1.8876,
+      "step": 609
+    },
+    {
+      "epoch": 1.7974217311233884,
+      "grad_norm": 0.34398043155670166,
+      "learning_rate": 5.07025819886574e-06,
+      "loss": 1.9447,
+      "step": 610
+    },
+    {
+      "epoch": 1.8003683241252302,
+      "grad_norm": 0.33297380805015564,
+      "learning_rate": 4.923456470739219e-06,
+      "loss": 1.9111,
+      "step": 611
+    },
+    {
+      "epoch": 1.803314917127072,
+      "grad_norm": 0.35569652915000916,
+      "learning_rate": 4.778757646407362e-06,
+      "loss": 2.052,
+      "step": 612
+    },
+    {
+      "epoch": 1.8062615101289135,
+      "grad_norm": 0.3721804618835449,
+      "learning_rate": 4.636164926320064e-06,
+      "loss": 2.1146,
+      "step": 613
+    },
+    {
+      "epoch": 1.809208103130755,
+      "grad_norm": 0.3460250198841095,
+      "learning_rate": 4.495681464344259e-06,
+      "loss": 1.9138,
+      "step": 614
+    },
+    {
+      "epoch": 1.8121546961325967,
+      "grad_norm": 0.38758331537246704,
+      "learning_rate": 4.357310367694378e-06,
+      "loss": 1.975,
+      "step": 615
+    },
+    {
+      "epoch": 1.8151012891344385,
+      "grad_norm": 0.37770211696624756,
+      "learning_rate": 4.22105469686348e-06,
+      "loss": 2.0591,
+      "step": 616
+    },
+    {
+      "epoch": 1.81804788213628,
+      "grad_norm": 0.3766396641731262,
+      "learning_rate": 4.086917465555662e-06,
+      "loss": 2.0694,
+      "step": 617
+    },
+    {
+      "epoch": 1.8209944751381215,
+      "grad_norm": 0.3475191593170166,
+      "learning_rate": 3.954901640619368e-06,
+      "loss": 1.9057,
+      "step": 618
+    },
+    {
+      "epoch": 1.823941068139963,
+      "grad_norm": 0.3547375798225403,
+      "learning_rate": 3.825010141981677e-06,
+      "loss": 2.0484,
+      "step": 619
+    },
+    {
+      "epoch": 1.8268876611418048,
+      "grad_norm": 0.35701024532318115,
+      "learning_rate": 3.6972458425838962e-06,
+      "loss": 2.084,
+      "step": 620
+    },
+    {
+      "epoch": 1.8298342541436465,
+      "grad_norm": 0.3376360535621643,
+      "learning_rate": 3.571611568317856e-06,
+      "loss": 1.8885,
+      "step": 621
+    },
+    {
+      "epoch": 1.832780847145488,
+      "grad_norm": 0.34869226813316345,
+      "learning_rate": 3.4481100979635306e-06,
+      "loss": 2.1957,
+      "step": 622
+    },
+    {
+      "epoch": 1.8357274401473296,
+      "grad_norm": 0.3375036418437958,
+      "learning_rate": 3.32674416312746e-06,
+      "loss": 1.8859,
+      "step": 623
+    },
+    {
+      "epoch": 1.838674033149171,
+      "grad_norm": 0.3811117708683014,
+      "learning_rate": 3.207516448182435e-06,
+      "loss": 2.147,
+      "step": 624
+    },
+    {
+      "epoch": 1.8416206261510129,
+      "grad_norm": 0.3357301652431488,
+      "learning_rate": 3.0904295902080326e-06,
+      "loss": 1.7973,
+      "step": 625
+    },
+    {
+      "epoch": 1.8445672191528546,
+      "grad_norm": 0.38550111651420593,
+      "learning_rate": 2.9754861789324073e-06,
+      "loss": 2.1196,
+      "step": 626
+    },
+    {
+      "epoch": 1.8475138121546961,
+      "grad_norm": 0.3680182099342346,
+      "learning_rate": 2.8626887566748807e-06,
+      "loss": 2.0742,
+      "step": 627
+    },
+    {
+      "epoch": 1.8504604051565376,
+      "grad_norm": 0.3618917763233185,
+      "learning_rate": 2.752039818289809e-06,
+      "loss": 2.0812,
+      "step": 628
+    },
+    {
+      "epoch": 1.8534069981583794,
+      "grad_norm": 0.3539152443408966,
+      "learning_rate": 2.6435418111113276e-06,
+      "loss": 1.9961,
+      "step": 629
+    },
+    {
+      "epoch": 1.8563535911602211,
+      "grad_norm": 0.35489708185195923,
+      "learning_rate": 2.537197134899294e-06,
+      "loss": 2.0572,
+      "step": 630
+    },
+    {
+      "epoch": 1.8593001841620627,
+      "grad_norm": 0.34133121371269226,
+      "learning_rate": 2.433008141786153e-06,
+      "loss": 1.9718,
+      "step": 631
+    },
+    {
+      "epoch": 1.8622467771639042,
+      "grad_norm": 0.34101560711860657,
+      "learning_rate": 2.330977136224932e-06,
+      "loss": 1.881,
+      "step": 632
+    },
+    {
+      "epoch": 1.8651933701657457,
+      "grad_norm": 0.3772835433483124,
+      "learning_rate": 2.2311063749382742e-06,
+      "loss": 2.0331,
+      "step": 633
+    },
+    {
+      "epoch": 1.8681399631675875,
+      "grad_norm": 0.3728832006454468,
+      "learning_rate": 2.1333980668685414e-06,
+      "loss": 1.9764,
+      "step": 634
+    },
+    {
+      "epoch": 1.8710865561694292,
+      "grad_norm": 0.36289656162261963,
+      "learning_rate": 2.037854373128889e-06,
+      "loss": 2.006,
+      "step": 635
+    },
+    {
+      "epoch": 1.8740331491712707,
+      "grad_norm": 0.36972489953041077,
+      "learning_rate": 1.9444774069555694e-06,
+      "loss": 2.1053,
+      "step": 636
+    },
+    {
+      "epoch": 1.8769797421731123,
+      "grad_norm": 0.34683194756507874,
+      "learning_rate": 1.8532692336611035e-06,
+      "loss": 2.0312,
+      "step": 637
+    },
+    {
+      "epoch": 1.879926335174954,
+      "grad_norm": 0.3639572858810425,
+      "learning_rate": 1.7642318705886286e-06,
+      "loss": 1.9183,
+      "step": 638
+    },
+    {
+      "epoch": 1.8828729281767957,
+      "grad_norm": 0.3998357355594635,
+      "learning_rate": 1.6773672870673218e-06,
+      "loss": 2.0961,
+      "step": 639
+    },
+    {
+      "epoch": 1.8858195211786373,
+      "grad_norm": 0.3396947383880615,
+      "learning_rate": 1.5926774043687365e-06,
+      "loss": 1.9543,
+      "step": 640
+    },
+    {
+      "epoch": 1.8887661141804788,
+      "grad_norm": 0.3566475510597229,
+      "learning_rate": 1.510164095664457e-06,
+      "loss": 2.0163,
+      "step": 641
+    },
+    {
+      "epoch": 1.8917127071823203,
+      "grad_norm": 0.33658653497695923,
+      "learning_rate": 1.4298291859845214e-06,
+      "loss": 1.9524,
+      "step": 642
+    },
+    {
+      "epoch": 1.894659300184162,
+      "grad_norm": 0.35298457741737366,
+      "learning_rate": 1.351674452177143e-06,
+      "loss": 2.0429,
+      "step": 643
+    },
+    {
+      "epoch": 1.8976058931860038,
+      "grad_norm": 0.353605180978775,
+      "learning_rate": 1.2757016228693964e-06,
+      "loss": 2.0201,
+      "step": 644
+    },
+    {
+      "epoch": 1.9005524861878453,
+      "grad_norm": 0.3473069667816162,
+      "learning_rate": 1.2019123784289488e-06,
+      "loss": 2.054,
+      "step": 645
+    },
+    {
+      "epoch": 1.9034990791896869,
+      "grad_norm": 0.373976469039917,
+      "learning_rate": 1.1303083509269452e-06,
+      "loss": 1.9479,
+      "step": 646
+    },
+    {
+      "epoch": 1.9064456721915284,
+      "grad_norm": 0.3614707887172699,
+      "learning_rate": 1.0608911241018594e-06,
+      "loss": 1.9979,
+      "step": 647
+    },
+    {
+      "epoch": 1.9093922651933701,
+      "grad_norm": 0.35335227847099304,
+      "learning_rate": 9.936622333245104e-07,
+      "loss": 1.9355,
+      "step": 648
+    },
+    {
+      "epoch": 1.9123388581952119,
+      "grad_norm": 0.3640158474445343,
+      "learning_rate": 9.286231655640799e-07,
+      "loss": 2.0179,
+      "step": 649
+    },
+    {
+      "epoch": 1.9152854511970534,
+      "grad_norm": 0.3631095290184021,
+      "learning_rate": 8.657753593552143e-07,
+      "loss": 1.9934,
+      "step": 650
+    },
+    {
+      "epoch": 1.9152854511970534,
+      "eval_loss": 2.0474448204040527,
+      "eval_runtime": 33.5976,
+      "eval_samples_per_second": 34.05,
+      "eval_steps_per_second": 8.513,
+      "step": 650
+    },
+    {
+      "epoch": 1.918232044198895,
+      "grad_norm": 0.3535584509372711,
+      "learning_rate": 8.051202047662187e-07,
+      "loss": 1.9061,
+      "step": 651
+    },
+    {
+      "epoch": 1.9211786372007367,
+      "grad_norm": 0.3503694534301758,
+      "learning_rate": 7.466590433683251e-07,
+      "loss": 1.9775,
+      "step": 652
+    },
+    {
+      "epoch": 1.9241252302025784,
+      "grad_norm": 0.3380708396434784,
+      "learning_rate": 6.903931682059827e-07,
+      "loss": 1.8295,
+      "step": 653
+    },
+    {
+      "epoch": 1.92707182320442,
+      "grad_norm": 0.3362259268760681,
+      "learning_rate": 6.363238237683033e-07,
+      "loss": 1.7838,
+      "step": 654
+    },
+    {
+      "epoch": 1.9300184162062615,
+      "grad_norm": 0.3610450327396393,
+      "learning_rate": 5.844522059614943e-07,
+      "loss": 2.1624,
+      "step": 655
+    },
+    {
+      "epoch": 1.932965009208103,
+      "grad_norm": 0.34263625741004944,
+      "learning_rate": 5.347794620824576e-07,
+      "loss": 1.9548,
+      "step": 656
+    },
+    {
+      "epoch": 1.9359116022099447,
+      "grad_norm": 0.35191041231155396,
+      "learning_rate": 4.873066907933543e-07,
+      "loss": 2.0965,
+      "step": 657
+    },
+    {
+      "epoch": 1.9388581952117865,
+      "grad_norm": 0.361122727394104,
+      "learning_rate": 4.4203494209733576e-07,
+      "loss": 1.9781,
+      "step": 658
+    },
+    {
+      "epoch": 1.941804788213628,
+      "grad_norm": 0.35918036103248596,
+      "learning_rate": 3.9896521731532797e-07,
+      "loss": 1.8921,
+      "step": 659
+    },
+    {
+      "epoch": 1.9447513812154695,
+      "grad_norm": 0.37808507680892944,
+      "learning_rate": 3.580984690638611e-07,
+      "loss": 2.2418,
+      "step": 660
+    },
+    {
+      "epoch": 1.9476979742173113,
+      "grad_norm": 0.34101665019989014,
+      "learning_rate": 3.194356012340305e-07,
+      "loss": 1.8401,
+      "step": 661
+    },
+    {
+      "epoch": 1.9506445672191528,
+      "grad_norm": 0.360929936170578,
+      "learning_rate": 2.8297746897146816e-07,
+      "loss": 2.0246,
+      "step": 662
+    },
+    {
+      "epoch": 1.9535911602209945,
+      "grad_norm": 0.36307036876678467,
+      "learning_rate": 2.48724878657447e-07,
+      "loss": 2.0241,
+      "step": 663
+    },
+    {
+      "epoch": 1.956537753222836,
+      "grad_norm": 0.3703291118144989,
+      "learning_rate": 2.1667858789105043e-07,
+      "loss": 2.1115,
+      "step": 664
+    },
+    {
+      "epoch": 1.9594843462246776,
+      "grad_norm": 0.3611985445022583,
+      "learning_rate": 1.8683930547243045e-07,
+      "loss": 1.9565,
+      "step": 665
+    },
+    {
+      "epoch": 1.9624309392265193,
+      "grad_norm": 0.351419061422348,
+      "learning_rate": 1.5920769138706438e-07,
+      "loss": 1.8914,
+      "step": 666
+    },
+    {
+      "epoch": 1.965377532228361,
+      "grad_norm": 0.36377424001693726,
+      "learning_rate": 1.3378435679122226e-07,
+      "loss": 2.0512,
+      "step": 667
+    },
+    {
+      "epoch": 1.9683241252302026,
+      "grad_norm": 0.36848321557044983,
+      "learning_rate": 1.1056986399845537e-07,
+      "loss": 2.1475,
+      "step": 668
+    },
+    {
+      "epoch": 1.9712707182320441,
+      "grad_norm": 0.3410709500312805,
+      "learning_rate": 8.956472646710623e-08,
+      "loss": 1.9034,
+      "step": 669
+    },
+    {
+      "epoch": 1.9742173112338857,
+      "grad_norm": 0.37526410818099976,
+      "learning_rate": 7.07694087889621e-08,
+      "loss": 2.1324,
+      "step": 670
+    },
+    {
+      "epoch": 1.9771639042357274,
+      "grad_norm": 0.3631035089492798,
+      "learning_rate": 5.418432667905204e-08,
+      "loss": 1.9301,
+      "step": 671
+    },
+    {
+      "epoch": 1.9801104972375692,
+      "grad_norm": 0.3520108759403229,
+      "learning_rate": 3.980984696634327e-08,
+      "loss": 1.9756,
+      "step": 672
+    },
+    {
+      "epoch": 1.9830570902394107,
+      "grad_norm": 0.35726797580718994,
+      "learning_rate": 2.764628758570309e-08,
+      "loss": 1.9707,
+      "step": 673
+    },
+    {
+      "epoch": 1.9860036832412522,
+      "grad_norm": 0.35579240322113037,
+      "learning_rate": 1.7693917570837936e-08,
+      "loss": 1.9214,
+      "step": 674
+    },
+    {
+      "epoch": 1.988950276243094,
+      "grad_norm": 0.3749707043170929,
+      "learning_rate": 9.95295704835364e-09,
+      "loss": 2.0993,
+      "step": 675
+    },
+    {
+      "epoch": 1.9918968692449357,
+      "grad_norm": 0.35399675369262695,
+      "learning_rate": 4.42357723288156e-09,
+      "loss": 1.9808,
+      "step": 676
+    },
+    {
+      "epoch": 1.9948434622467772,
+      "grad_norm": 0.34812989830970764,
+      "learning_rate": 1.1059004233038296e-09,
+      "loss": 1.9791,
+      "step": 677
+    },
+    {
+      "epoch": 1.9977900552486187,
+      "grad_norm": 0.3504377603530884,
+      "learning_rate": 0.0,
+      "loss": 1.9639,
+      "step": 678
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.544397205114716e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null