Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fed470f917aebd0601aec5ee2d94ca1652f99fb74ded2aa81ec68736a6a969ed
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3421136029af9219236835100aaa94b31fdbb3ccea611efe6bfa2932917fc56
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71add94e3b6b7c03ef531b1d89af24c4a9d1ee405f57210680b01788634f4699
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:41b5de16a0712ca43d3aa9de94a5ef2c7d5865937e23bf1a23282f8fb5cbabad
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1bae8297b5a883f90795d9032dbcec9f8172d682ba1ae14dd976e48022bffca
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1917439856796ce0a94c4a3c7e2abb23d672675600349200a2315b874d0f0e9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.3468055725097656,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.0827178729689808,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 13.051,
       "eval_steps_per_second": 3.263,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2535,7 +2893,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.321281649026007e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.3468055725097656,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
+  "epoch": 0.09453471196454949,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.051,
       "eval_steps_per_second": 3.263,
       "step": 350
+    },
+    {
+      "epoch": 0.08295420974889217,
+      "grad_norm": 0.8049944639205933,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 1.2096,
+      "step": 351
+    },
+    {
+      "epoch": 0.08319054652880355,
+      "grad_norm": 0.625877857208252,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 1.2358,
+      "step": 352
+    },
+    {
+      "epoch": 0.08342688330871492,
+      "grad_norm": 0.5220972895622253,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 1.2713,
+      "step": 353
+    },
+    {
+      "epoch": 0.0836632200886263,
+      "grad_norm": 0.42933788895606995,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 1.6528,
+      "step": 354
+    },
+    {
+      "epoch": 0.08389955686853767,
+      "grad_norm": 0.40879005193710327,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 1.4614,
+      "step": 355
+    },
+    {
+      "epoch": 0.08413589364844903,
+      "grad_norm": 0.7605451941490173,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 1.344,
+      "step": 356
+    },
+    {
+      "epoch": 0.08437223042836041,
+      "grad_norm": 0.5741116404533386,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 1.439,
+      "step": 357
+    },
+    {
+      "epoch": 0.08460856720827178,
+      "grad_norm": 0.7407791018486023,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 1.5002,
+      "step": 358
+    },
+    {
+      "epoch": 0.08484490398818316,
+      "grad_norm": 0.642069399356842,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 1.3762,
+      "step": 359
+    },
+    {
+      "epoch": 0.08508124076809453,
+      "grad_norm": 0.5877596139907837,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 1.2367,
+      "step": 360
+    },
+    {
+      "epoch": 0.08531757754800591,
+      "grad_norm": 0.6829987168312073,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.3109,
+      "step": 361
+    },
+    {
+      "epoch": 0.08555391432791729,
+      "grad_norm": 0.6324713230133057,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 1.3204,
+      "step": 362
+    },
+    {
+      "epoch": 0.08579025110782866,
+      "grad_norm": 0.6385138630867004,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 1.3549,
+      "step": 363
+    },
+    {
+      "epoch": 0.08602658788774004,
+      "grad_norm": 0.7741832733154297,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 1.2457,
+      "step": 364
+    },
+    {
+      "epoch": 0.0862629246676514,
+      "grad_norm": 0.7856371998786926,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 1.2434,
+      "step": 365
+    },
+    {
+      "epoch": 0.08649926144756277,
+      "grad_norm": 0.8865452408790588,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 1.2015,
+      "step": 366
+    },
+    {
+      "epoch": 0.08673559822747415,
+      "grad_norm": 0.7335183024406433,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 1.3795,
+      "step": 367
+    },
+    {
+      "epoch": 0.08697193500738552,
+      "grad_norm": 0.8089309930801392,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 1.5514,
+      "step": 368
+    },
+    {
+      "epoch": 0.0872082717872969,
+      "grad_norm": 0.8729308247566223,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 1.432,
+      "step": 369
+    },
+    {
+      "epoch": 0.08744460856720827,
+      "grad_norm": 0.6937180757522583,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 1.2671,
+      "step": 370
+    },
+    {
+      "epoch": 0.08768094534711965,
+      "grad_norm": 0.6132826209068298,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 1.3877,
+      "step": 371
+    },
+    {
+      "epoch": 0.08791728212703102,
+      "grad_norm": 0.7071219086647034,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 1.1341,
+      "step": 372
+    },
+    {
+      "epoch": 0.0881536189069424,
+      "grad_norm": 0.7697627544403076,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 1.1962,
+      "step": 373
+    },
+    {
+      "epoch": 0.08838995568685376,
+      "grad_norm": 0.6145182251930237,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.372,
+      "step": 374
+    },
+    {
+      "epoch": 0.08862629246676514,
+      "grad_norm": 1.2259669303894043,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.9949,
+      "step": 375
+    },
+    {
+      "epoch": 0.08886262924667651,
+      "grad_norm": 0.6482958793640137,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 1.3385,
+      "step": 376
+    },
+    {
+      "epoch": 0.08909896602658789,
+      "grad_norm": 0.6545903086662292,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 1.4068,
+      "step": 377
+    },
+    {
+      "epoch": 0.08933530280649926,
+      "grad_norm": 0.5748283267021179,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 1.1817,
+      "step": 378
+    },
+    {
+      "epoch": 0.08957163958641064,
+      "grad_norm": 0.6678035855293274,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 1.2978,
+      "step": 379
+    },
+    {
+      "epoch": 0.08980797636632201,
+      "grad_norm": 0.7104936242103577,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 1.465,
+      "step": 380
+    },
+    {
+      "epoch": 0.09004431314623339,
+      "grad_norm": 0.7184752821922302,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 1.2339,
+      "step": 381
+    },
+    {
+      "epoch": 0.09028064992614476,
+      "grad_norm": 0.7748799324035645,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 1.462,
+      "step": 382
+    },
+    {
+      "epoch": 0.09051698670605612,
+      "grad_norm": 0.6268565058708191,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 1.3812,
+      "step": 383
+    },
+    {
+      "epoch": 0.0907533234859675,
+      "grad_norm": 0.6132498383522034,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 1.4681,
+      "step": 384
+    },
+    {
+      "epoch": 0.09098966026587887,
+      "grad_norm": 0.6265970468521118,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.3954,
+      "step": 385
+    },
+    {
+      "epoch": 0.09122599704579025,
+      "grad_norm": 0.6720038056373596,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 1.4201,
+      "step": 386
+    },
+    {
+      "epoch": 0.09146233382570162,
+      "grad_norm": 0.6600791215896606,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.4669,
+      "step": 387
+    },
+    {
+      "epoch": 0.091698670605613,
+      "grad_norm": 0.7095836997032166,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 1.4614,
+      "step": 388
+    },
+    {
+      "epoch": 0.09193500738552438,
+      "grad_norm": 0.6956748366355896,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 1.3951,
+      "step": 389
+    },
+    {
+      "epoch": 0.09217134416543575,
+      "grad_norm": 0.8048461079597473,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 1.2369,
+      "step": 390
+    },
+    {
+      "epoch": 0.09240768094534713,
+      "grad_norm": 0.7024666666984558,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 1.6628,
+      "step": 391
+    },
+    {
+      "epoch": 0.09264401772525849,
+      "grad_norm": 0.6767484545707703,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 1.4995,
+      "step": 392
+    },
+    {
+      "epoch": 0.09288035450516986,
+      "grad_norm": 0.7625783681869507,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 1.56,
+      "step": 393
+    },
+    {
+      "epoch": 0.09311669128508124,
+      "grad_norm": 0.7535527348518372,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 1.5848,
+      "step": 394
+    },
+    {
+      "epoch": 0.09335302806499261,
+      "grad_norm": 0.7650996446609497,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 1.6873,
+      "step": 395
+    },
+    {
+      "epoch": 0.09358936484490399,
+      "grad_norm": 0.8912221789360046,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 1.4535,
+      "step": 396
+    },
+    {
+      "epoch": 0.09382570162481536,
+      "grad_norm": 0.9255561828613281,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 1.6771,
+      "step": 397
+    },
+    {
+      "epoch": 0.09406203840472674,
+      "grad_norm": 0.9369992017745972,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 1.583,
+      "step": 398
+    },
+    {
+      "epoch": 0.09429837518463811,
+      "grad_norm": 1.0970020294189453,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 1.5595,
+      "step": 399
+    },
+    {
+      "epoch": 0.09453471196454949,
+      "grad_norm": 2.0042531490325928,
+      "learning_rate": 0.0,
+      "loss": 1.8251,
+      "step": 400
+    },
+    {
+      "epoch": 0.09453471196454949,
+      "eval_loss": 1.3509804010391235,
+      "eval_runtime": 546.2081,
+      "eval_samples_per_second": 13.048,
+      "eval_steps_per_second": 3.262,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.083081006292664e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null