Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a44ea41c435f428a0b4622481e973ad756944eb89167e27e6a2f373c8c653082
 size 119801528

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c798a0a6b6439b7ba4990115669dd5390df4253f2ed05ff01b37d3e5b9a5ca
 size 119801528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c5acd814dfe0c52c3b2a582ea5999ca8d1e7c823e797b699411c9607223921a
 size 239892858

 version https://git-lfs.github.com/spec/v1
+oid sha256:a298dc93a456a5fb8f97d2d8676bb1136891c6bd02e561c83c13c606098ef0c2
 size 239892858

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20a3abcc3b0fbad0a1f38d611f520b0082cf25ed3bdfb5bce9163b1dc058f21b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bbd7313c0d0ca8cf3311d6be2f7a700f91a9e1656cdac8179e6f8b0fceef01c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ea7ff16b0c30a914eb0d145e3fb06ff9027c6cd2408e766ce8a09accab89a4d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e69e2b49ea642509f0c688c16fb190b7cf27dac0a18903a5e2d1467d0343d8b8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.004080966372837088,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 4.889,
       "eval_steps_per_second": 2.445,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.37856974585856e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.008161932745674175,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.889,
       "eval_steps_per_second": 2.445,
       "step": 25
+    },
+    {
+      "epoch": 0.004244205027750571,
+      "grad_norm": 3.404787540435791,
+      "learning_rate": 0.0001,
+      "loss": 2.1466,
+      "step": 26
+    },
+    {
+      "epoch": 0.004407443682664055,
+      "grad_norm": 4.816573143005371,
+      "learning_rate": 9.345968707698569e-05,
+      "loss": 2.0775,
+      "step": 27
+    },
+    {
+      "epoch": 0.004570682337577538,
+      "grad_norm": 5.124390125274658,
+      "learning_rate": 8.694738077799488e-05,
+      "loss": 2.5155,
+      "step": 28
+    },
+    {
+      "epoch": 0.004733920992491022,
+      "grad_norm": 5.200120449066162,
+      "learning_rate": 8.049096779838719e-05,
+      "loss": 2.1451,
+      "step": 29
+    },
+    {
+      "epoch": 0.004897159647404506,
+      "grad_norm": 3.7278730869293213,
+      "learning_rate": 7.411809548974792e-05,
+      "loss": 2.2736,
+      "step": 30
+    },
+    {
+      "epoch": 0.005060398302317989,
+      "grad_norm": 2.5494396686553955,
+      "learning_rate": 6.785605346968386e-05,
+      "loss": 1.6221,
+      "step": 31
+    },
+    {
+      "epoch": 0.005223636957231472,
+      "grad_norm": 5.404129505157471,
+      "learning_rate": 6.173165676349103e-05,
+      "loss": 2.2271,
+      "step": 32
+    },
+    {
+      "epoch": 0.0053868756121449556,
+      "grad_norm": 3.913649320602417,
+      "learning_rate": 5.577113097809989e-05,
+      "loss": 2.1973,
+      "step": 33
+    },
+    {
+      "epoch": 0.00555011426705844,
+      "grad_norm": 2.796231269836426,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 1.8852,
+      "step": 34
+    },
+    {
+      "epoch": 0.005713352921971923,
+      "grad_norm": 3.5265185832977295,
+      "learning_rate": 4.444297669803981e-05,
+      "loss": 2.7077,
+      "step": 35
+    },
+    {
+      "epoch": 0.005876591576885406,
+      "grad_norm": 3.8757095336914062,
+      "learning_rate": 3.9123857099127936e-05,
+      "loss": 1.7534,
+      "step": 36
+    },
+    {
+      "epoch": 0.00603983023179889,
+      "grad_norm": 4.616915225982666,
+      "learning_rate": 3.406541848999312e-05,
+      "loss": 1.937,
+      "step": 37
+    },
+    {
+      "epoch": 0.006203068886712374,
+      "grad_norm": 3.0357136726379395,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 2.051,
+      "step": 38
+    },
+    {
+      "epoch": 0.006366307541625857,
+      "grad_norm": 6.157255172729492,
+      "learning_rate": 2.4816019252102273e-05,
+      "loss": 2.3425,
+      "step": 39
+    },
+    {
+      "epoch": 0.00652954619653934,
+      "grad_norm": 3.4735326766967773,
+      "learning_rate": 2.0664665970876496e-05,
+      "loss": 1.6875,
+      "step": 40
+    },
+    {
+      "epoch": 0.006692784851452824,
+      "grad_norm": 4.368310928344727,
+      "learning_rate": 1.6853038769745467e-05,
+      "loss": 2.5715,
+      "step": 41
+    },
+    {
+      "epoch": 0.0068560235063663075,
+      "grad_norm": 4.3574042320251465,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 2.253,
+      "step": 42
+    },
+    {
+      "epoch": 0.007019262161279791,
+      "grad_norm": 5.3956828117370605,
+      "learning_rate": 1.0312725846731175e-05,
+      "loss": 2.2636,
+      "step": 43
+    },
+    {
+      "epoch": 0.007182500816193275,
+      "grad_norm": 3.6278300285339355,
+      "learning_rate": 7.612046748871327e-06,
+      "loss": 1.7977,
+      "step": 44
+    },
+    {
+      "epoch": 0.007345739471106758,
+      "grad_norm": 4.850729465484619,
+      "learning_rate": 5.306987050489442e-06,
+      "loss": 2.2418,
+      "step": 45
+    },
+    {
+      "epoch": 0.0075089781260202415,
+      "grad_norm": 4.70204496383667,
+      "learning_rate": 3.40741737109318e-06,
+      "loss": 2.3636,
+      "step": 46
+    },
+    {
+      "epoch": 0.007672216780933725,
+      "grad_norm": 4.269098281860352,
+      "learning_rate": 1.921471959676957e-06,
+      "loss": 1.5737,
+      "step": 47
+    },
+    {
+      "epoch": 0.007835455435847209,
+      "grad_norm": 3.4017038345336914,
+      "learning_rate": 8.555138626189618e-07,
+      "loss": 1.4476,
+      "step": 48
+    },
+    {
+      "epoch": 0.007998694090760691,
+      "grad_norm": 7.0247907638549805,
+      "learning_rate": 2.141076761396521e-07,
+      "loss": 1.9751,
+      "step": 49
+    },
+    {
+      "epoch": 0.008161932745674175,
+      "grad_norm": 4.255161762237549,
+      "learning_rate": 0.0,
+      "loss": 0.9664,
+      "step": 50
+    },
+    {
+      "epoch": 0.008161932745674175,
+      "eval_loss": NaN,
+      "eval_runtime": 527.4654,
+      "eval_samples_per_second": 4.891,
+      "eval_steps_per_second": 2.446,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.75713949171712e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null