Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc3f280c5ca103143660266103b74a6570abc69758ce36cc9d368aab5e2dfd3a
 size 100059752

 version https://git-lfs.github.com/spec/v1
+oid sha256:edc6d110c6ee168934325d7caecb6fe72f8b7a5f0db191f69923a4f2b6b101e5
 size 100059752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7c976ae980e9ac8047d235ccd19fef24fde2464dd53f75900c30ff396c32f10
 size 200345450

 version https://git-lfs.github.com/spec/v1
+oid sha256:24c4da6af4b377f072334f8dd3d554d37172ccd9bda8566a81fdf55d5bd9929b
 size 200345450

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9893f6162a58573bd264360beb6b21a0956ecc0fbb3754b119b82985ca44eb13
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1ed4842887a13ecff8e3f549138986adef79ce125ddb95aca0a6f49c9263062
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d689b2d605789fff660b4cfbb1ce6889a5251953118f474a71b33d74e2d19be7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 5.724390029907227,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.07385524372230429,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 10.924,
       "eval_steps_per_second": 1.375,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.9110914162688e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 4.619130611419678,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.14771048744460857,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.924,
       "eval_steps_per_second": 1.375,
       "step": 50
+    },
+    {
+      "epoch": 0.07533234859675036,
+      "grad_norm": 6.688117027282715,
+      "learning_rate": 3.969463130731183e-06,
+      "loss": 3.2004,
+      "step": 51
+    },
+    {
+      "epoch": 0.07680945347119646,
+      "grad_norm": 8.25613784790039,
+      "learning_rate": 3.8772424536302565e-06,
+      "loss": 3.8522,
+      "step": 52
+    },
+    {
+      "epoch": 0.07828655834564253,
+      "grad_norm": 7.771860599517822,
+      "learning_rate": 3.782248193514766e-06,
+      "loss": 3.62,
+      "step": 53
+    },
+    {
+      "epoch": 0.07976366322008863,
+      "grad_norm": 9.126646041870117,
+      "learning_rate": 3.684671656182497e-06,
+      "loss": 4.2849,
+      "step": 54
+    },
+    {
+      "epoch": 0.08124076809453472,
+      "grad_norm": 8.893864631652832,
+      "learning_rate": 3.5847093477938955e-06,
+      "loss": 4.2011,
+      "step": 55
+    },
+    {
+      "epoch": 0.0827178729689808,
+      "grad_norm": 7.722706317901611,
+      "learning_rate": 3.4825625791348093e-06,
+      "loss": 3.6801,
+      "step": 56
+    },
+    {
+      "epoch": 0.08419497784342689,
+      "grad_norm": 8.404489517211914,
+      "learning_rate": 3.3784370602033572e-06,
+      "loss": 3.8903,
+      "step": 57
+    },
+    {
+      "epoch": 0.08567208271787297,
+      "grad_norm": 8.851256370544434,
+      "learning_rate": 3.272542485937369e-06,
+      "loss": 4.1234,
+      "step": 58
+    },
+    {
+      "epoch": 0.08714918759231906,
+      "grad_norm": 7.835949420928955,
+      "learning_rate": 3.165092113916688e-06,
+      "loss": 3.7817,
+      "step": 59
+    },
+    {
+      "epoch": 0.08862629246676514,
+      "grad_norm": 8.80264663696289,
+      "learning_rate": 3.056302334890786e-06,
+      "loss": 4.1737,
+      "step": 60
+    },
+    {
+      "epoch": 0.09010339734121123,
+      "grad_norm": 8.49503231048584,
+      "learning_rate": 2.946392236996592e-06,
+      "loss": 3.8425,
+      "step": 61
+    },
+    {
+      "epoch": 0.0915805022156573,
+      "grad_norm": 8.439919471740723,
+      "learning_rate": 2.835583164544139e-06,
+      "loss": 4.0178,
+      "step": 62
+    },
+    {
+      "epoch": 0.0930576070901034,
+      "grad_norm": 10.640103340148926,
+      "learning_rate": 2.724098272258584e-06,
+      "loss": 4.054,
+      "step": 63
+    },
+    {
+      "epoch": 0.09453471196454949,
+      "grad_norm": 8.416254043579102,
+      "learning_rate": 2.6121620758762877e-06,
+      "loss": 4.0347,
+      "step": 64
+    },
+    {
+      "epoch": 0.09601181683899557,
+      "grad_norm": 10.077754974365234,
+      "learning_rate": 2.5e-06,
+      "loss": 4.7311,
+      "step": 65
+    },
+    {
+      "epoch": 0.09748892171344166,
+      "grad_norm": 10.852299690246582,
+      "learning_rate": 2.3878379241237136e-06,
+      "loss": 5.0589,
+      "step": 66
+    },
+    {
+      "epoch": 0.09896602658788774,
+      "grad_norm": 8.857027053833008,
+      "learning_rate": 2.2759017277414165e-06,
+      "loss": 4.155,
+      "step": 67
+    },
+    {
+      "epoch": 0.10044313146233383,
+      "grad_norm": 9.840660095214844,
+      "learning_rate": 2.1644168354558623e-06,
+      "loss": 4.542,
+      "step": 68
+    },
+    {
+      "epoch": 0.1019202363367799,
+      "grad_norm": 10.586484909057617,
+      "learning_rate": 2.053607763003409e-06,
+      "loss": 5.1637,
+      "step": 69
+    },
+    {
+      "epoch": 0.103397341211226,
+      "grad_norm": 10.181109428405762,
+      "learning_rate": 1.9436976651092143e-06,
+      "loss": 4.5931,
+      "step": 70
+    },
+    {
+      "epoch": 0.10487444608567208,
+      "grad_norm": 8.879673957824707,
+      "learning_rate": 1.8349078860833125e-06,
+      "loss": 4.2358,
+      "step": 71
+    },
+    {
+      "epoch": 0.10635155096011817,
+      "grad_norm": 9.705038070678711,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 4.2264,
+      "step": 72
+    },
+    {
+      "epoch": 0.10782865583456426,
+      "grad_norm": 11.851544380187988,
+      "learning_rate": 1.6215629397966432e-06,
+      "loss": 4.5553,
+      "step": 73
+    },
+    {
+      "epoch": 0.10930576070901034,
+      "grad_norm": 11.327683448791504,
+      "learning_rate": 1.5174374208651913e-06,
+      "loss": 4.2947,
+      "step": 74
+    },
+    {
+      "epoch": 0.11078286558345643,
+      "grad_norm": 9.12096881866455,
+      "learning_rate": 1.415290652206105e-06,
+      "loss": 4.4722,
+      "step": 75
+    },
+    {
+      "epoch": 0.11225997045790251,
+      "grad_norm": 10.653883934020996,
+      "learning_rate": 1.3153283438175036e-06,
+      "loss": 4.6045,
+      "step": 76
+    },
+    {
+      "epoch": 0.1137370753323486,
+      "grad_norm": 10.704031944274902,
+      "learning_rate": 1.217751806485235e-06,
+      "loss": 5.3001,
+      "step": 77
+    },
+    {
+      "epoch": 0.11521418020679468,
+      "grad_norm": 10.759645462036133,
+      "learning_rate": 1.122757546369744e-06,
+      "loss": 4.9655,
+      "step": 78
+    },
+    {
+      "epoch": 0.11669128508124077,
+      "grad_norm": 9.468240737915039,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 4.4559,
+      "step": 79
+    },
+    {
+      "epoch": 0.11816838995568685,
+      "grad_norm": 10.726446151733398,
+      "learning_rate": 9.412754953531664e-07,
+      "loss": 4.495,
+      "step": 80
+    },
+    {
+      "epoch": 0.11964549483013294,
+      "grad_norm": 10.774774551391602,
+      "learning_rate": 8.551531851507186e-07,
+      "loss": 4.5627,
+      "step": 81
+    },
+    {
+      "epoch": 0.12112259970457903,
+      "grad_norm": 15.364916801452637,
+      "learning_rate": 7.723433775328385e-07,
+      "loss": 6.0505,
+      "step": 82
+    },
+    {
+      "epoch": 0.12259970457902511,
+      "grad_norm": 10.570252418518066,
+      "learning_rate": 6.930128404315214e-07,
+      "loss": 5.0273,
+      "step": 83
+    },
+    {
+      "epoch": 0.1240768094534712,
+      "grad_norm": 10.188580513000488,
+      "learning_rate": 6.17321334990973e-07,
+      "loss": 4.7658,
+      "step": 84
+    },
+    {
+      "epoch": 0.1255539143279173,
+      "grad_norm": 12.545642852783203,
+      "learning_rate": 5.454212938299256e-07,
+      "loss": 4.9244,
+      "step": 85
+    },
+    {
+      "epoch": 0.12703101920236337,
+      "grad_norm": 11.229552268981934,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 4.9959,
+      "step": 86
+    },
+    {
+      "epoch": 0.12850812407680945,
+      "grad_norm": 13.353278160095215,
+      "learning_rate": 4.1356686569674344e-07,
+      "loss": 5.5947,
+      "step": 87
+    },
+    {
+      "epoch": 0.12998522895125553,
+      "grad_norm": 12.811745643615723,
+      "learning_rate": 3.538780159953348e-07,
+      "loss": 5.2684,
+      "step": 88
+    },
+    {
+      "epoch": 0.13146233382570163,
+      "grad_norm": 14.855345726013184,
+      "learning_rate": 2.98511170358155e-07,
+      "loss": 5.4075,
+      "step": 89
+    },
+    {
+      "epoch": 0.1329394387001477,
+      "grad_norm": 22.820568084716797,
+      "learning_rate": 2.4757783024395244e-07,
+      "loss": 6.2209,
+      "step": 90
+    },
+    {
+      "epoch": 0.1344165435745938,
+      "grad_norm": 33.82524490356445,
+      "learning_rate": 2.0118056862137358e-07,
+      "loss": 9.4367,
+      "step": 91
+    },
+    {
+      "epoch": 0.1358936484490399,
+      "grad_norm": 14.382251739501953,
+      "learning_rate": 1.59412823400657e-07,
+      "loss": 5.8712,
+      "step": 92
+    },
+    {
+      "epoch": 0.13737075332348597,
+      "grad_norm": 14.410211563110352,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 5.3496,
+      "step": 93
+    },
+    {
+      "epoch": 0.13884785819793205,
+      "grad_norm": 31.486698150634766,
+      "learning_rate": 9.00928482603669e-08,
+      "loss": 9.6945,
+      "step": 94
+    },
+    {
+      "epoch": 0.14032496307237813,
+      "grad_norm": 20.718799591064453,
+      "learning_rate": 6.268021954544095e-08,
+      "loss": 7.5291,
+      "step": 95
+    },
+    {
+      "epoch": 0.14180206794682423,
+      "grad_norm": 21.785247802734375,
+      "learning_rate": 4.017602850342584e-08,
+      "loss": 6.9029,
+      "step": 96
+    },
+    {
+      "epoch": 0.1432791728212703,
+      "grad_norm": 30.33222770690918,
+      "learning_rate": 2.262559558016325e-08,
+      "loss": 7.52,
+      "step": 97
+    },
+    {
+      "epoch": 0.1447562776957164,
+      "grad_norm": 33.29640197753906,
+      "learning_rate": 1.006426501190233e-08,
+      "loss": 8.3836,
+      "step": 98
+    },
+    {
+      "epoch": 0.14623338257016247,
+      "grad_norm": 21.80366325378418,
+      "learning_rate": 2.5173336467135266e-09,
+      "loss": 7.7304,
+      "step": 99
+    },
+    {
+      "epoch": 0.14771048744460857,
+      "grad_norm": 33.6093864440918,
+      "learning_rate": 0.0,
+      "loss": 8.8576,
+      "step": 100
+    },
+    {
+      "epoch": 0.14771048744460857,
+      "eval_loss": 4.619130611419678,
+      "eval_runtime": 26.183,
+      "eval_samples_per_second": 10.923,
+      "eval_steps_per_second": 1.375,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.8221828325376e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null