Training in progress, step 1000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5c9d0ed52b3043466a6377a3eae8328ec07305d72478a84ed821b4d68db79c4
 size 377528296

 version https://git-lfs.github.com/spec/v1
+oid sha256:da8950123b30f3fa3e0e8ee83f72f3b2f414436b088d27ee084dc11189dcdd69
 size 377528296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ef8324a5f781ac2c15d9f45dbdbff76de3d0825b6104a1c35acb40eb3205233
 size 100950454

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3bc56991baff6e9adc069cbe70c304d9e293c6e1a94dcc9e124de7f02e2ef9e
 size 100950454

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7e1c9864391f7a9e1741c01173d2b910d5d1c311252162fc450d4c6d83da4d0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:241b88bd5e666af9478e0e5eb8cb4359a74fa33fddc106039ac6f6d66064cba9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c62f4fdc2df8e5e8bf6e891c16e6620dda6d955eaaffc2b1ff18df9d1875d5d
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfdef5a028ffb71f0f119eaf380178e35f4f8c0cf1e1810bd40a5706a8894a4d
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.4196394681930542,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
-  "epoch": 0.5144326950557302,
   "eval_steps": 100,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -717,6 +717,84 @@
       "eval_samples_per_second": 2.846,
       "eval_steps_per_second": 2.846,
       "step": 900
     }
   ],
   "logging_steps": 10,
@@ -731,7 +809,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -740,12 +818,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.29086365990912e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.4196394681930542,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
+  "epoch": 0.5715918833952558,
   "eval_steps": 100,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.846,
       "eval_steps_per_second": 2.846,
       "step": 900
+    },
+    {
+      "epoch": 0.5201486138896828,
+      "grad_norm": 11.08961296081543,
+      "learning_rate": 0.00018628465477377176,
+      "loss": 1.4101,
+      "step": 910
+    },
+    {
+      "epoch": 0.5258645327236353,
+      "grad_norm": 7.305054664611816,
+      "learning_rate": 0.00018597871530801058,
+      "loss": 1.1836,
+      "step": 920
+    },
+    {
+      "epoch": 0.5315804515575879,
+      "grad_norm": 11.424978256225586,
+      "learning_rate": 0.00018566966173239052,
+      "loss": 1.7394,
+      "step": 930
+    },
+    {
+      "epoch": 0.5372963703915404,
+      "grad_norm": 9.069324493408203,
+      "learning_rate": 0.00018535747949499637,
+      "loss": 1.6682,
+      "step": 940
+    },
+    {
+      "epoch": 0.543012289225493,
+      "grad_norm": 11.527304649353027,
+      "learning_rate": 0.0001850422122515738,
+      "loss": 1.5007,
+      "step": 950
+    },
+    {
+      "epoch": 0.5487282080594456,
+      "grad_norm": 7.424556255340576,
+      "learning_rate": 0.00018472386000212282,
+      "loss": 1.4391,
+      "step": 960
+    },
+    {
+      "epoch": 0.5544441268933981,
+      "grad_norm": 7.527375221252441,
+      "learning_rate": 0.00018440245185047388,
+      "loss": 1.3857,
+      "step": 970
+    },
+    {
+      "epoch": 0.5601600457273507,
+      "grad_norm": 6.9273295402526855,
+      "learning_rate": 0.00018407795869279653,
+      "loss": 1.4611,
+      "step": 980
+    },
+    {
+      "epoch": 0.5658759645613032,
+      "grad_norm": 8.494791984558105,
+      "learning_rate": 0.00018375043873675168,
+      "loss": 1.5067,
+      "step": 990
+    },
+    {
+      "epoch": 0.5715918833952558,
+      "grad_norm": 33.15937042236328,
+      "learning_rate": 0.00018341986287850887,
+      "loss": 2.3622,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5715918833952558,
+      "eval_loss": 0.5082448720932007,
+      "eval_runtime": 12.6257,
+      "eval_samples_per_second": 2.851,
+      "eval_steps_per_second": 2.851,
+      "step": 1000
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.655020281856e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null