Training in progress, step 666, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cfa65a3b114689d9988157f964345cffd0fdf3868c0528e061bd0c6d337077a
 size 83115256

 version https://git-lfs.github.com/spec/v1
+oid sha256:be6a6293e873d29e9c123cfed8b9129a3a500086fa1097f67ec4d0c631d4846a
 size 83115256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04d5924b6afa812a2c7e3e24840d388b87a9c5b91c12a061e12d9f2ece70cb59
 size 166439638

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cdb0e85e05885b3a3fd540038294f26ab6ddd4d6a05b3a4065c07d822518d2f
 size 166439638

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80b1cc2052c8c3383c26f8c510efc9f3bd0906692def428910029357fb332fa6
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:18b8b2bf3961b9182299da2d15b0278c7b409aa9c6efb89fb915ee1cccd96964
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1948f1667c5eac8eab3aad84caa2ddcfa7b6b58f0c921e1e247a9f2b914505ee
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:27e6ba4d45d5ee5eee425ae6ed411a0c7494450474b7f8f6b3e554abac44894f
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a8364beb1e2bd0b9ee8df0b70de706380e0a681bffd14410911a9b40a0e1e37
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:51281b34edba8f636a3adfd197e66e7cdd44d38de5c21737cb92d4f19959848b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29652dc1901dc027f97222930a55463d6f44f87fdf6854a4e0e21917ae06dc0b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a7a4add07966596d388a83a7b9df388bbae65c00a672369fcb325b7b27f1944
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:112390c5500004aa6fb3c2a337356c6031076ed6ffd64d9b3c825b8c3aedf87c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ef28e35469bfd31a5e1e33e1983e5e2faeef0929cad68ff1f285258580e0345
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8541597127914429,
   "best_model_checkpoint": "miner_id_24/checkpoint-650",
-  "epoch": 0.5484436592793767,
   "eval_steps": 25,
-  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4773,6 +4773,118 @@
       "eval_samples_per_second": 52.637,
       "eval_steps_per_second": 13.686,
       "step": 650
     }
   ],
   "logging_steps": 1,
@@ -4796,12 +4908,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.182041063063552e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.8541597127914429,
   "best_model_checkpoint": "miner_id_24/checkpoint-650",
+  "epoch": 0.5619438108924075,
   "eval_steps": 25,
+  "global_step": 666,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 52.637,
       "eval_steps_per_second": 13.686,
       "step": 650
+    },
+    {
+      "epoch": 0.549287418755191,
+      "grad_norm": 0.2805044949054718,
+      "learning_rate": 1.0124639809571427e-05,
+      "loss": 0.921,
+      "step": 651
+    },
+    {
+      "epoch": 0.5501311782310055,
+      "grad_norm": 0.34615084528923035,
+      "learning_rate": 1.010858158683357e-05,
+      "loss": 0.8931,
+      "step": 652
+    },
+    {
+      "epoch": 0.5509749377068199,
+      "grad_norm": 0.37605908513069153,
+      "learning_rate": 1.0093629108529187e-05,
+      "loss": 0.9133,
+      "step": 653
+    },
+    {
+      "epoch": 0.5518186971826343,
+      "grad_norm": 0.39621636271476746,
+      "learning_rate": 1.0079782742960727e-05,
+      "loss": 0.8801,
+      "step": 654
+    },
+    {
+      "epoch": 0.5526624566584488,
+      "grad_norm": 0.43372654914855957,
+      "learning_rate": 1.0067042831185395e-05,
+      "loss": 0.8728,
+      "step": 655
+    },
+    {
+      "epoch": 0.5535062161342632,
+      "grad_norm": 0.4617686867713928,
+      "learning_rate": 1.0055409687006741e-05,
+      "loss": 0.8503,
+      "step": 656
+    },
+    {
+      "epoch": 0.5543499756100777,
+      "grad_norm": 0.4721163213253021,
+      "learning_rate": 1.0044883596966938e-05,
+      "loss": 0.7944,
+      "step": 657
+    },
+    {
+      "epoch": 0.5551937350858921,
+      "grad_norm": 0.5155826210975647,
+      "learning_rate": 1.0035464820339714e-05,
+      "loss": 0.8678,
+      "step": 658
+    },
+    {
+      "epoch": 0.5560374945617065,
+      "grad_norm": 0.5820474028587341,
+      "learning_rate": 1.0027153589123977e-05,
+      "loss": 0.8539,
+      "step": 659
+    },
+    {
+      "epoch": 0.556881254037521,
+      "grad_norm": 0.6269566416740417,
+      "learning_rate": 1.001995010803808e-05,
+      "loss": 0.8211,
+      "step": 660
+    },
+    {
+      "epoch": 0.5577250135133354,
+      "grad_norm": 0.6699291467666626,
+      "learning_rate": 1.0013854554514806e-05,
+      "loss": 0.7731,
+      "step": 661
+    },
+    {
+      "epoch": 0.5585687729891498,
+      "grad_norm": 0.7107478976249695,
+      "learning_rate": 1.000886707869698e-05,
+      "loss": 0.6735,
+      "step": 662
+    },
+    {
+      "epoch": 0.5594125324649643,
+      "grad_norm": 0.3569500148296356,
+      "learning_rate": 1.0004987803433777e-05,
+      "loss": 0.9516,
+      "step": 663
+    },
+    {
+      "epoch": 0.5602562919407786,
+      "grad_norm": 0.3141714930534363,
+      "learning_rate": 1.0002216824277691e-05,
+      "loss": 0.9088,
+      "step": 664
+    },
+    {
+      "epoch": 0.561100051416593,
+      "grad_norm": 0.35655835270881653,
+      "learning_rate": 1.0000554209482183e-05,
+      "loss": 0.9101,
+      "step": 665
+    },
+    {
+      "epoch": 0.5619438108924075,
+      "grad_norm": 0.3734205961227417,
+      "learning_rate": 1e-05,
+      "loss": 0.8718,
+      "step": 666
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.2849836123081933e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null