Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4db57f7ccec30afb61646942782c1af8dc17f47beb5eef3a03233531af731392
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:934aa5d245f85fd29209d92372e442ba0b1be6d7613b9f0cf23f65f9667ab347
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5ba4bf2003f5db2f380b9eb531f80188e1179e3ca0d895e30af9fa4bc115ab5
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:301ed8f34b716b6bbcf37d61d5fc64ca85b017097cbabec328e6d83cae115208
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eca8582e9a6283df8574cb1bd33baa4245e884c22389944c0493433a4abfbca6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a91d2867e1ece667482d7fd28ed30d814d0fcdc4d613d2faf14aa049d8194ef6
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa33e234e5df1c62a2092b542514313fb1a917c42c79a938f62b6692f9300da2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef756830043bf90c5ccdb5a8406c96c2cac8b2e06fb9405cb650633878e5bf80
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d90799a923c6d60243c855545b9dd8c1070bf3dfe27fc07785e4e20c6c62f4e2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e9ae19c7e5d4b427a37a687b673d5c1c21719c851fd211719c2c843e8529aa7
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:733a5800b58b3c903dc7293d44ca88bcc0ce2ff6ae7a984febc25cbabdcf501b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:77459eefe9ed4ff2fd547b7617f29ee11ddfd9413808e4ba547ba1371c676f01
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8088321089744568,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.007323324789454412,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 21.386,
       "eval_steps_per_second": 5.56,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.015747257237504e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8050488233566284,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.014646649578908825,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.386,
       "eval_steps_per_second": 5.56,
       "step": 25
+    },
+    {
+      "epoch": 0.007616257781032589,
+      "grad_norm": 0.18672692775726318,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.5498,
+      "step": 26
+    },
+    {
+      "epoch": 0.007909190772610765,
+      "grad_norm": 0.21561962366104126,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 0.6171,
+      "step": 27
+    },
+    {
+      "epoch": 0.008202123764188942,
+      "grad_norm": 0.2518845200538635,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 0.6353,
+      "step": 28
+    },
+    {
+      "epoch": 0.008495056755767118,
+      "grad_norm": 0.3035564720630646,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 0.7517,
+      "step": 29
+    },
+    {
+      "epoch": 0.008787989747345295,
+      "grad_norm": 0.34647372364997864,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 0.7326,
+      "step": 30
+    },
+    {
+      "epoch": 0.009080922738923471,
+      "grad_norm": 0.3335069715976715,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 0.7853,
+      "step": 31
+    },
+    {
+      "epoch": 0.009373855730501647,
+      "grad_norm": 0.38675621151924133,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.8793,
+      "step": 32
+    },
+    {
+      "epoch": 0.009666788722079824,
+      "grad_norm": 0.4385841488838196,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 1.0219,
+      "step": 33
+    },
+    {
+      "epoch": 0.009959721713658,
+      "grad_norm": 0.6537007689476013,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 1.0441,
+      "step": 34
+    },
+    {
+      "epoch": 0.010252654705236177,
+      "grad_norm": 0.818059504032135,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 1.0197,
+      "step": 35
+    },
+    {
+      "epoch": 0.010545587696814353,
+      "grad_norm": 0.7295295000076294,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 0.9551,
+      "step": 36
+    },
+    {
+      "epoch": 0.01083852068839253,
+      "grad_norm": 0.560406506061554,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 0.8981,
+      "step": 37
+    },
+    {
+      "epoch": 0.011131453679970706,
+      "grad_norm": 0.21649475395679474,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.5961,
+      "step": 38
+    },
+    {
+      "epoch": 0.011424386671548882,
+      "grad_norm": 0.17390567064285278,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 0.5689,
+      "step": 39
+    },
+    {
+      "epoch": 0.011717319663127059,
+      "grad_norm": 0.1967543065547943,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 0.6788,
+      "step": 40
+    },
+    {
+      "epoch": 0.012010252654705237,
+      "grad_norm": 0.2193164974451065,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 0.7285,
+      "step": 41
+    },
+    {
+      "epoch": 0.012303185646283413,
+      "grad_norm": 0.2205907106399536,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 0.7071,
+      "step": 42
+    },
+    {
+      "epoch": 0.01259611863786159,
+      "grad_norm": 0.2299063801765442,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 0.7342,
+      "step": 43
+    },
+    {
+      "epoch": 0.012889051629439766,
+      "grad_norm": 0.260344922542572,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 0.7884,
+      "step": 44
+    },
+    {
+      "epoch": 0.013181984621017943,
+      "grad_norm": 0.28746941685676575,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 0.9154,
+      "step": 45
+    },
+    {
+      "epoch": 0.013474917612596119,
+      "grad_norm": 0.28167566657066345,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 1.0163,
+      "step": 46
+    },
+    {
+      "epoch": 0.013767850604174296,
+      "grad_norm": 0.3280790150165558,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 0.9091,
+      "step": 47
+    },
+    {
+      "epoch": 0.014060783595752472,
+      "grad_norm": 0.37814265489578247,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 0.9017,
+      "step": 48
+    },
+    {
+      "epoch": 0.014353716587330648,
+      "grad_norm": 0.46121880412101746,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 0.883,
+      "step": 49
+    },
+    {
+      "epoch": 0.014646649578908825,
+      "grad_norm": 0.7274380922317505,
+      "learning_rate": 1e-05,
+      "loss": 0.9317,
+      "step": 50
+    },
+    {
+      "epoch": 0.014646649578908825,
+      "eval_loss": 0.8050488233566284,
+      "eval_runtime": 2.3505,
+      "eval_samples_per_second": 21.272,
+      "eval_steps_per_second": 5.531,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.027764453490033e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null