Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aaa1c0315b0ea3ee4f28440dd1d2b84d3ca0f1a21498eaf088e6f9b0557961d7
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ea216a6ff6464bbb0071a4dc4b359572bae47a60a2f2f00f228a94770528d49
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59290ddc36494142d846e99fb1c955cd41c3b1f20d34ef5024f200a3146753cb
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9592e26f32696e693546933ea978dfb6689742a5db4ee2a46b07daabd50f774
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09114bd5ee349ccab81e3afd21fabdb201f6387df3a1d7857ca9f4a46e737eb7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:04cb58c7c22b9c64173b11c368e5d7c1be618464957b34eeda2626a0f059931b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9567f2f3182e832808fd621212e0dd5e8f1a88bd24ddda3ea0d289496073738c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1894547939300537,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.3003003003003003,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 3.51,
       "eval_steps_per_second": 1.78,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.41887283560448e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.135526418685913,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.6006006006006006,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.51,
       "eval_steps_per_second": 1.78,
       "step": 50
+    },
+    {
+      "epoch": 0.3063063063063063,
+      "grad_norm": 2.467716932296753,
+      "learning_rate": 0.00010495837546732224,
+      "loss": 1.3141,
+      "step": 51
+    },
+    {
+      "epoch": 0.3123123123123123,
+      "grad_norm": 2.7195961475372314,
+      "learning_rate": 0.00010165339447663587,
+      "loss": 1.1358,
+      "step": 52
+    },
+    {
+      "epoch": 0.3183183183183183,
+      "grad_norm": 2.4302902221679688,
+      "learning_rate": 9.834660552336415e-05,
+      "loss": 1.2766,
+      "step": 53
+    },
+    {
+      "epoch": 0.32432432432432434,
+      "grad_norm": 2.4811532497406006,
+      "learning_rate": 9.504162453267777e-05,
+      "loss": 1.2285,
+      "step": 54
+    },
+    {
+      "epoch": 0.3303303303303303,
+      "grad_norm": 2.6031277179718018,
+      "learning_rate": 9.174206545276677e-05,
+      "loss": 1.2114,
+      "step": 55
+    },
+    {
+      "epoch": 0.33633633633633636,
+      "grad_norm": 3.0436697006225586,
+      "learning_rate": 8.845153630304139e-05,
+      "loss": 1.457,
+      "step": 56
+    },
+    {
+      "epoch": 0.34234234234234234,
+      "grad_norm": 2.756425619125366,
+      "learning_rate": 8.517363522881579e-05,
+      "loss": 1.2442,
+      "step": 57
+    },
+    {
+      "epoch": 0.3483483483483483,
+      "grad_norm": 2.9140679836273193,
+      "learning_rate": 8.191194656678904e-05,
+      "loss": 1.127,
+      "step": 58
+    },
+    {
+      "epoch": 0.35435435435435436,
+      "grad_norm": 2.6434483528137207,
+      "learning_rate": 7.867003692562534e-05,
+      "loss": 1.0603,
+      "step": 59
+    },
+    {
+      "epoch": 0.36036036036036034,
+      "grad_norm": 5.660317897796631,
+      "learning_rate": 7.54514512859201e-05,
+      "loss": 1.048,
+      "step": 60
+    },
+    {
+      "epoch": 0.3663663663663664,
+      "grad_norm": 2.662621021270752,
+      "learning_rate": 7.225970912381556e-05,
+      "loss": 0.9771,
+      "step": 61
+    },
+    {
+      "epoch": 0.37237237237237236,
+      "grad_norm": 3.0631654262542725,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 1.4288,
+      "step": 62
+    },
+    {
+      "epoch": 0.3783783783783784,
+      "grad_norm": 2.566416025161743,
+      "learning_rate": 6.59706825558357e-05,
+      "loss": 0.9023,
+      "step": 63
+    },
+    {
+      "epoch": 0.3843843843843844,
+      "grad_norm": 2.502877712249756,
+      "learning_rate": 6.28802751081779e-05,
+      "loss": 0.8202,
+      "step": 64
+    },
+    {
+      "epoch": 0.39039039039039036,
+      "grad_norm": 3.319624662399292,
+      "learning_rate": 5.983045753470308e-05,
+      "loss": 1.045,
+      "step": 65
+    },
+    {
+      "epoch": 0.3963963963963964,
+      "grad_norm": 2.7986936569213867,
+      "learning_rate": 5.6824564766150726e-05,
+      "loss": 1.295,
+      "step": 66
+    },
+    {
+      "epoch": 0.4024024024024024,
+      "grad_norm": 3.08010196685791,
+      "learning_rate": 5.386588370213124e-05,
+      "loss": 1.0989,
+      "step": 67
+    },
+    {
+      "epoch": 0.4084084084084084,
+      "grad_norm": 3.931140899658203,
+      "learning_rate": 5.095764961694922e-05,
+      "loss": 0.8769,
+      "step": 68
+    },
+    {
+      "epoch": 0.4144144144144144,
+      "grad_norm": 4.616240978240967,
+      "learning_rate": 4.810304262187852e-05,
+      "loss": 1.3225,
+      "step": 69
+    },
+    {
+      "epoch": 0.42042042042042044,
+      "grad_norm": 3.139350652694702,
+      "learning_rate": 4.530518418775733e-05,
+      "loss": 1.1953,
+      "step": 70
+    },
+    {
+      "epoch": 0.4264264264264264,
+      "grad_norm": 3.0971598625183105,
+      "learning_rate": 4.256713373170564e-05,
+      "loss": 0.9564,
+      "step": 71
+    },
+    {
+      "epoch": 0.43243243243243246,
+      "grad_norm": 4.219956874847412,
+      "learning_rate": 3.9891885271697496e-05,
+      "loss": 0.9444,
+      "step": 72
+    },
+    {
+      "epoch": 0.43843843843843844,
+      "grad_norm": 3.592334747314453,
+      "learning_rate": 3.7282364152646297e-05,
+      "loss": 1.1826,
+      "step": 73
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 4.302462577819824,
+      "learning_rate": 3.4741423847583134e-05,
+      "loss": 1.0087,
+      "step": 74
+    },
+    {
+      "epoch": 0.45045045045045046,
+      "grad_norm": 2.8837406635284424,
+      "learning_rate": 3.227184283742591e-05,
+      "loss": 0.9843,
+      "step": 75
+    },
+    {
+      "epoch": 0.45645645645645644,
+      "grad_norm": 2.9115164279937744,
+      "learning_rate": 2.9876321572751144e-05,
+      "loss": 1.013,
+      "step": 76
+    },
+    {
+      "epoch": 0.4624624624624625,
+      "grad_norm": 2.647456645965576,
+      "learning_rate": 2.7557479520891104e-05,
+      "loss": 0.9615,
+      "step": 77
+    },
+    {
+      "epoch": 0.46846846846846846,
+      "grad_norm": 2.5124423503875732,
+      "learning_rate": 2.5317852301584643e-05,
+      "loss": 0.697,
+      "step": 78
+    },
+    {
+      "epoch": 0.4744744744744745,
+      "grad_norm": 4.744019508361816,
+      "learning_rate": 2.315988891431412e-05,
+      "loss": 1.0931,
+      "step": 79
+    },
+    {
+      "epoch": 0.4804804804804805,
+      "grad_norm": 3.7033681869506836,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 1.0538,
+      "step": 80
+    },
+    {
+      "epoch": 0.4864864864864865,
+      "grad_norm": 3.1618974208831787,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.7846,
+      "step": 81
+    },
+    {
+      "epoch": 0.4924924924924925,
+      "grad_norm": 3.4100112915039062,
+      "learning_rate": 1.7199116885197995e-05,
+      "loss": 1.1595,
+      "step": 82
+    },
+    {
+      "epoch": 0.4984984984984985,
+      "grad_norm": 2.8490142822265625,
+      "learning_rate": 1.5390474757906446e-05,
+      "loss": 1.3412,
+      "step": 83
+    },
+    {
+      "epoch": 0.5045045045045045,
+      "grad_norm": 1.9056596755981445,
+      "learning_rate": 1.3674351904242611e-05,
+      "loss": 1.2043,
+      "step": 84
+    },
+    {
+      "epoch": 0.5105105105105106,
+      "grad_norm": 2.0238497257232666,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 1.1433,
+      "step": 85
+    },
+    {
+      "epoch": 0.5165165165165165,
+      "grad_norm": 2.10072922706604,
+      "learning_rate": 1.0527067017923654e-05,
+      "loss": 1.2434,
+      "step": 86
+    },
+    {
+      "epoch": 0.5225225225225225,
+      "grad_norm": 2.534837484359741,
+      "learning_rate": 9.09934649508375e-06,
+      "loss": 1.176,
+      "step": 87
+    },
+    {
+      "epoch": 0.5285285285285285,
+      "grad_norm": 2.2539825439453125,
+      "learning_rate": 7.771024502261526e-06,
+      "loss": 1.2467,
+      "step": 88
+    },
+    {
+      "epoch": 0.5345345345345346,
+      "grad_norm": 2.548096179962158,
+      "learning_rate": 6.543553540053926e-06,
+      "loss": 1.3314,
+      "step": 89
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 2.4377360343933105,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 1.2026,
+      "step": 90
+    },
+    {
+      "epoch": 0.5465465465465466,
+      "grad_norm": 2.1080400943756104,
+      "learning_rate": 4.3964218465642355e-06,
+      "loss": 1.1876,
+      "step": 91
+    },
+    {
+      "epoch": 0.5525525525525525,
+      "grad_norm": 2.250770330429077,
+      "learning_rate": 3.4791089722651436e-06,
+      "loss": 1.2981,
+      "step": 92
+    },
+    {
+      "epoch": 0.5585585585585585,
+      "grad_norm": 2.5565848350524902,
+      "learning_rate": 2.667340275199426e-06,
+      "loss": 1.031,
+      "step": 93
+    },
+    {
+      "epoch": 0.5645645645645646,
+      "grad_norm": 2.642076015472412,
+      "learning_rate": 1.9620034125190644e-06,
+      "loss": 1.1568,
+      "step": 94
+    },
+    {
+      "epoch": 0.5705705705705706,
+      "grad_norm": 2.3764145374298096,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 1.2446,
+      "step": 95
+    },
+    {
+      "epoch": 0.5765765765765766,
+      "grad_norm": 2.5050697326660156,
+      "learning_rate": 8.735930673024806e-07,
+      "loss": 1.0928,
+      "step": 96
+    },
+    {
+      "epoch": 0.5825825825825826,
+      "grad_norm": 2.3315417766571045,
+      "learning_rate": 4.917097454988584e-07,
+      "loss": 1.0785,
+      "step": 97
+    },
+    {
+      "epoch": 0.5885885885885885,
+      "grad_norm": 1.9666122198104858,
+      "learning_rate": 2.1863727812254653e-07,
+      "loss": 0.9106,
+      "step": 98
+    },
+    {
+      "epoch": 0.5945945945945946,
+      "grad_norm": 2.5268125534057617,
+      "learning_rate": 5.467426590739511e-08,
+      "loss": 1.2624,
+      "step": 99
+    },
+    {
+      "epoch": 0.6006006006006006,
+      "grad_norm": 2.017895460128784,
+      "learning_rate": 0.0,
+      "loss": 0.8749,
+      "step": 100
+    },
+    {
+      "epoch": 0.6006006006006006,
+      "eval_loss": 1.135526418685913,
+      "eval_runtime": 20.2206,
+      "eval_samples_per_second": 3.511,
+      "eval_steps_per_second": 1.78,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.483774567120896e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null