Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44a61aa92678aabf723685dff3a6388aa2860c69d2bc7bbc4a8d84deaf487eff
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:f45713302ffec12ebb2045b50d0cecf78642afd3c89aacdf6b30270246255740
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fbd367cf7947b43167437f2e62cd460a2008fc746b3b49fc95a6956f1f58f26
 size 148047722

 version https://git-lfs.github.com/spec/v1
+oid sha256:3351837ef943841bac25b40343357818d25708f06bc8ead49494dd562f0c62d7
 size 148047722

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d58be2bfdf572e7fb47b3f8f08cc7b53c23946ec1ad88dff89b3ecfd550b4223
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2769c8496c824dfff8aff679240229e055127b4e67ce00f8f0c475f6e539db60
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.14814567565918,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.11389521640091116,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 31.449,
       "eval_steps_per_second": 7.862,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.4499584088735744e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.204195022583008,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.22779043280182232,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.449,
       "eval_steps_per_second": 7.862,
       "step": 50
+    },
+    {
+      "epoch": 0.11617312072892938,
+      "grad_norm": 14.492518424987793,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 14.905,
+      "step": 51
+    },
+    {
+      "epoch": 0.11845102505694761,
+      "grad_norm": 15.138047218322754,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 14.4474,
+      "step": 52
+    },
+    {
+      "epoch": 0.12072892938496584,
+      "grad_norm": 15.702535629272461,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 15.193,
+      "step": 53
+    },
+    {
+      "epoch": 0.12300683371298406,
+      "grad_norm": 14.819658279418945,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 15.2334,
+      "step": 54
+    },
+    {
+      "epoch": 0.1252847380410023,
+      "grad_norm": 14.035573959350586,
+      "learning_rate": 2.5e-06,
+      "loss": 13.193,
+      "step": 55
+    },
+    {
+      "epoch": 0.1275626423690205,
+      "grad_norm": 12.402682304382324,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 10.859,
+      "step": 56
+    },
+    {
+      "epoch": 0.12984054669703873,
+      "grad_norm": 11.38323974609375,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 10.0438,
+      "step": 57
+    },
+    {
+      "epoch": 0.13211845102505695,
+      "grad_norm": 12.060361862182617,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 10.6817,
+      "step": 58
+    },
+    {
+      "epoch": 0.13439635535307518,
+      "grad_norm": 12.099507331848145,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 10.7362,
+      "step": 59
+    },
+    {
+      "epoch": 0.1366742596810934,
+      "grad_norm": 12.863167762756348,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 10.3842,
+      "step": 60
+    },
+    {
+      "epoch": 0.13895216400911162,
+      "grad_norm": 11.783324241638184,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 10.2279,
+      "step": 61
+    },
+    {
+      "epoch": 0.14123006833712984,
+      "grad_norm": 11.725215911865234,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 10.0699,
+      "step": 62
+    },
+    {
+      "epoch": 0.14350797266514806,
+      "grad_norm": 11.681756973266602,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 9.8714,
+      "step": 63
+    },
+    {
+      "epoch": 0.14578587699316628,
+      "grad_norm": 11.722614288330078,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 10.2099,
+      "step": 64
+    },
+    {
+      "epoch": 0.1480637813211845,
+      "grad_norm": 11.857827186584473,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 10.0694,
+      "step": 65
+    },
+    {
+      "epoch": 0.15034168564920272,
+      "grad_norm": 11.467761039733887,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 10.2353,
+      "step": 66
+    },
+    {
+      "epoch": 0.15261958997722094,
+      "grad_norm": 11.274076461791992,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 10.2479,
+      "step": 67
+    },
+    {
+      "epoch": 0.1548974943052392,
+      "grad_norm": 11.101469039916992,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 10.4798,
+      "step": 68
+    },
+    {
+      "epoch": 0.1571753986332574,
+      "grad_norm": 10.9977445602417,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 10.1868,
+      "step": 69
+    },
+    {
+      "epoch": 0.15945330296127563,
+      "grad_norm": 11.99439525604248,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 10.2214,
+      "step": 70
+    },
+    {
+      "epoch": 0.16173120728929385,
+      "grad_norm": 11.092779159545898,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 10.2925,
+      "step": 71
+    },
+    {
+      "epoch": 0.16400911161731208,
+      "grad_norm": 11.094412803649902,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 10.0751,
+      "step": 72
+    },
+    {
+      "epoch": 0.1662870159453303,
+      "grad_norm": 11.195977210998535,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 10.0356,
+      "step": 73
+    },
+    {
+      "epoch": 0.16856492027334852,
+      "grad_norm": 11.195873260498047,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 9.7823,
+      "step": 74
+    },
+    {
+      "epoch": 0.17084282460136674,
+      "grad_norm": 11.247345924377441,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 9.9424,
+      "step": 75
+    },
+    {
+      "epoch": 0.17312072892938496,
+      "grad_norm": 11.40001106262207,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 9.9038,
+      "step": 76
+    },
+    {
+      "epoch": 0.17539863325740318,
+      "grad_norm": 11.882497787475586,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 10.0807,
+      "step": 77
+    },
+    {
+      "epoch": 0.1776765375854214,
+      "grad_norm": 11.732626914978027,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 9.6536,
+      "step": 78
+    },
+    {
+      "epoch": 0.17995444191343962,
+      "grad_norm": 11.187015533447266,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 9.7543,
+      "step": 79
+    },
+    {
+      "epoch": 0.18223234624145787,
+      "grad_norm": 11.719953536987305,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 10.0644,
+      "step": 80
+    },
+    {
+      "epoch": 0.1845102505694761,
+      "grad_norm": 11.316506385803223,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 10.244,
+      "step": 81
+    },
+    {
+      "epoch": 0.1867881548974943,
+      "grad_norm": 11.08798599243164,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 9.7301,
+      "step": 82
+    },
+    {
+      "epoch": 0.18906605922551253,
+      "grad_norm": 11.46131706237793,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 9.6195,
+      "step": 83
+    },
+    {
+      "epoch": 0.19134396355353075,
+      "grad_norm": 11.669816017150879,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 9.6443,
+      "step": 84
+    },
+    {
+      "epoch": 0.19362186788154898,
+      "grad_norm": 11.048128128051758,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 9.8642,
+      "step": 85
+    },
+    {
+      "epoch": 0.1958997722095672,
+      "grad_norm": 11.225754737854004,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 9.5736,
+      "step": 86
+    },
+    {
+      "epoch": 0.19817767653758542,
+      "grad_norm": 11.950643539428711,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 9.8235,
+      "step": 87
+    },
+    {
+      "epoch": 0.20045558086560364,
+      "grad_norm": 11.37067699432373,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 10.0363,
+      "step": 88
+    },
+    {
+      "epoch": 0.20273348519362186,
+      "grad_norm": 11.574517250061035,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 9.8055,
+      "step": 89
+    },
+    {
+      "epoch": 0.20501138952164008,
+      "grad_norm": 11.37779426574707,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 10.0572,
+      "step": 90
+    },
+    {
+      "epoch": 0.2072892938496583,
+      "grad_norm": 11.561871528625488,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 9.9006,
+      "step": 91
+    },
+    {
+      "epoch": 0.20956719817767655,
+      "grad_norm": 10.969876289367676,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 9.8471,
+      "step": 92
+    },
+    {
+      "epoch": 0.21184510250569477,
+      "grad_norm": 11.3236722946167,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 9.7325,
+      "step": 93
+    },
+    {
+      "epoch": 0.214123006833713,
+      "grad_norm": 10.691644668579102,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 10.0359,
+      "step": 94
+    },
+    {
+      "epoch": 0.2164009111617312,
+      "grad_norm": 11.637835502624512,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 9.6017,
+      "step": 95
+    },
+    {
+      "epoch": 0.21867881548974943,
+      "grad_norm": 12.517233848571777,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 10.0366,
+      "step": 96
+    },
+    {
+      "epoch": 0.22095671981776766,
+      "grad_norm": 11.837146759033203,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 10.1357,
+      "step": 97
+    },
+    {
+      "epoch": 0.22323462414578588,
+      "grad_norm": 12.083368301391602,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 10.1905,
+      "step": 98
+    },
+    {
+      "epoch": 0.2255125284738041,
+      "grad_norm": 12.383536338806152,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 10.3541,
+      "step": 99
+    },
+    {
+      "epoch": 0.22779043280182232,
+      "grad_norm": 14.80471420288086,
+      "learning_rate": 0.0,
+      "loss": 10.4165,
+      "step": 100
+    },
+    {
+      "epoch": 0.22779043280182232,
+      "eval_loss": 10.204195022583008,
+      "eval_runtime": 23.6586,
+      "eval_samples_per_second": 31.278,
+      "eval_steps_per_second": 7.82,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8770571131027456e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null