Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c58da1217f71b44d4c9d2caac92916f34ad8a07f6bc657650f8fd0a0073862d
 size 83115256

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f59c03e0db6e28e6264baf54723e9200176bf6aae719a51b41d81b573893054
 size 83115256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee3af30333a689ba6e38b0db905197e59fa2103e4b7f22c82a95cfb57a1c47a6
 size 166439638

 version https://git-lfs.github.com/spec/v1
+oid sha256:c05362f8e708da0f6fab3dbb2851ab26a2d238c5907f7c83df3a5463cc6e3638
 size 166439638

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5c5bb441bbc6d262df4ef0f6f28cec35031fef61c4955cb20d2972dc177b7e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c6f7cbd62dd1dd1abad27b2e9bc108a1841e8f3477190f33f916fadcc76e787
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.19923071563243866,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.05980861244019139,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 19.888,
       "eval_steps_per_second": 4.972,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0103247819767808e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.15960238873958588,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.11961722488038277,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.888,
       "eval_steps_per_second": 4.972,
       "step": 50
+    },
+    {
+      "epoch": 0.061004784688995214,
+      "grad_norm": 0.7308669090270996,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 0.1903,
+      "step": 51
+    },
+    {
+      "epoch": 0.06220095693779904,
+      "grad_norm": 0.8045475482940674,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 0.2067,
+      "step": 52
+    },
+    {
+      "epoch": 0.06339712918660287,
+      "grad_norm": 0.8171185851097107,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 0.2032,
+      "step": 53
+    },
+    {
+      "epoch": 0.0645933014354067,
+      "grad_norm": 0.6744611859321594,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 0.1581,
+      "step": 54
+    },
+    {
+      "epoch": 0.06578947368421052,
+      "grad_norm": 0.7448108792304993,
+      "learning_rate": 5e-05,
+      "loss": 0.1815,
+      "step": 55
+    },
+    {
+      "epoch": 0.06698564593301436,
+      "grad_norm": 0.7760705351829529,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 0.1738,
+      "step": 56
+    },
+    {
+      "epoch": 0.06818181818181818,
+      "grad_norm": 0.8528043031692505,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 0.1841,
+      "step": 57
+    },
+    {
+      "epoch": 0.06937799043062201,
+      "grad_norm": 0.905849277973175,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 0.1738,
+      "step": 58
+    },
+    {
+      "epoch": 0.07057416267942583,
+      "grad_norm": 0.8059756755828857,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 0.1722,
+      "step": 59
+    },
+    {
+      "epoch": 0.07177033492822966,
+      "grad_norm": 0.8063069581985474,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 0.1927,
+      "step": 60
+    },
+    {
+      "epoch": 0.0729665071770335,
+      "grad_norm": 0.825333833694458,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 0.1445,
+      "step": 61
+    },
+    {
+      "epoch": 0.07416267942583732,
+      "grad_norm": 0.8422712087631226,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 0.1558,
+      "step": 62
+    },
+    {
+      "epoch": 0.07535885167464115,
+      "grad_norm": 0.9840994477272034,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 0.1733,
+      "step": 63
+    },
+    {
+      "epoch": 0.07655502392344497,
+      "grad_norm": 0.8723501563072205,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.1491,
+      "step": 64
+    },
+    {
+      "epoch": 0.07775119617224881,
+      "grad_norm": 0.8056702017784119,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 0.1573,
+      "step": 65
+    },
+    {
+      "epoch": 0.07894736842105263,
+      "grad_norm": 0.8925252556800842,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 0.1795,
+      "step": 66
+    },
+    {
+      "epoch": 0.08014354066985646,
+      "grad_norm": 0.9729580283164978,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 0.1791,
+      "step": 67
+    },
+    {
+      "epoch": 0.08133971291866028,
+      "grad_norm": 1.0415399074554443,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 0.177,
+      "step": 68
+    },
+    {
+      "epoch": 0.08253588516746412,
+      "grad_norm": 0.8020843863487244,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 0.1268,
+      "step": 69
+    },
+    {
+      "epoch": 0.08373205741626795,
+      "grad_norm": 0.8391036987304688,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.1344,
+      "step": 70
+    },
+    {
+      "epoch": 0.08492822966507177,
+      "grad_norm": 1.1039748191833496,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 0.1954,
+      "step": 71
+    },
+    {
+      "epoch": 0.0861244019138756,
+      "grad_norm": 0.9647433757781982,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.1759,
+      "step": 72
+    },
+    {
+      "epoch": 0.08732057416267942,
+      "grad_norm": 0.9805471301078796,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.1625,
+      "step": 73
+    },
+    {
+      "epoch": 0.08851674641148326,
+      "grad_norm": 0.9431953430175781,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 0.1464,
+      "step": 74
+    },
+    {
+      "epoch": 0.08971291866028708,
+      "grad_norm": 0.9271813631057739,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 0.1374,
+      "step": 75
+    },
+    {
+      "epoch": 0.09090909090909091,
+      "grad_norm": 1.0403175354003906,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.1637,
+      "step": 76
+    },
+    {
+      "epoch": 0.09210526315789473,
+      "grad_norm": 0.9993944764137268,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.1522,
+      "step": 77
+    },
+    {
+      "epoch": 0.09330143540669857,
+      "grad_norm": 1.0364563465118408,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.1471,
+      "step": 78
+    },
+    {
+      "epoch": 0.09449760765550239,
+      "grad_norm": 0.8725647926330566,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.1113,
+      "step": 79
+    },
+    {
+      "epoch": 0.09569377990430622,
+      "grad_norm": 0.9386890530586243,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.1104,
+      "step": 80
+    },
+    {
+      "epoch": 0.09688995215311005,
+      "grad_norm": 1.0741575956344604,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.1286,
+      "step": 81
+    },
+    {
+      "epoch": 0.09808612440191387,
+      "grad_norm": 1.2276641130447388,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.1517,
+      "step": 82
+    },
+    {
+      "epoch": 0.09928229665071771,
+      "grad_norm": 1.12937593460083,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 0.1482,
+      "step": 83
+    },
+    {
+      "epoch": 0.10047846889952153,
+      "grad_norm": 1.1188244819641113,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.1201,
+      "step": 84
+    },
+    {
+      "epoch": 0.10167464114832536,
+      "grad_norm": 1.406683325767517,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.2082,
+      "step": 85
+    },
+    {
+      "epoch": 0.10287081339712918,
+      "grad_norm": 1.214578628540039,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.186,
+      "step": 86
+    },
+    {
+      "epoch": 0.10406698564593302,
+      "grad_norm": 1.270246148109436,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.1308,
+      "step": 87
+    },
+    {
+      "epoch": 0.10526315789473684,
+      "grad_norm": 1.2750657796859741,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.1296,
+      "step": 88
+    },
+    {
+      "epoch": 0.10645933014354067,
+      "grad_norm": 1.4091739654541016,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.1985,
+      "step": 89
+    },
+    {
+      "epoch": 0.1076555023923445,
+      "grad_norm": 1.3769253492355347,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.1777,
+      "step": 90
+    },
+    {
+      "epoch": 0.10885167464114832,
+      "grad_norm": 1.5859569311141968,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.1514,
+      "step": 91
+    },
+    {
+      "epoch": 0.11004784688995216,
+      "grad_norm": 1.3966237306594849,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.1825,
+      "step": 92
+    },
+    {
+      "epoch": 0.11124401913875598,
+      "grad_norm": 1.6977550983428955,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.1799,
+      "step": 93
+    },
+    {
+      "epoch": 0.11244019138755981,
+      "grad_norm": 1.6813634634017944,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.1539,
+      "step": 94
+    },
+    {
+      "epoch": 0.11363636363636363,
+      "grad_norm": 2.3753628730773926,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.2349,
+      "step": 95
+    },
+    {
+      "epoch": 0.11483253588516747,
+      "grad_norm": 2.1884944438934326,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.2109,
+      "step": 96
+    },
+    {
+      "epoch": 0.11602870813397129,
+      "grad_norm": 1.6471924781799316,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.1618,
+      "step": 97
+    },
+    {
+      "epoch": 0.11722488038277512,
+      "grad_norm": 1.5181317329406738,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 0.102,
+      "step": 98
+    },
+    {
+      "epoch": 0.11842105263157894,
+      "grad_norm": 5.536009788513184,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.5176,
+      "step": 99
+    },
+    {
+      "epoch": 0.11961722488038277,
+      "grad_norm": 5.12874174118042,
+      "learning_rate": 0.0,
+      "loss": 0.2482,
+      "step": 100
+    },
+    {
+      "epoch": 0.11961722488038277,
+      "eval_loss": 0.15960238873958588,
+      "eval_runtime": 35.4137,
+      "eval_samples_per_second": 19.879,
+      "eval_steps_per_second": 4.97,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.015623072500941e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null