Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b578e55ffc9129f83e62ef011ce239d84fd7f4b4703078cf8f2a5ab8376b6383
 size 478211024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2b2f2f8500194636e22887bcdd9baa304d05cf5a0d8c7ddd0e5d8da1a676601
 size 478211024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5efde090a563412827d029565562a59eb52a4627893c563345417bd4c8c9b99f
 size 956679634

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f4d69899566f15af59cce4f322fa8c62df9558dd0572e2b48a00fddbb229c43
 size 956679634

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60a95f634cf0ec7f352a8f07f519b4f31c04f32208d0beea12c3e8a7f2b90aa2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9791dacf1c6bf7634b5f78f1ada12fb0f6dd540b4f751d424c867069ab8a913e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2733176648616791,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.03372870875259992,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 17.102,
       "eval_steps_per_second": 8.551,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2596142624944947e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.2704465389251709,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.04497161167013323,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.102,
       "eval_steps_per_second": 8.551,
       "step": 150
+    },
+    {
+      "epoch": 0.03395356681095059,
+      "grad_norm": 0.35225486755371094,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 1.1456,
+      "step": 151
+    },
+    {
+      "epoch": 0.034178424869301255,
+      "grad_norm": 0.30325260758399963,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.9947,
+      "step": 152
+    },
+    {
+      "epoch": 0.03440328292765192,
+      "grad_norm": 0.31138110160827637,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 0.9226,
+      "step": 153
+    },
+    {
+      "epoch": 0.03462814098600259,
+      "grad_norm": 0.3109647333621979,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.9011,
+      "step": 154
+    },
+    {
+      "epoch": 0.03485299904435325,
+      "grad_norm": 0.33004358410835266,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.0542,
+      "step": 155
+    },
+    {
+      "epoch": 0.03507785710270392,
+      "grad_norm": 0.4470955729484558,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.1847,
+      "step": 156
+    },
+    {
+      "epoch": 0.035302715161054585,
+      "grad_norm": 0.4040617048740387,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 1.2763,
+      "step": 157
+    },
+    {
+      "epoch": 0.03552757321940525,
+      "grad_norm": 0.36411812901496887,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 1.0801,
+      "step": 158
+    },
+    {
+      "epoch": 0.03575243127775592,
+      "grad_norm": 0.39293670654296875,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 1.3485,
+      "step": 159
+    },
+    {
+      "epoch": 0.03597728933610658,
+      "grad_norm": 0.43890181183815,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.5783,
+      "step": 160
+    },
+    {
+      "epoch": 0.03620214739445725,
+      "grad_norm": 0.398112028837204,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 1.3208,
+      "step": 161
+    },
+    {
+      "epoch": 0.036427005452807915,
+      "grad_norm": 0.3950172960758209,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 1.124,
+      "step": 162
+    },
+    {
+      "epoch": 0.03665186351115858,
+      "grad_norm": 0.43928781151771545,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 1.2575,
+      "step": 163
+    },
+    {
+      "epoch": 0.036876721569509247,
+      "grad_norm": 0.4250316917896271,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.2319,
+      "step": 164
+    },
+    {
+      "epoch": 0.03710157962785991,
+      "grad_norm": 0.3945818543434143,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 1.1373,
+      "step": 165
+    },
+    {
+      "epoch": 0.03732643768621058,
+      "grad_norm": 0.4187924861907959,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.3355,
+      "step": 166
+    },
+    {
+      "epoch": 0.037551295744561244,
+      "grad_norm": 0.39608845114707947,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.1346,
+      "step": 167
+    },
+    {
+      "epoch": 0.03777615380291191,
+      "grad_norm": 0.3910957872867584,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.0022,
+      "step": 168
+    },
+    {
+      "epoch": 0.038001011861262576,
+      "grad_norm": 0.42380449175834656,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 1.0274,
+      "step": 169
+    },
+    {
+      "epoch": 0.03822586991961324,
+      "grad_norm": 0.4292587339878082,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.075,
+      "step": 170
+    },
+    {
+      "epoch": 0.03845072797796391,
+      "grad_norm": 0.4251493811607361,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 1.1064,
+      "step": 171
+    },
+    {
+      "epoch": 0.038675586036314574,
+      "grad_norm": 0.4216366410255432,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.9841,
+      "step": 172
+    },
+    {
+      "epoch": 0.03890044409466524,
+      "grad_norm": 0.42048147320747375,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.0027,
+      "step": 173
+    },
+    {
+      "epoch": 0.039125302153015906,
+      "grad_norm": 0.4132917523384094,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.9546,
+      "step": 174
+    },
+    {
+      "epoch": 0.03935016021136657,
+      "grad_norm": 0.45838025212287903,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 1.1604,
+      "step": 175
+    },
+    {
+      "epoch": 0.03957501826971724,
+      "grad_norm": 0.43805235624313354,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.9962,
+      "step": 176
+    },
+    {
+      "epoch": 0.039799876328067904,
+      "grad_norm": 0.4326944351196289,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.953,
+      "step": 177
+    },
+    {
+      "epoch": 0.04002473438641857,
+      "grad_norm": 0.4309222400188446,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.9219,
+      "step": 178
+    },
+    {
+      "epoch": 0.040249592444769236,
+      "grad_norm": 0.42803671956062317,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.9721,
+      "step": 179
+    },
+    {
+      "epoch": 0.04047445050311991,
+      "grad_norm": 0.4624505639076233,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.9768,
+      "step": 180
+    },
+    {
+      "epoch": 0.040699308561470575,
+      "grad_norm": 0.4740617275238037,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 1.024,
+      "step": 181
+    },
+    {
+      "epoch": 0.04092416661982124,
+      "grad_norm": 0.5211942791938782,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.2122,
+      "step": 182
+    },
+    {
+      "epoch": 0.04114902467817191,
+      "grad_norm": 0.5127923488616943,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 1.1659,
+      "step": 183
+    },
+    {
+      "epoch": 0.04137388273652257,
+      "grad_norm": 0.49814149737358093,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 1.0559,
+      "step": 184
+    },
+    {
+      "epoch": 0.04159874079487324,
+      "grad_norm": 0.5479244589805603,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.1183,
+      "step": 185
+    },
+    {
+      "epoch": 0.041823598853223905,
+      "grad_norm": 0.5728726387023926,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 1.0562,
+      "step": 186
+    },
+    {
+      "epoch": 0.04204845691157457,
+      "grad_norm": 0.6099637150764465,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 1.1409,
+      "step": 187
+    },
+    {
+      "epoch": 0.04227331496992524,
+      "grad_norm": 0.5624054670333862,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.0202,
+      "step": 188
+    },
+    {
+      "epoch": 0.0424981730282759,
+      "grad_norm": 0.5734448432922363,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 1.0401,
+      "step": 189
+    },
+    {
+      "epoch": 0.04272303108662657,
+      "grad_norm": 0.6112082004547119,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.0603,
+      "step": 190
+    },
+    {
+      "epoch": 0.042947889144977235,
+      "grad_norm": 0.6727156639099121,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.098,
+      "step": 191
+    },
+    {
+      "epoch": 0.0431727472033279,
+      "grad_norm": 0.6319669485092163,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.9956,
+      "step": 192
+    },
+    {
+      "epoch": 0.043397605261678567,
+      "grad_norm": 0.7210997343063354,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 0.9215,
+      "step": 193
+    },
+    {
+      "epoch": 0.04362246332002923,
+      "grad_norm": 0.7941908836364746,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.0039,
+      "step": 194
+    },
+    {
+      "epoch": 0.0438473213783799,
+      "grad_norm": 0.8426123261451721,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 1.0519,
+      "step": 195
+    },
+    {
+      "epoch": 0.044072179436730564,
+      "grad_norm": 0.8856363892555237,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.1357,
+      "step": 196
+    },
+    {
+      "epoch": 0.04429703749508123,
+      "grad_norm": 0.9602333307266235,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.9538,
+      "step": 197
+    },
+    {
+      "epoch": 0.044521895553431896,
+      "grad_norm": 1.0468101501464844,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.92,
+      "step": 198
+    },
+    {
+      "epoch": 0.04474675361178256,
+      "grad_norm": 1.224636197090149,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 1.1414,
+      "step": 199
+    },
+    {
+      "epoch": 0.04497161167013323,
+      "grad_norm": 1.523940920829773,
+      "learning_rate": 0.0,
+      "loss": 1.1848,
+      "step": 200
+    },
+    {
+      "epoch": 0.04497161167013323,
+      "eval_loss": 0.2704465389251709,
+      "eval_runtime": 438.1851,
+      "eval_samples_per_second": 17.093,
+      "eval_steps_per_second": 8.547,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6788561909409382e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null