Training in progress, step 198, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +340 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:648569fb8798e3aae2a620485b01d541329e758c7c3a82bf11d166419ae78de5
 size 47724600

 version https://git-lfs.github.com/spec/v1
+oid sha256:92215874f3a584275265c23817f5cf7dcc79f0d43cb5f1abfc23b9011cf4aa75
 size 47724600

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3a9bbdd030fe2880693e1b01def974bab1aa1a4fcc1b65a28bf3010ef26dcad
 size 25331516

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfae77bcf589e68ea0fa173a585252bc33fb7d4734adeb26f8aecf5b26b112d0
 size 25331516

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec77f997d555f33bab0c493fee05043cf60797177f7fe76d260efdf8c9393bda
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0157921b798a43101d6f6da7d127e9044252ba6ca86855a0d8a7ff62d6854164
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae4f1bd750c09fc9bb727cae976f56e1bbe0dff5c4d4e1a6eec209a810ae59b2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:588b963689e2bc6a644ef6e066b36a07667462b36247fb966e7188944b9c91f2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7575757575757576,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,342 @@
       "eval_samples_per_second": 22.561,
       "eval_steps_per_second": 11.281,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,12 +1439,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.13169452613632e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 50,
+  "global_step": 198,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.561,
       "eval_steps_per_second": 11.281,
       "step": 150
+    },
+    {
+      "epoch": 0.7626262626262627,
+      "grad_norm": 2.387876510620117,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 0.2854,
+      "step": 151
+    },
+    {
+      "epoch": 0.7676767676767676,
+      "grad_norm": 2.7719225883483887,
+      "learning_rate": 2.8117631612207084e-05,
+      "loss": 0.4052,
+      "step": 152
+    },
+    {
+      "epoch": 0.7727272727272727,
+      "grad_norm": 3.0982537269592285,
+      "learning_rate": 2.6966013605133088e-05,
+      "loss": 0.2989,
+      "step": 153
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 3.606672763824463,
+      "learning_rate": 2.5834789435204243e-05,
+      "loss": 0.2955,
+      "step": 154
+    },
+    {
+      "epoch": 0.7828282828282829,
+      "grad_norm": 3.5466747283935547,
+      "learning_rate": 2.4724274982774865e-05,
+      "loss": 0.3954,
+      "step": 155
+    },
+    {
+      "epoch": 0.7878787878787878,
+      "grad_norm": 2.7434446811676025,
+      "learning_rate": 2.3634780345266806e-05,
+      "loss": 0.3595,
+      "step": 156
+    },
+    {
+      "epoch": 0.7929292929292929,
+      "grad_norm": 2.7912631034851074,
+      "learning_rate": 2.2566609750578673e-05,
+      "loss": 0.3927,
+      "step": 157
+    },
+    {
+      "epoch": 0.797979797979798,
+      "grad_norm": 4.510963439941406,
+      "learning_rate": 2.1520061472133902e-05,
+      "loss": 0.2879,
+      "step": 158
+    },
+    {
+      "epoch": 0.803030303030303,
+      "grad_norm": 5.5550456047058105,
+      "learning_rate": 2.04954277455917e-05,
+      "loss": 0.3443,
+      "step": 159
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 3.655639886856079,
+      "learning_rate": 1.9492994687243714e-05,
+      "loss": 0.383,
+      "step": 160
+    },
+    {
+      "epoch": 0.8131313131313131,
+      "grad_norm": 3.4567534923553467,
+      "learning_rate": 1.851304221411967e-05,
+      "loss": 0.4136,
+      "step": 161
+    },
+    {
+      "epoch": 0.8181818181818182,
+      "grad_norm": 3.3192026615142822,
+      "learning_rate": 1.7555843965823992e-05,
+      "loss": 0.3948,
+      "step": 162
+    },
+    {
+      "epoch": 0.8232323232323232,
+      "grad_norm": 3.3644375801086426,
+      "learning_rate": 1.6621667228125302e-05,
+      "loss": 0.4367,
+      "step": 163
+    },
+    {
+      "epoch": 0.8282828282828283,
+      "grad_norm": 4.114311695098877,
+      "learning_rate": 1.57107728583203e-05,
+      "loss": 0.3574,
+      "step": 164
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 2.873835563659668,
+      "learning_rate": 1.4823415212392377e-05,
+      "loss": 0.3224,
+      "step": 165
+    },
+    {
+      "epoch": 0.8383838383838383,
+      "grad_norm": 2.4796266555786133,
+      "learning_rate": 1.3959842073986085e-05,
+      "loss": 0.2574,
+      "step": 166
+    },
+    {
+      "epoch": 0.8434343434343434,
+      "grad_norm": 2.918776512145996,
+      "learning_rate": 1.3120294585216353e-05,
+      "loss": 0.3416,
+      "step": 167
+    },
+    {
+      "epoch": 0.8484848484848485,
+      "grad_norm": 2.633633852005005,
+      "learning_rate": 1.230500717933285e-05,
+      "loss": 0.3451,
+      "step": 168
+    },
+    {
+      "epoch": 0.8535353535353535,
+      "grad_norm": 2.6851956844329834,
+      "learning_rate": 1.1514207515257147e-05,
+      "loss": 0.2827,
+      "step": 169
+    },
+    {
+      "epoch": 0.8585858585858586,
+      "grad_norm": 3.3666656017303467,
+      "learning_rate": 1.0748116414011888e-05,
+      "loss": 0.2547,
+      "step": 170
+    },
+    {
+      "epoch": 0.8636363636363636,
+      "grad_norm": 2.8328895568847656,
+      "learning_rate": 1.0006947797059219e-05,
+      "loss": 0.3208,
+      "step": 171
+    },
+    {
+      "epoch": 0.8686868686868687,
+      "grad_norm": 3.9734315872192383,
+      "learning_rate": 9.29090862656593e-06,
+      "loss": 0.4116,
+      "step": 172
+    },
+    {
+      "epoch": 0.8737373737373737,
+      "grad_norm": 2.9237353801727295,
+      "learning_rate": 8.600198847611729e-06,
+      "loss": 0.2467,
+      "step": 173
+    },
+    {
+      "epoch": 0.8787878787878788,
+      "grad_norm": 4.420521259307861,
+      "learning_rate": 7.935011332357112e-06,
+      "loss": 0.3809,
+      "step": 174
+    },
+    {
+      "epoch": 0.8838383838383839,
+      "grad_norm": 3.4638662338256836,
+      "learning_rate": 7.295531826186264e-06,
+      "loss": 0.3872,
+      "step": 175
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 5.868827819824219,
+      "learning_rate": 6.681938895839746e-06,
+      "loss": 0.403,
+      "step": 176
+    },
+    {
+      "epoch": 0.8939393939393939,
+      "grad_norm": 6.942677021026611,
+      "learning_rate": 6.094403879552213e-06,
+      "loss": 0.2913,
+      "step": 177
+    },
+    {
+      "epoch": 0.898989898989899,
+      "grad_norm": 2.8716115951538086,
+      "learning_rate": 5.533090839208133e-06,
+      "loss": 0.3676,
+      "step": 178
+    },
+    {
+      "epoch": 0.9040404040404041,
+      "grad_norm": 4.945950984954834,
+      "learning_rate": 4.998156514529595e-06,
+      "loss": 0.4171,
+      "step": 179
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 4.055920600891113,
+      "learning_rate": 4.489750279308757e-06,
+      "loss": 0.3429,
+      "step": 180
+    },
+    {
+      "epoch": 0.9141414141414141,
+      "grad_norm": 3.5922913551330566,
+      "learning_rate": 4.008014099696922e-06,
+      "loss": 0.5131,
+      "step": 181
+    },
+    {
+      "epoch": 0.9191919191919192,
+      "grad_norm": 4.818300247192383,
+      "learning_rate": 3.5530824945623542e-06,
+      "loss": 0.4656,
+      "step": 182
+    },
+    {
+      "epoch": 0.9242424242424242,
+      "grad_norm": 3.481379747390747,
+      "learning_rate": 3.1250824979274675e-06,
+      "loss": 0.392,
+      "step": 183
+    },
+    {
+      "epoch": 0.9292929292929293,
+      "grad_norm": 3.6859374046325684,
+      "learning_rate": 2.7241336234962944e-06,
+      "loss": 0.4633,
+      "step": 184
+    },
+    {
+      "epoch": 0.9343434343434344,
+      "grad_norm": 3.2945504188537598,
+      "learning_rate": 2.3503478312815298e-06,
+      "loss": 0.3309,
+      "step": 185
+    },
+    {
+      "epoch": 0.9393939393939394,
+      "grad_norm": 3.5352680683135986,
+      "learning_rate": 2.003829496341325e-06,
+      "loss": 0.3321,
+      "step": 186
+    },
+    {
+      "epoch": 0.9444444444444444,
+      "grad_norm": 3.4367051124572754,
+      "learning_rate": 1.684675379633649e-06,
+      "loss": 0.4528,
+      "step": 187
+    },
+    {
+      "epoch": 0.9494949494949495,
+      "grad_norm": 3.3837664127349854,
+      "learning_rate": 1.3929746009971433e-06,
+      "loss": 0.279,
+      "step": 188
+    },
+    {
+      "epoch": 0.9545454545454546,
+      "grad_norm": 4.299978733062744,
+      "learning_rate": 1.1288086142653864e-06,
+      "loss": 0.4305,
+      "step": 189
+    },
+    {
+      "epoch": 0.9595959595959596,
+      "grad_norm": 2.2504851818084717,
+      "learning_rate": 8.922511845219971e-07,
+      "loss": 0.2997,
+      "step": 190
+    },
+    {
+      "epoch": 0.9646464646464646,
+      "grad_norm": 2.9800455570220947,
+      "learning_rate": 6.833683675025904e-07,
+      "loss": 0.4066,
+      "step": 191
+    },
+    {
+      "epoch": 0.9696969696969697,
+      "grad_norm": 2.7905917167663574,
+      "learning_rate": 5.022184911495864e-07,
+      "loss": 0.2867,
+      "step": 192
+    },
+    {
+      "epoch": 0.9747474747474747,
+      "grad_norm": 3.33724308013916,
+      "learning_rate": 3.488521393248401e-07,
+      "loss": 0.3398,
+      "step": 193
+    },
+    {
+      "epoch": 0.9797979797979798,
+      "grad_norm": 3.7422661781311035,
+      "learning_rate": 2.2331213768468363e-07,
+      "loss": 0.4801,
+      "step": 194
+    },
+    {
+      "epoch": 0.9848484848484849,
+      "grad_norm": 3.9842588901519775,
+      "learning_rate": 1.2563354172142606e-07,
+      "loss": 0.4276,
+      "step": 195
+    },
+    {
+      "epoch": 0.98989898989899,
+      "grad_norm": 3.6827399730682373,
+      "learning_rate": 5.584362697453882e-08,
+      "loss": 0.5349,
+      "step": 196
+    },
+    {
+      "epoch": 0.9949494949494949,
+      "grad_norm": 10.242842674255371,
+      "learning_rate": 1.3961881414292778e-08,
+      "loss": 0.4155,
+      "step": 197
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 3.458073377609253,
+      "learning_rate": 0.0,
+      "loss": 0.4825,
+      "step": 198
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.812060362394829e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null