Training in progress, step 198, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +340 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:142409133f4ef71836708c2f670095faf0570a7ab4daeb8038d7f7f963054bec
 size 47724600

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fd2957ad72dbe80039410eb3dd3a2b343a763ab7f13b17db987f5c307a58315
 size 47724600

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28203c3131643953ac482a75cd59347cdd325d4f28396bccc7207dfe69f830d1
 size 25331516

 version https://git-lfs.github.com/spec/v1
+oid sha256:be882b144435014759f56adacbff9c11068bb899e000714c3c0ffe736b79f026
 size 25331516

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec77f997d555f33bab0c493fee05043cf60797177f7fe76d260efdf8c9393bda
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0157921b798a43101d6f6da7d127e9044252ba6ca86855a0d8a7ff62d6854164
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae4f1bd750c09fc9bb727cae976f56e1bbe0dff5c4d4e1a6eec209a810ae59b2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:588b963689e2bc6a644ef6e066b36a07667462b36247fb966e7188944b9c91f2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7575757575757576,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,342 @@
       "eval_samples_per_second": 23.186,
       "eval_steps_per_second": 11.593,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,12 +1439,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.13169452613632e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 50,
+  "global_step": 198,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.186,
       "eval_steps_per_second": 11.593,
       "step": 150
+    },
+    {
+      "epoch": 0.7626262626262627,
+      "grad_norm": 9.661118507385254,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 0.2918,
+      "step": 151
+    },
+    {
+      "epoch": 0.7676767676767676,
+      "grad_norm": 10.700360298156738,
+      "learning_rate": 2.8117631612207084e-05,
+      "loss": 0.3913,
+      "step": 152
+    },
+    {
+      "epoch": 0.7727272727272727,
+      "grad_norm": 9.030896186828613,
+      "learning_rate": 2.6966013605133088e-05,
+      "loss": 0.3069,
+      "step": 153
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 14.797833442687988,
+      "learning_rate": 2.5834789435204243e-05,
+      "loss": 0.3008,
+      "step": 154
+    },
+    {
+      "epoch": 0.7828282828282829,
+      "grad_norm": 14.041370391845703,
+      "learning_rate": 2.4724274982774865e-05,
+      "loss": 0.3847,
+      "step": 155
+    },
+    {
+      "epoch": 0.7878787878787878,
+      "grad_norm": 13.21018123626709,
+      "learning_rate": 2.3634780345266806e-05,
+      "loss": 0.3623,
+      "step": 156
+    },
+    {
+      "epoch": 0.7929292929292929,
+      "grad_norm": 12.600321769714355,
+      "learning_rate": 2.2566609750578673e-05,
+      "loss": 0.4142,
+      "step": 157
+    },
+    {
+      "epoch": 0.797979797979798,
+      "grad_norm": 9.882135391235352,
+      "learning_rate": 2.1520061472133902e-05,
+      "loss": 0.2679,
+      "step": 158
+    },
+    {
+      "epoch": 0.803030303030303,
+      "grad_norm": 11.879226684570312,
+      "learning_rate": 2.04954277455917e-05,
+      "loss": 0.3472,
+      "step": 159
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 12.724488258361816,
+      "learning_rate": 1.9492994687243714e-05,
+      "loss": 0.361,
+      "step": 160
+    },
+    {
+      "epoch": 0.8131313131313131,
+      "grad_norm": 12.550914764404297,
+      "learning_rate": 1.851304221411967e-05,
+      "loss": 0.4241,
+      "step": 161
+    },
+    {
+      "epoch": 0.8181818181818182,
+      "grad_norm": 16.152448654174805,
+      "learning_rate": 1.7555843965823992e-05,
+      "loss": 0.4147,
+      "step": 162
+    },
+    {
+      "epoch": 0.8232323232323232,
+      "grad_norm": 11.693159103393555,
+      "learning_rate": 1.6621667228125302e-05,
+      "loss": 0.4261,
+      "step": 163
+    },
+    {
+      "epoch": 0.8282828282828283,
+      "grad_norm": 11.59165096282959,
+      "learning_rate": 1.57107728583203e-05,
+      "loss": 0.3764,
+      "step": 164
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 21.655597686767578,
+      "learning_rate": 1.4823415212392377e-05,
+      "loss": 0.305,
+      "step": 165
+    },
+    {
+      "epoch": 0.8383838383838383,
+      "grad_norm": 9.225619316101074,
+      "learning_rate": 1.3959842073986085e-05,
+      "loss": 0.2656,
+      "step": 166
+    },
+    {
+      "epoch": 0.8434343434343434,
+      "grad_norm": 10.11462116241455,
+      "learning_rate": 1.3120294585216353e-05,
+      "loss": 0.3298,
+      "step": 167
+    },
+    {
+      "epoch": 0.8484848484848485,
+      "grad_norm": 12.122771263122559,
+      "learning_rate": 1.230500717933285e-05,
+      "loss": 0.3734,
+      "step": 168
+    },
+    {
+      "epoch": 0.8535353535353535,
+      "grad_norm": 13.203550338745117,
+      "learning_rate": 1.1514207515257147e-05,
+      "loss": 0.2908,
+      "step": 169
+    },
+    {
+      "epoch": 0.8585858585858586,
+      "grad_norm": 10.409919738769531,
+      "learning_rate": 1.0748116414011888e-05,
+      "loss": 0.2437,
+      "step": 170
+    },
+    {
+      "epoch": 0.8636363636363636,
+      "grad_norm": 12.256322860717773,
+      "learning_rate": 1.0006947797059219e-05,
+      "loss": 0.3154,
+      "step": 171
+    },
+    {
+      "epoch": 0.8686868686868687,
+      "grad_norm": 12.224120140075684,
+      "learning_rate": 9.29090862656593e-06,
+      "loss": 0.3863,
+      "step": 172
+    },
+    {
+      "epoch": 0.8737373737373737,
+      "grad_norm": 16.545303344726562,
+      "learning_rate": 8.600198847611729e-06,
+      "loss": 0.2737,
+      "step": 173
+    },
+    {
+      "epoch": 0.8787878787878788,
+      "grad_norm": 17.211322784423828,
+      "learning_rate": 7.935011332357112e-06,
+      "loss": 0.38,
+      "step": 174
+    },
+    {
+      "epoch": 0.8838383838383839,
+      "grad_norm": 12.975410461425781,
+      "learning_rate": 7.295531826186264e-06,
+      "loss": 0.3809,
+      "step": 175
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 17.656003952026367,
+      "learning_rate": 6.681938895839746e-06,
+      "loss": 0.3591,
+      "step": 176
+    },
+    {
+      "epoch": 0.8939393939393939,
+      "grad_norm": 10.347539901733398,
+      "learning_rate": 6.094403879552213e-06,
+      "loss": 0.2775,
+      "step": 177
+    },
+    {
+      "epoch": 0.898989898989899,
+      "grad_norm": 11.597780227661133,
+      "learning_rate": 5.533090839208133e-06,
+      "loss": 0.3635,
+      "step": 178
+    },
+    {
+      "epoch": 0.9040404040404041,
+      "grad_norm": 14.497785568237305,
+      "learning_rate": 4.998156514529595e-06,
+      "loss": 0.436,
+      "step": 179
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 12.996254920959473,
+      "learning_rate": 4.489750279308757e-06,
+      "loss": 0.3239,
+      "step": 180
+    },
+    {
+      "epoch": 0.9141414141414141,
+      "grad_norm": 35.80766296386719,
+      "learning_rate": 4.008014099696922e-06,
+      "loss": 0.5281,
+      "step": 181
+    },
+    {
+      "epoch": 0.9191919191919192,
+      "grad_norm": 50.79511260986328,
+      "learning_rate": 3.5530824945623542e-06,
+      "loss": 0.4338,
+      "step": 182
+    },
+    {
+      "epoch": 0.9242424242424242,
+      "grad_norm": 31.98566246032715,
+      "learning_rate": 3.1250824979274675e-06,
+      "loss": 0.4195,
+      "step": 183
+    },
+    {
+      "epoch": 0.9292929292929293,
+      "grad_norm": 14.075968742370605,
+      "learning_rate": 2.7241336234962944e-06,
+      "loss": 0.4469,
+      "step": 184
+    },
+    {
+      "epoch": 0.9343434343434344,
+      "grad_norm": 15.213479042053223,
+      "learning_rate": 2.3503478312815298e-06,
+      "loss": 0.3303,
+      "step": 185
+    },
+    {
+      "epoch": 0.9393939393939394,
+      "grad_norm": 14.08496379852295,
+      "learning_rate": 2.003829496341325e-06,
+      "loss": 0.352,
+      "step": 186
+    },
+    {
+      "epoch": 0.9444444444444444,
+      "grad_norm": 17.063922882080078,
+      "learning_rate": 1.684675379633649e-06,
+      "loss": 0.4462,
+      "step": 187
+    },
+    {
+      "epoch": 0.9494949494949495,
+      "grad_norm": 9.787590980529785,
+      "learning_rate": 1.3929746009971433e-06,
+      "loss": 0.2868,
+      "step": 188
+    },
+    {
+      "epoch": 0.9545454545454546,
+      "grad_norm": 23.884563446044922,
+      "learning_rate": 1.1288086142653864e-06,
+      "loss": 0.4657,
+      "step": 189
+    },
+    {
+      "epoch": 0.9595959595959596,
+      "grad_norm": 13.69776725769043,
+      "learning_rate": 8.922511845219971e-07,
+      "loss": 0.2993,
+      "step": 190
+    },
+    {
+      "epoch": 0.9646464646464646,
+      "grad_norm": 12.410402297973633,
+      "learning_rate": 6.833683675025904e-07,
+      "loss": 0.402,
+      "step": 191
+    },
+    {
+      "epoch": 0.9696969696969697,
+      "grad_norm": 10.721906661987305,
+      "learning_rate": 5.022184911495864e-07,
+      "loss": 0.2896,
+      "step": 192
+    },
+    {
+      "epoch": 0.9747474747474747,
+      "grad_norm": 12.637495994567871,
+      "learning_rate": 3.488521393248401e-07,
+      "loss": 0.3491,
+      "step": 193
+    },
+    {
+      "epoch": 0.9797979797979798,
+      "grad_norm": 12.802627563476562,
+      "learning_rate": 2.2331213768468363e-07,
+      "loss": 0.4633,
+      "step": 194
+    },
+    {
+      "epoch": 0.9848484848484849,
+      "grad_norm": 19.49329948425293,
+      "learning_rate": 1.2563354172142606e-07,
+      "loss": 0.4523,
+      "step": 195
+    },
+    {
+      "epoch": 0.98989898989899,
+      "grad_norm": 16.25078773498535,
+      "learning_rate": 5.584362697453882e-08,
+      "loss": 0.5107,
+      "step": 196
+    },
+    {
+      "epoch": 0.9949494949494949,
+      "grad_norm": 11.172027587890625,
+      "learning_rate": 1.3961881414292778e-08,
+      "loss": 0.3854,
+      "step": 197
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 29.771207809448242,
+      "learning_rate": 0.0,
+      "loss": 0.7279,
+      "step": 198
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.812060362394829e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null