Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96357fb0e78da9ddeb7d0598f32802b96afa2016adac01d085bf291b0c7ef24c
 size 27024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e0854de05ff06a1bb6bebc463437e23deac75cb2c91c516e856d79e5ce02f0d
 size 27024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dd1c739c74a63dd88e6d988da0a91c90e23f03715aa8b6de63c6414165820fc
 size 63974

 version https://git-lfs.github.com/spec/v1
+oid sha256:edf6db02e4e23668a02eda25306d34b08e2c20f7be9971b73b2f4cd1fc571fe0
 size 63974

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2fb8dc2a85d0cc09b3d729bf71a8da974bf095341dbe298d83978634de38d32
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:982eb43f0b685032f5d229850fc4c6116b1560071673bb981b13063a46a6b3e1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd5d42bb0afda20ec4c83d38c6af1131541c335ecab229c74e7f418894f3c13b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca9a25c72339c898b564e0c464a3f6fc75bbeec408008928b7ed05533156b98c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.028351367953503757,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 279.253,
       "eval_steps_per_second": 139.626,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,12 +1461,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3879272448000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.037801823938005005,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 279.253,
       "eval_steps_per_second": 139.626,
       "step": 150
+    },
+    {
+      "epoch": 0.028540377073193783,
+      "grad_norm": 0.06216558441519737,
+      "learning_rate": 3.106465908814342e-05,
+      "loss": 10.3635,
+      "step": 151
+    },
+    {
+      "epoch": 0.028729386192883805,
+      "grad_norm": 0.05615238845348358,
+      "learning_rate": 2.9876321572751144e-05,
+      "loss": 10.3545,
+      "step": 152
+    },
+    {
+      "epoch": 0.02891839531257383,
+      "grad_norm": 0.057838547974824905,
+      "learning_rate": 2.87071551708603e-05,
+      "loss": 10.3583,
+      "step": 153
+    },
+    {
+      "epoch": 0.029107404432263857,
+      "grad_norm": 0.052986498922109604,
+      "learning_rate": 2.7557479520891104e-05,
+      "loss": 10.3552,
+      "step": 154
+    },
+    {
+      "epoch": 0.029296413551953882,
+      "grad_norm": 0.055312514305114746,
+      "learning_rate": 2.6427608932686843e-05,
+      "loss": 10.356,
+      "step": 155
+    },
+    {
+      "epoch": 0.029485422671643908,
+      "grad_norm": 0.05807175859808922,
+      "learning_rate": 2.5317852301584643e-05,
+      "loss": 10.3565,
+      "step": 156
+    },
+    {
+      "epoch": 0.029674431791333934,
+      "grad_norm": 0.06103501841425896,
+      "learning_rate": 2.422851302396655e-05,
+      "loss": 10.3547,
+      "step": 157
+    },
+    {
+      "epoch": 0.029863440911023956,
+      "grad_norm": 0.05311097204685211,
+      "learning_rate": 2.315988891431412e-05,
+      "loss": 10.355,
+      "step": 158
+    },
+    {
+      "epoch": 0.03005245003071398,
+      "grad_norm": 0.05598808079957962,
+      "learning_rate": 2.2112272123788768e-05,
+      "loss": 10.3571,
+      "step": 159
+    },
+    {
+      "epoch": 0.030241459150404007,
+      "grad_norm": 0.05674521625041962,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 10.3537,
+      "step": 160
+    },
+    {
+      "epoch": 0.030430468270094033,
+      "grad_norm": 0.05536748096346855,
+      "learning_rate": 2.008120031050753e-05,
+      "loss": 10.3545,
+      "step": 161
+    },
+    {
+      "epoch": 0.03061947738978406,
+      "grad_norm": 0.051667895168066025,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 10.3562,
+      "step": 162
+    },
+    {
+      "epoch": 0.03080848650947408,
+      "grad_norm": 0.05974581092596054,
+      "learning_rate": 1.8137518531330767e-05,
+      "loss": 10.3538,
+      "step": 163
+    },
+    {
+      "epoch": 0.030997495629164106,
+      "grad_norm": 0.05384739860892296,
+      "learning_rate": 1.7199116885197995e-05,
+      "loss": 10.3574,
+      "step": 164
+    },
+    {
+      "epoch": 0.031186504748854132,
+      "grad_norm": 0.06221851706504822,
+      "learning_rate": 1.6283352173747145e-05,
+      "loss": 10.3545,
+      "step": 165
+    },
+    {
+      "epoch": 0.031375513868544154,
+      "grad_norm": 0.05726971849799156,
+      "learning_rate": 1.5390474757906446e-05,
+      "loss": 10.3546,
+      "step": 166
+    },
+    {
+      "epoch": 0.03156452298823418,
+      "grad_norm": 0.061142805963754654,
+      "learning_rate": 1.4520728741446089e-05,
+      "loss": 10.3572,
+      "step": 167
+    },
+    {
+      "epoch": 0.031753532107924205,
+      "grad_norm": 0.055892013013362885,
+      "learning_rate": 1.3674351904242611e-05,
+      "loss": 10.3557,
+      "step": 168
+    },
+    {
+      "epoch": 0.03194254122761423,
+      "grad_norm": 0.04323485121130943,
+      "learning_rate": 1.2851575637272262e-05,
+      "loss": 10.3557,
+      "step": 169
+    },
+    {
+      "epoch": 0.03213155034730426,
+      "grad_norm": 0.060065269470214844,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 10.358,
+      "step": 170
+    },
+    {
+      "epoch": 0.03232055946699428,
+      "grad_norm": 0.05526785925030708,
+      "learning_rate": 1.1277718055638819e-05,
+      "loss": 10.3573,
+      "step": 171
+    },
+    {
+      "epoch": 0.03250956858668431,
+      "grad_norm": 0.05861446261405945,
+      "learning_rate": 1.0527067017923654e-05,
+      "loss": 10.3558,
+      "step": 172
+    },
+    {
+      "epoch": 0.032698577706374334,
+      "grad_norm": 0.04735025390982628,
+      "learning_rate": 9.80087698670411e-06,
+      "loss": 10.36,
+      "step": 173
+    },
+    {
+      "epoch": 0.03288758682606436,
+      "grad_norm": 0.05781494081020355,
+      "learning_rate": 9.09934649508375e-06,
+      "loss": 10.3573,
+      "step": 174
+    },
+    {
+      "epoch": 0.033076595945754385,
+      "grad_norm": 0.050222914665937424,
+      "learning_rate": 8.422667334494249e-06,
+      "loss": 10.3582,
+      "step": 175
+    },
+    {
+      "epoch": 0.03326560506544441,
+      "grad_norm": 0.048023052513599396,
+      "learning_rate": 7.771024502261526e-06,
+      "loss": 10.3555,
+      "step": 176
+    },
+    {
+      "epoch": 0.033454614185134436,
+      "grad_norm": 0.05381546914577484,
+      "learning_rate": 7.144596151029303e-06,
+      "loss": 10.3539,
+      "step": 177
+    },
+    {
+      "epoch": 0.033643623304824455,
+      "grad_norm": 0.047901567071676254,
+      "learning_rate": 6.543553540053926e-06,
+      "loss": 10.3579,
+      "step": 178
+    },
+    {
+      "epoch": 0.03383263242451448,
+      "grad_norm": 0.06044565513730049,
+      "learning_rate": 5.968060988383883e-06,
+      "loss": 10.3576,
+      "step": 179
+    },
+    {
+      "epoch": 0.034021641544204506,
+      "grad_norm": 0.046510256826877594,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 10.3577,
+      "step": 180
+    },
+    {
+      "epoch": 0.03421065066389453,
+      "grad_norm": 0.06163431704044342,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 10.3548,
+      "step": 181
+    },
+    {
+      "epoch": 0.03439965978358456,
+      "grad_norm": 0.04503452777862549,
+      "learning_rate": 4.3964218465642355e-06,
+      "loss": 10.3549,
+      "step": 182
+    },
+    {
+      "epoch": 0.03458866890327458,
+      "grad_norm": 0.06345223635435104,
+      "learning_rate": 3.924632386315186e-06,
+      "loss": 10.3554,
+      "step": 183
+    },
+    {
+      "epoch": 0.03477767802296461,
+      "grad_norm": 0.06070295348763466,
+      "learning_rate": 3.4791089722651436e-06,
+      "loss": 10.3572,
+      "step": 184
+    },
+    {
+      "epoch": 0.034966687142654634,
+      "grad_norm": 0.056091003119945526,
+      "learning_rate": 3.059973406066963e-06,
+      "loss": 10.3572,
+      "step": 185
+    },
+    {
+      "epoch": 0.03515569626234466,
+      "grad_norm": 0.06234334036707878,
+      "learning_rate": 2.667340275199426e-06,
+      "loss": 10.3522,
+      "step": 186
+    },
+    {
+      "epoch": 0.035344705382034686,
+      "grad_norm": 0.060099318623542786,
+      "learning_rate": 2.3013169216400733e-06,
+      "loss": 10.3557,
+      "step": 187
+    },
+    {
+      "epoch": 0.03553371450172471,
+      "grad_norm": 0.04145883023738861,
+      "learning_rate": 1.9620034125190644e-06,
+      "loss": 10.3583,
+      "step": 188
+    },
+    {
+      "epoch": 0.03572272362141473,
+      "grad_norm": 0.06402470171451569,
+      "learning_rate": 1.6494925127617634e-06,
+      "loss": 10.3515,
+      "step": 189
+    },
+    {
+      "epoch": 0.035911732741104756,
+      "grad_norm": 0.054933883249759674,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 10.3598,
+      "step": 190
+    },
+    {
+      "epoch": 0.03610074186079478,
+      "grad_norm": 0.058887969702482224,
+      "learning_rate": 1.1052129398531507e-06,
+      "loss": 10.3567,
+      "step": 191
+    },
+    {
+      "epoch": 0.03628975098048481,
+      "grad_norm": 0.057484857738018036,
+      "learning_rate": 8.735930673024806e-07,
+      "loss": 10.3565,
+      "step": 192
+    },
+    {
+      "epoch": 0.03647876010017483,
+      "grad_norm": 0.057735905051231384,
+      "learning_rate": 6.690733646361857e-07,
+      "loss": 10.357,
+      "step": 193
+    },
+    {
+      "epoch": 0.03666776921986486,
+      "grad_norm": 0.05184992402791977,
+      "learning_rate": 4.917097454988584e-07,
+      "loss": 10.3594,
+      "step": 194
+    },
+    {
+      "epoch": 0.036856778339554884,
+      "grad_norm": 0.061963826417922974,
+      "learning_rate": 3.415506993330153e-07,
+      "loss": 10.3577,
+      "step": 195
+    },
+    {
+      "epoch": 0.03704578745924491,
+      "grad_norm": 0.06438171863555908,
+      "learning_rate": 2.1863727812254653e-07,
+      "loss": 10.3575,
+      "step": 196
+    },
+    {
+      "epoch": 0.037234796578934935,
+      "grad_norm": 0.05486460402607918,
+      "learning_rate": 1.230030851695263e-07,
+      "loss": 10.3526,
+      "step": 197
+    },
+    {
+      "epoch": 0.03742380569862496,
+      "grad_norm": 0.05459611490368843,
+      "learning_rate": 5.467426590739511e-08,
+      "loss": 10.3542,
+      "step": 198
+    },
+    {
+      "epoch": 0.03761281481831499,
+      "grad_norm": 0.05004322901368141,
+      "learning_rate": 1.3669500753099585e-08,
+      "loss": 10.3573,
+      "step": 199
+    },
+    {
+      "epoch": 0.037801823938005005,
+      "grad_norm": 0.06536795943975449,
+      "learning_rate": 0.0,
+      "loss": 10.3544,
+      "step": 200
+    },
+    {
+      "epoch": 0.037801823938005005,
+      "eval_loss": 10.35628604888916,
+      "eval_runtime": 7.8439,
+      "eval_samples_per_second": 284.041,
+      "eval_steps_per_second": 142.021,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5172363264000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null