Training in progress, step 334, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:034b66bbefd5819fa88e17e2dea7b0e8ab5c14ab22537660f3048a25e6a7c617
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cc3f907aaa715b71a61eebdecb1333d7c5f0a6d30903b65d7ddcf916d23be30
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eac864be4e5f5a5eba9357b2e1306aebb2b578c74f0e112dcb1aaa4df98178a8
-size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:86b231a11b5058c73716d24e13fb98a1a8474c625633b1d6c44138c13b4edd7d
+size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d49d3439d604b444970a71de5cb79f27d2d4be72cae91cf222b427976fd29865
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cce5595f22fff3c51a6d507b41d2596013d8383dba38b7309a6e6c86cbe8c90c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60f64d21f05d7df4421d09373be231cf5e5d1a10934be119a18d2b78545876ee
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:adcadd27beefdfbf5840092bf08d57b92f1d1b18154a8342ab8cd911b37488da
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005213923047803401,
   "eval_steps": 334,
-  "global_step": 167,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -127,6 +127,133 @@
       "learning_rate": 0.00018888354486549237,
       "loss": 1.145,
       "step": 160
     }
   ],
   "logging_steps": 10,
@@ -146,7 +273,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2765796160136806e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.010427846095606801,
   "eval_steps": 334,
+  "global_step": 334,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00018888354486549237,
       "loss": 1.145,
       "step": 160
+    },
+    {
+      "epoch": 0.005307586336087294,
+      "grad_norm": 1.1813420057296753,
+      "learning_rate": 0.00018738493770697852,
+      "loss": 0.5603,
+      "step": 170
+    },
+    {
+      "epoch": 0.005619797297033605,
+      "grad_norm": 2.35960054397583,
+      "learning_rate": 0.00018579834132349772,
+      "loss": 0.8248,
+      "step": 180
+    },
+    {
+      "epoch": 0.005932008257979917,
+      "grad_norm": 2.443915605545044,
+      "learning_rate": 0.00018412535328311814,
+      "loss": 1.1141,
+      "step": 190
+    },
+    {
+      "epoch": 0.006244219218926228,
+      "grad_norm": 5.689703941345215,
+      "learning_rate": 0.0001823676581429833,
+      "loss": 1.2009,
+      "step": 200
+    },
+    {
+      "epoch": 0.00655643017987254,
+      "grad_norm": 1.4314906597137451,
+      "learning_rate": 0.00018052702575310588,
+      "loss": 1.1061,
+      "step": 210
+    },
+    {
+      "epoch": 0.006868641140818851,
+      "grad_norm": 0.6448104977607727,
+      "learning_rate": 0.00017860530947427875,
+      "loss": 0.4016,
+      "step": 220
+    },
+    {
+      "epoch": 0.007180852101765163,
+      "grad_norm": 2.0396196842193604,
+      "learning_rate": 0.0001766044443118978,
+      "loss": 0.8709,
+      "step": 230
+    },
+    {
+      "epoch": 0.007493063062711474,
+      "grad_norm": 2.5875227451324463,
+      "learning_rate": 0.0001745264449675755,
+      "loss": 1.1121,
+      "step": 240
+    },
+    {
+      "epoch": 0.007805274023657786,
+      "grad_norm": 3.9609525203704834,
+      "learning_rate": 0.00017237340381050703,
+      "loss": 1.251,
+      "step": 250
+    },
+    {
+      "epoch": 0.008117484984604097,
+      "grad_norm": 1.2032607793807983,
+      "learning_rate": 0.00017014748877063214,
+      "loss": 1.1823,
+      "step": 260
+    },
+    {
+      "epoch": 0.008429695945550408,
+      "grad_norm": 1.186848521232605,
+      "learning_rate": 0.00016785094115571322,
+      "loss": 0.6219,
+      "step": 270
+    },
+    {
+      "epoch": 0.00874190690649672,
+      "grad_norm": 1.85453462600708,
+      "learning_rate": 0.00016548607339452853,
+      "loss": 0.5809,
+      "step": 280
+    },
+    {
+      "epoch": 0.009054117867443031,
+      "grad_norm": 2.0443332195281982,
+      "learning_rate": 0.00016305526670845226,
+      "loss": 1.2146,
+      "step": 290
+    },
+    {
+      "epoch": 0.009366328828389343,
+      "grad_norm": 7.1448516845703125,
+      "learning_rate": 0.00016056096871376667,
+      "loss": 1.2524,
+      "step": 300
+    },
+    {
+      "epoch": 0.009678539789335654,
+      "grad_norm": 1.334848165512085,
+      "learning_rate": 0.00015800569095711982,
+      "loss": 1.1966,
+      "step": 310
+    },
+    {
+      "epoch": 0.009990750750281965,
+      "grad_norm": 0.4558267295360565,
+      "learning_rate": 0.00015539200638661104,
+      "loss": 0.589,
+      "step": 320
+    },
+    {
+      "epoch": 0.010302961711228277,
+      "grad_norm": 1.8344190120697021,
+      "learning_rate": 0.00015272254676105025,
+      "loss": 0.5806,
+      "step": 330
+    },
+    {
+      "epoch": 0.010427846095606801,
+      "eval_loss": 0.9566133618354797,
+      "eval_runtime": 13592.0734,
+      "eval_samples_per_second": 2.095,
+      "eval_steps_per_second": 2.095,
+      "step": 334
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.544673843146916e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null