Training in progress, step 501, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cc3f907aaa715b71a61eebdecb1333d7c5f0a6d30903b65d7ddcf916d23be30
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0a90302ba872fc5e9fbd81b8001b1ec47581ce27eb6b7de96d47864ae6baea6
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86b231a11b5058c73716d24e13fb98a1a8474c625633b1d6c44138c13b4edd7d
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d0af6fc6a885bd4f3dc9a38a3940aef3c5fcad0903424e373a457694dc79ba3
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cce5595f22fff3c51a6d507b41d2596013d8383dba38b7309a6e6c86cbe8c90c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2ef870d414744998f4151ccf29729885033e4a65886e92505678cc2e7a90569
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adcadd27beefdfbf5840092bf08d57b92f1d1b18154a8342ab8cd911b37488da
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e594abdaba229076f66031ff8b5f12f7dac58b2668bc9aeb641b9d5709850a7f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.010427846095606801,
   "eval_steps": 334,
-  "global_step": 334,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -254,6 +254,125 @@
       "eval_samples_per_second": 2.095,
       "eval_steps_per_second": 2.095,
       "step": 334
     }
   ],
   "logging_steps": 10,
@@ -273,7 +392,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.544673843146916e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.015641769143410204,
   "eval_steps": 334,
+  "global_step": 501,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.095,
       "eval_steps_per_second": 2.095,
       "step": 334
+    },
+    {
+      "epoch": 0.010615172672174588,
+      "grad_norm": 1.9632649421691895,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 1.0551,
+      "step": 340
+    },
+    {
+      "epoch": 0.0109273836331209,
+      "grad_norm": 4.136826992034912,
+      "learning_rate": 0.0001472271074772683,
+      "loss": 1.0784,
+      "step": 350
+    },
+    {
+      "epoch": 0.01123959459406721,
+      "grad_norm": 1.1779104471206665,
+      "learning_rate": 0.00014440666126057744,
+      "loss": 1.1613,
+      "step": 360
+    },
+    {
+      "epoch": 0.011551805555013523,
+      "grad_norm": 0.8325644731521606,
+      "learning_rate": 0.00014154150130018866,
+      "loss": 0.5119,
+      "step": 370
+    },
+    {
+      "epoch": 0.011864016515959834,
+      "grad_norm": 1.6711801290512085,
+      "learning_rate": 0.00013863451256931287,
+      "loss": 0.6156,
+      "step": 380
+    },
+    {
+      "epoch": 0.012176227476906146,
+      "grad_norm": 2.293975353240967,
+      "learning_rate": 0.00013568862215918717,
+      "loss": 1.0706,
+      "step": 390
+    },
+    {
+      "epoch": 0.012488438437852457,
+      "grad_norm": 2.2785656452178955,
+      "learning_rate": 0.00013270679633174218,
+      "loss": 1.2872,
+      "step": 400
+    },
+    {
+      "epoch": 0.012800649398798769,
+      "grad_norm": 1.2502048015594482,
+      "learning_rate": 0.0001296920375328275,
+      "loss": 1.0768,
+      "step": 410
+    },
+    {
+      "epoch": 0.01311286035974508,
+      "grad_norm": 0.7812928557395935,
+      "learning_rate": 0.00012664738136900348,
+      "loss": 0.5199,
+      "step": 420
+    },
+    {
+      "epoch": 0.01342507132069139,
+      "grad_norm": 2.0176918506622314,
+      "learning_rate": 0.00012357589355094275,
+      "loss": 0.8125,
+      "step": 430
+    },
+    {
+      "epoch": 0.013737282281637703,
+      "grad_norm": 2.014697313308716,
+      "learning_rate": 0.00012048066680651908,
+      "loss": 1.0261,
+      "step": 440
+    },
+    {
+      "epoch": 0.014049493242584013,
+      "grad_norm": 3.0161404609680176,
+      "learning_rate": 0.00011736481776669306,
+      "loss": 1.1352,
+      "step": 450
+    },
+    {
+      "epoch": 0.014361704203530326,
+      "grad_norm": 1.1186920404434204,
+      "learning_rate": 0.00011423148382732853,
+      "loss": 1.1374,
+      "step": 460
+    },
+    {
+      "epoch": 0.014673915164476636,
+      "grad_norm": 0.9820886850357056,
+      "learning_rate": 0.00011108381999010111,
+      "loss": 0.5135,
+      "step": 470
+    },
+    {
+      "epoch": 0.014986126125422949,
+      "grad_norm": 2.8473262786865234,
+      "learning_rate": 0.00010792499568567884,
+      "loss": 0.8812,
+      "step": 480
+    },
+    {
+      "epoch": 0.01529833708636926,
+      "grad_norm": 2.1481053829193115,
+      "learning_rate": 0.00010475819158237425,
+      "loss": 1.0178,
+      "step": 490
+    },
+    {
+      "epoch": 0.015610548047315572,
+      "grad_norm": 1.20015287399292,
+      "learning_rate": 0.00010158659638348081,
+      "loss": 1.0468,
+      "step": 500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.824081922120745e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null