Training in progress, step 100, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +3 -3
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +141 -106
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -25,13 +25,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
     "o_proj",
     "k_proj",
     "gate_proj",
     "q_proj",
-    "v_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
     "k_proj",
+    "v_proj",
+    "down_proj",
     "gate_proj",
     "q_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ba60744f926268ff7fdba5a7cfac8f3f850cb521c5738e6eb47d7d4e2da6d8f
 size 349243752

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b9fbf5d11589875c8f0f18e5f8873568a3bc8596407bb07ad845db4518cf642
 size 349243752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1366b8acb325858c396a2b90bc1e7d009944272ff6b31772e996c3a077d88767
 size 177908741

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bc343d82a0e3f72cfba0430c8d86e3dcbe33feff1416efe2cf854f3b74275f8
 size 177908741

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15245f502f2b0897efe7ae5bd9aea29ced0716f57f68c802c749e42ddc72ee1c
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:5dd9c2e9d70880f16863f7e49763010a424edf0e7cc0c70adea8dfa600069583
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da0b998422a3dc253ae0972fd9207eebf2190589880dd54501b58c1760fdda21
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f7440e8d8eb662ab89372782909ec9a7c72ecb22b3a157dc2f42bb2972c021b
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,156 +2,191 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01990049751243781,
   "eval_steps": 500,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0009950248756218905,
-      "grad_norm": 0.8776684999465942,
-      "learning_rate": 0.0,
-      "loss": 1.6946,
-      "step": 1
     },
     {
-      "epoch": 0.001990049751243781,
-      "grad_norm": 0.9731295704841614,
-      "learning_rate": 7e-07,
-      "loss": 1.852,
-      "step": 2
     },
     {
-      "epoch": 0.0029850746268656717,
-      "grad_norm": 0.9005614519119263,
-      "learning_rate": 1.4e-06,
-      "loss": 1.7867,
-      "step": 3
     },
     {
-      "epoch": 0.003980099502487562,
-      "grad_norm": 0.974780261516571,
-      "learning_rate": 2.1e-06,
-      "loss": 1.7229,
-      "step": 4
     },
     {
-      "epoch": 0.004975124378109453,
-      "grad_norm": 0.711536169052124,
-      "learning_rate": 2.8e-06,
-      "loss": 1.6955,
-      "step": 5
     },
     {
-      "epoch": 0.005970149253731343,
-      "grad_norm": 0.7311899065971375,
-      "learning_rate": 3.5e-06,
-      "loss": 1.8664,
-      "step": 6
     },
     {
-      "epoch": 0.006965174129353234,
-      "grad_norm": 0.7126452326774597,
-      "learning_rate": 4.2e-06,
-      "loss": 1.8133,
-      "step": 7
     },
     {
-      "epoch": 0.007960199004975124,
-      "grad_norm": 0.7019472122192383,
-      "learning_rate": 4.9e-06,
-      "loss": 1.6724,
-      "step": 8
     },
     {
-      "epoch": 0.008955223880597015,
-      "grad_norm": 0.7028383016586304,
-      "learning_rate": 5.6e-06,
-      "loss": 1.7337,
-      "step": 9
     },
     {
-      "epoch": 0.009950248756218905,
-      "grad_norm": 0.6948546767234802,
-      "learning_rate": 6.299999999999999e-06,
-      "loss": 1.6073,
-      "step": 10
     },
     {
-      "epoch": 0.010945273631840797,
-      "grad_norm": 0.6322774291038513,
-      "learning_rate": 7e-06,
-      "loss": 1.6254,
-      "step": 11
     },
     {
-      "epoch": 0.011940298507462687,
-      "grad_norm": 0.5230722427368164,
-      "learning_rate": 7.699999999999999e-06,
-      "loss": 1.704,
-      "step": 12
     },
     {
-      "epoch": 0.012935323383084577,
-      "grad_norm": 0.38045769929885864,
-      "learning_rate": 8.4e-06,
-      "loss": 1.5992,
-      "step": 13
     },
     {
-      "epoch": 0.013930348258706468,
-      "grad_norm": 0.43926432728767395,
-      "learning_rate": 9.1e-06,
-      "loss": 1.3649,
-      "step": 14
     },
     {
-      "epoch": 0.014925373134328358,
-      "grad_norm": 0.6113471388816833,
-      "learning_rate": 9.8e-06,
-      "loss": 1.7055,
-      "step": 15
     },
     {
-      "epoch": 0.015920398009950248,
-      "grad_norm": 0.549103856086731,
-      "learning_rate": 1.05e-05,
-      "loss": 1.6179,
-      "step": 16
     },
     {
-      "epoch": 0.01691542288557214,
-      "grad_norm": 0.39344537258148193,
-      "learning_rate": 1.12e-05,
-      "loss": 1.517,
-      "step": 17
     },
     {
-      "epoch": 0.01791044776119403,
-      "grad_norm": 0.3620043098926544,
-      "learning_rate": 1.19e-05,
-      "loss": 1.4598,
-      "step": 18
     },
     {
-      "epoch": 0.01890547263681592,
-      "grad_norm": 0.271251380443573,
-      "learning_rate": 1.2599999999999998e-05,
-      "loss": 1.441,
-      "step": 19
     },
     {
-      "epoch": 0.01990049751243781,
-      "grad_norm": 0.2406337559223175,
-      "learning_rate": 1.33e-05,
-      "loss": 1.5409,
-      "step": 20
     }
   ],
-  "logging_steps": 1,
-  "max_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
@@ -162,12 +197,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.5380973908656128e+16,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.09950248756218906,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.003980099502487562,
+      "grad_norm": 1.0094934701919556,
+      "learning_rate": 1.3649999999999998e-07,
+      "loss": 1.8949,
+      "step": 4
     },
     {
+      "epoch": 0.007960199004975124,
+      "grad_norm": 0.9780156016349792,
+      "learning_rate": 3.185e-07,
+      "loss": 1.966,
+      "step": 8
     },
     {
+      "epoch": 0.011940298507462687,
+      "grad_norm": 0.9392228722572327,
+      "learning_rate": 5.005e-07,
+      "loss": 1.9327,
+      "step": 12
     },
     {
+      "epoch": 0.015920398009950248,
+      "grad_norm": 0.7723965644836426,
+      "learning_rate": 6.824999999999999e-07,
+      "loss": 1.6594,
+      "step": 16
     },
     {
+      "epoch": 0.01990049751243781,
+      "grad_norm": 0.7648000121116638,
+      "learning_rate": 8.644999999999999e-07,
+      "loss": 1.8772,
+      "step": 20
     },
     {
+      "epoch": 0.023880597014925373,
+      "grad_norm": 0.7800928950309753,
+      "learning_rate": 1.0465e-06,
+      "loss": 1.7927,
+      "step": 24
     },
     {
+      "epoch": 0.027860696517412936,
+      "grad_norm": 0.6133188605308533,
+      "learning_rate": 1.2285e-06,
+      "loss": 1.8074,
+      "step": 28
     },
     {
+      "epoch": 0.031840796019900496,
+      "grad_norm": 0.6503555774688721,
+      "learning_rate": 1.4104999999999999e-06,
+      "loss": 1.8108,
+      "step": 32
     },
     {
+      "epoch": 0.03582089552238806,
+      "grad_norm": 0.6871572136878967,
+      "learning_rate": 1.5924999999999998e-06,
+      "loss": 1.6947,
+      "step": 36
     },
     {
+      "epoch": 0.03980099502487562,
+      "grad_norm": 0.5543396472930908,
+      "learning_rate": 1.7745e-06,
+      "loss": 1.6361,
+      "step": 40
     },
     {
+      "epoch": 0.04378109452736319,
+      "grad_norm": 0.502059280872345,
+      "learning_rate": 1.9565e-06,
+      "loss": 1.6627,
+      "step": 44
     },
     {
+      "epoch": 0.04776119402985075,
+      "grad_norm": 0.4744930565357208,
+      "learning_rate": 2.1384999999999995e-06,
+      "loss": 1.6207,
+      "step": 48
     },
     {
+      "epoch": 0.051741293532338306,
+      "grad_norm": 0.4121508300304413,
+      "learning_rate": 2.3205e-06,
+      "loss": 1.5601,
+      "step": 52
     },
     {
+      "epoch": 0.05572139303482587,
+      "grad_norm": 0.364520788192749,
+      "learning_rate": 2.5025e-06,
+      "loss": 1.605,
+      "step": 56
     },
     {
+      "epoch": 0.05970149253731343,
+      "grad_norm": 0.34304410219192505,
+      "learning_rate": 2.6844999999999995e-06,
+      "loss": 1.556,
+      "step": 60
     },
     {
+      "epoch": 0.06368159203980099,
+      "grad_norm": 0.28909891843795776,
+      "learning_rate": 2.8665e-06,
+      "loss": 1.6229,
+      "step": 64
     },
     {
+      "epoch": 0.06766169154228856,
+      "grad_norm": 0.2927353084087372,
+      "learning_rate": 3.0485e-06,
+      "loss": 1.6583,
+      "step": 68
     },
     {
+      "epoch": 0.07164179104477612,
+      "grad_norm": 0.2617412805557251,
+      "learning_rate": 3.2304999999999994e-06,
+      "loss": 1.3631,
+      "step": 72
     },
     {
+      "epoch": 0.07562189054726368,
+      "grad_norm": 0.20857785642147064,
+      "learning_rate": 3.4125e-06,
+      "loss": 1.4188,
+      "step": 76
     },
     {
+      "epoch": 0.07960199004975124,
+      "grad_norm": 0.21844792366027832,
+      "learning_rate": 3.5945e-06,
+      "loss": 1.4926,
+      "step": 80
+    },
+    {
+      "epoch": 0.08358208955223881,
+      "grad_norm": 0.21169371902942657,
+      "learning_rate": 3.7764999999999993e-06,
+      "loss": 1.4431,
+      "step": 84
+    },
+    {
+      "epoch": 0.08756218905472637,
+      "grad_norm": 0.2326797991991043,
+      "learning_rate": 3.9584999999999995e-06,
+      "loss": 1.453,
+      "step": 88
+    },
+    {
+      "epoch": 0.09154228855721393,
+      "grad_norm": 0.184279665350914,
+      "learning_rate": 4.1404999999999996e-06,
+      "loss": 1.4547,
+      "step": 92
+    },
+    {
+      "epoch": 0.0955223880597015,
+      "grad_norm": 0.21595323085784912,
+      "learning_rate": 4.3225e-06,
+      "loss": 1.5044,
+      "step": 96
+    },
+    {
+      "epoch": 0.09950248756218906,
+      "grad_norm": 0.22414354979991913,
+      "learning_rate": 4.5045e-06,
+      "loss": 1.5599,
+      "step": 100
     }
   ],
+  "logging_steps": 4,
+  "max_steps": 972,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.46000752878551e+16,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15283826ff4848bc5c704b3622e4600c644b4dbefeeb2c0a0476dd07ee352d21
 size 7697

 version https://git-lfs.github.com/spec/v1
+oid sha256:8aa8c29fb1ed8d66b99ca0913e01151bf5c6d50026712b90dfade04ea18efec8
 size 7697