Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

adapter_config.json +2 -2
adapter_model.safetensors +1 -1
all_results.json +6 -6
checkpoint-50/adapter_config.json +3 -3
checkpoint-50/adapter_model.safetensors +1 -1
checkpoint-50/optimizer.pt +1 -1
checkpoint-50/rng_state.pth +1 -1
checkpoint-50/trainer_state.json +45 -85
checkpoint-50/training_args.bin +1 -1
train_results.json +6 -6
trainer_state.json +42 -350
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -24,9 +24,9 @@
   "revision": null,
   "target_modules": [
     "gate_up_proj",
-    "o_proj",
     "qkv_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "gate_up_proj",
+    "down_proj",
     "qkv_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67cfeac19a2419a44a38965724a3e10984b3cfb02dc513e9d9c24a26148c9165
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:4de8118eb3c068f60ce36d3ca641fe0eab10557be8ebd26263b7bf370c25545e
 size 100697728

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 27.0,
-    "total_flos": 1.3785203655910195e+17,
-    "train_loss": 0.12853161850599226,
-    "train_runtime": 2497.1205,
-    "train_samples_per_second": 1.802,
-    "train_steps_per_second": 0.108
 }

 {
+    "epoch": 5.0,
+    "total_flos": 2.5642849233567744e+16,
+    "train_loss": 0.5138080072402954,
+    "train_runtime": 465.5561,
+    "train_samples_per_second": 1.718,
+    "train_steps_per_second": 0.107
 }

checkpoint-50/adapter_config.json CHANGED Viewed

@@ -23,10 +23,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "qkv_proj",
-    "o_proj",
     "down_proj",
-    "gate_up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_up_proj",
     "down_proj",
+    "qkv_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

checkpoint-50/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:128026d7a03702aaee46fa45f35639b5ff805f54560f6fa6a67213b3a3deabbf
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:4de8118eb3c068f60ce36d3ca641fe0eab10557be8ebd26263b7bf370c25545e
 size 100697728

checkpoint-50/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c491a3247a7ee5eea21a2e2b697593d159b3eb60006237830144a20053c047d5
 size 201541754

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f767fbe59969feec5bcec50d5fab9da54c2d7ca23cdccf7d44603ec638346f2
 size 201541754

checkpoint-50/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d72bf840684c4ffeae56d99ac7069162bd15b8d308b33e5cdefaf8d7910aea0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:20acd0328870e82721a4e81751e637c1df5076c8b6301d6bc1828612b34f862b
 size 14244

checkpoint-50/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_global_step": 48,
-  "best_metric": 0.6318144798278809,
-  "best_model_checkpoint": "//outputs/task7_microsoft/Phi-3.5-mini-instruct/checkpoint-48",
-  "epoch": 4.173913043478261,
   "eval_steps": 500,
   "global_step": 50,
   "is_hyper_param_search": false,
@@ -10,120 +10,80 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.43478260869565216,
-      "grad_norm": 0.3567487597465515,
-      "learning_rate": 4e-05,
-      "loss": 0.7927,
       "step": 5
     },
-    {
-      "epoch": 0.8695652173913043,
-      "grad_norm": 0.20804046094417572,
-      "learning_rate": 8e-05,
-      "loss": 0.877,
-      "step": 10
-    },
     {
       "epoch": 1.0,
-      "eval_loss": 0.7741447687149048,
-      "eval_runtime": 3.8872,
-      "eval_samples_per_second": 5.145,
-      "eval_steps_per_second": 0.772,
-      "step": 12
     },
     {
-      "epoch": 1.2608695652173914,
-      "grad_norm": 0.14667491614818573,
-      "learning_rate": 0.00012,
-      "loss": 0.797,
       "step": 15
     },
-    {
-      "epoch": 1.6956521739130435,
-      "grad_norm": 0.18651264905929565,
-      "learning_rate": 0.00016,
-      "loss": 0.6654,
-      "step": 20
-    },
     {
       "epoch": 2.0,
-      "eval_loss": 0.6869648694992065,
-      "eval_runtime": 3.8967,
-      "eval_samples_per_second": 5.132,
-      "eval_steps_per_second": 0.77,
-      "step": 24
     },
     {
-      "epoch": 2.0869565217391304,
-      "grad_norm": 0.18070296943187714,
-      "learning_rate": 0.0002,
-      "loss": 0.6351,
       "step": 25
     },
     {
-      "epoch": 2.5217391304347827,
-      "grad_norm": 0.23248770833015442,
-      "learning_rate": 0.00018090169943749476,
-      "loss": 0.6273,
       "step": 30
     },
     {
-      "epoch": 2.9565217391304346,
-      "grad_norm": 0.19235172867774963,
-      "learning_rate": 0.00013090169943749476,
-      "loss": 0.599,
       "step": 35
     },
     {
-      "epoch": 3.0,
-      "eval_loss": 0.6456981301307678,
-      "eval_runtime": 3.8903,
-      "eval_samples_per_second": 5.141,
-      "eval_steps_per_second": 0.771,
-      "step": 36
-    },
-    {
-      "epoch": 3.3478260869565215,
-      "grad_norm": 0.18532131612300873,
-      "learning_rate": 6.909830056250527e-05,
-      "loss": 0.5887,
       "step": 40
     },
     {
-      "epoch": 3.782608695652174,
-      "grad_norm": 0.23707902431488037,
-      "learning_rate": 1.9098300562505266e-05,
-      "loss": 0.5482,
       "step": 45
     },
     {
-      "epoch": 4.0,
-      "eval_loss": 0.6318144798278809,
-      "eval_runtime": 3.8893,
-      "eval_samples_per_second": 5.142,
-      "eval_steps_per_second": 0.771,
-      "step": 48
-    },
-    {
-      "epoch": 4.173913043478261,
-      "grad_norm": 0.20858274400234222,
       "learning_rate": 0.0,
-      "loss": 0.5586,
-      "step": 50
-    },
-    {
-      "epoch": 4.173913043478261,
-      "eval_loss": 0.6320340037345886,
-      "eval_runtime": 3.8925,
-      "eval_samples_per_second": 5.138,
-      "eval_steps_per_second": 0.771,
       "step": 50
     }
   ],
   "logging_steps": 5,
   "max_steps": 50,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -137,7 +97,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.8914967713060864e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
   "global_step": 50,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.21942713856697083,
+      "learning_rate": 0.000199107748815478,
+      "loss": 0.7714,
       "step": 5
     },
     {
       "epoch": 1.0,
+      "grad_norm": 0.27160170674324036,
+      "learning_rate": 0.00018925188358598813,
+      "loss": 0.6282,
+      "step": 10
     },
     {
+      "epoch": 1.5333333333333332,
+      "grad_norm": 0.16776053607463837,
+      "learning_rate": 0.00016951924276746425,
+      "loss": 0.5601,
       "step": 15
     },
     {
       "epoch": 2.0,
+      "grad_norm": 0.2320907711982727,
+      "learning_rate": 0.0001420934762428335,
+      "loss": 0.5471,
+      "step": 20
     },
     {
+      "epoch": 2.533333333333333,
+      "grad_norm": 0.15466105937957764,
+      "learning_rate": 0.00011000956916240985,
+      "loss": 0.4923,
       "step": 25
     },
     {
+      "epoch": 3.0,
+      "grad_norm": 0.31745201349258423,
+      "learning_rate": 7.681798497324716e-05,
+      "loss": 0.4651,
       "step": 30
     },
     {
+      "epoch": 3.533333333333333,
+      "grad_norm": 0.19490917026996613,
+      "learning_rate": 4.6191764683662744e-05,
+      "loss": 0.4367,
       "step": 35
     },
     {
+      "epoch": 4.0,
+      "grad_norm": 0.3345278799533844,
+      "learning_rate": 2.1520061472133902e-05,
+      "loss": 0.4268,
       "step": 40
     },
     {
+      "epoch": 4.533333333333333,
+      "grad_norm": 0.156062051653862,
+      "learning_rate": 5.533090839208133e-06,
+      "loss": 0.402,
       "step": 45
     },
     {
+      "epoch": 5.0,
+      "grad_norm": 0.24310481548309326,
       "learning_rate": 0.0,
+      "loss": 0.4084,
       "step": 50
     }
   ],
   "logging_steps": 5,
   "max_steps": 50,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.5642849233567744e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

checkpoint-50/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b379700727532ab22acd204ba858eb7dbdb3c8a9496a4558aed006ad9fda0ad3
 size 5624

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfc8b2f6195776ee0127d015bd85f458c29181888c317a3105ff4995afd4007f
 size 5624

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 27.0,
-    "total_flos": 1.3785203655910195e+17,
-    "train_loss": 0.12853161850599226,
-    "train_runtime": 2497.1205,
-    "train_samples_per_second": 1.802,
-    "train_steps_per_second": 0.108
 }

 {
+    "epoch": 5.0,
+    "total_flos": 2.5642849233567744e+16,
+    "train_loss": 0.5138080072402954,
+    "train_runtime": 465.5561,
+    "train_samples_per_second": 1.718,
+    "train_steps_per_second": 0.107
 }

trainer_state.json CHANGED Viewed

@@ -2,405 +2,97 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 27.0,
   "eval_steps": 500,
-  "global_step": 270,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.5333333333333333,
-      "grad_norm": 0.3464358448982239,
-      "learning_rate": 7.142857142857143e-05,
-      "loss": 0.7933,
       "step": 5
     },
     {
       "epoch": 1.0,
-      "grad_norm": 0.2775850296020508,
-      "learning_rate": 0.00014285714285714287,
-      "loss": 0.6763,
       "step": 10
     },
     {
       "epoch": 1.5333333333333332,
-      "grad_norm": 0.20196351408958435,
-      "learning_rate": 0.00019999247018391447,
-      "loss": 0.612,
       "step": 15
     },
     {
       "epoch": 2.0,
-      "grad_norm": 0.20944692194461823,
-      "learning_rate": 0.00019972904566786903,
-      "loss": 0.593,
       "step": 20
     },
     {
       "epoch": 2.533333333333333,
-      "grad_norm": 0.1796199381351471,
-      "learning_rate": 0.000199090263542778,
-      "loss": 0.528,
       "step": 25
     },
     {
       "epoch": 3.0,
-      "grad_norm": 0.3253108859062195,
-      "learning_rate": 0.00019807852804032305,
-      "loss": 0.4901,
       "step": 30
     },
     {
       "epoch": 3.533333333333333,
-      "grad_norm": 0.2155134528875351,
-      "learning_rate": 0.00019669764710448522,
-      "loss": 0.4385,
       "step": 35
     },
     {
       "epoch": 4.0,
-      "grad_norm": 0.40249642729759216,
-      "learning_rate": 0.00019495281805930367,
-      "loss": 0.4146,
       "step": 40
     },
     {
       "epoch": 4.533333333333333,
-      "grad_norm": 0.21931566298007965,
-      "learning_rate": 0.00019285060804732158,
-      "loss": 0.3422,
       "step": 45
     },
     {
       "epoch": 5.0,
-      "grad_norm": 0.47045040130615234,
-      "learning_rate": 0.00019039892931234435,
-      "loss": 0.334,
-      "step": 50
-    },
-    {
-      "epoch": 5.533333333333333,
-      "grad_norm": 0.33896583318710327,
-      "learning_rate": 0.00018760700941954065,
-      "loss": 0.3055,
-      "step": 55
-    },
-    {
-      "epoch": 6.0,
-      "grad_norm": 0.6106130480766296,
-      "learning_rate": 0.00018448535652497073,
-      "loss": 0.2364,
-      "step": 60
-    },
-    {
-      "epoch": 6.533333333333333,
-      "grad_norm": 0.34181615710258484,
-      "learning_rate": 0.0001810457198252595,
-      "loss": 0.19,
-      "step": 65
-    },
-    {
-      "epoch": 7.0,
-      "grad_norm": 0.8446937799453735,
-      "learning_rate": 0.0001773010453362737,
-      "loss": 0.1965,
-      "step": 70
-    },
-    {
-      "epoch": 7.533333333333333,
-      "grad_norm": 0.5083507299423218,
-      "learning_rate": 0.00017326542716724128,
-      "loss": 0.1207,
-      "step": 75
-    },
-    {
-      "epoch": 8.0,
-      "grad_norm": 0.7833607196807861,
-      "learning_rate": 0.0001689540544737067,
-      "loss": 0.1283,
-      "step": 80
-    },
-    {
-      "epoch": 8.533333333333333,
-      "grad_norm": 0.3553655743598938,
-      "learning_rate": 0.00016438315428897915,
-      "loss": 0.0811,
-      "step": 85
-    },
-    {
-      "epoch": 9.0,
-      "grad_norm": 0.7091866135597229,
-      "learning_rate": 0.00015956993044924334,
-      "loss": 0.0802,
-      "step": 90
-    },
-    {
-      "epoch": 9.533333333333333,
-      "grad_norm": 0.43794405460357666,
-      "learning_rate": 0.00015453249884220464,
-      "loss": 0.0492,
-      "step": 95
-    },
-    {
-      "epoch": 10.0,
-      "grad_norm": 0.5798842310905457,
-      "learning_rate": 0.00014928981922297842,
-      "loss": 0.0524,
-      "step": 100
-    },
-    {
-      "epoch": 10.533333333333333,
-      "grad_norm": 0.3912927210330963,
-      "learning_rate": 0.00014386162385385278,
-      "loss": 0.0302,
-      "step": 105
-    },
-    {
-      "epoch": 11.0,
-      "grad_norm": 0.4159412682056427,
-      "learning_rate": 0.000138268343236509,
-      "loss": 0.0328,
-      "step": 110
-    },
-    {
-      "epoch": 11.533333333333333,
-      "grad_norm": 0.24435237050056458,
-      "learning_rate": 0.0001325310292162263,
-      "loss": 0.0239,
-      "step": 115
-    },
-    {
-      "epoch": 12.0,
-      "grad_norm": 0.24223262071609497,
-      "learning_rate": 0.00012667127574748986,
-      "loss": 0.0171,
-      "step": 120
-    },
-    {
-      "epoch": 12.533333333333333,
-      "grad_norm": 0.1762484759092331,
-      "learning_rate": 0.00012071113761922186,
-      "loss": 0.0135,
-      "step": 125
-    },
-    {
-      "epoch": 13.0,
-      "grad_norm": 0.28046339750289917,
-      "learning_rate": 0.00011467304744553618,
-      "loss": 0.0143,
-      "step": 130
-    },
-    {
-      "epoch": 13.533333333333333,
-      "grad_norm": 0.28064408898353577,
-      "learning_rate": 0.000108579731234444,
-      "loss": 0.0128,
-      "step": 135
-    },
-    {
-      "epoch": 14.0,
-      "grad_norm": 0.19103752076625824,
-      "learning_rate": 0.00010245412285229124,
-      "loss": 0.0101,
-      "step": 140
-    },
-    {
-      "epoch": 14.533333333333333,
-      "grad_norm": 0.2761705815792084,
-      "learning_rate": 9.631927770586412e-05,
-      "loss": 0.0096,
-      "step": 145
-    },
-    {
-      "epoch": 15.0,
-      "grad_norm": 0.3451383709907532,
-      "learning_rate": 9.019828596704394e-05,
-      "loss": 0.0084,
-      "step": 150
-    },
-    {
-      "epoch": 15.533333333333333,
-      "grad_norm": 0.22912859916687012,
-      "learning_rate": 8.411418566661388e-05,
-      "loss": 0.0087,
-      "step": 155
-    },
-    {
-      "epoch": 16.0,
-      "grad_norm": 0.2961307466030121,
-      "learning_rate": 7.808987598431303e-05,
-      "loss": 0.0054,
-      "step": 160
-    },
-    {
-      "epoch": 16.533333333333335,
-      "grad_norm": 0.05174371972680092,
-      "learning_rate": 7.21480310614947e-05,
-      "loss": 0.0037,
-      "step": 165
-    },
-    {
-      "epoch": 17.0,
-      "grad_norm": 0.6668692827224731,
-      "learning_rate": 6.6311014660778e-05,
-      "loss": 0.0086,
-      "step": 170
-    },
-    {
-      "epoch": 17.533333333333335,
-      "grad_norm": 0.0423920676112175,
-      "learning_rate": 6.060079599389521e-05,
-      "loss": 0.0046,
-      "step": 175
-    },
-    {
-      "epoch": 18.0,
-      "grad_norm": 0.048323437571525574,
-      "learning_rate": 5.503886703453933e-05,
-      "loss": 0.005,
-      "step": 180
-    },
-    {
-      "epoch": 18.533333333333335,
-      "grad_norm": 0.14866454899311066,
-      "learning_rate": 4.964616162742826e-05,
-      "loss": 0.0039,
-      "step": 185
-    },
-    {
-      "epoch": 19.0,
-      "grad_norm": 0.04198712110519409,
-      "learning_rate": 4.444297669803981e-05,
-      "loss": 0.0033,
-      "step": 190
-    },
-    {
-      "epoch": 19.533333333333335,
-      "grad_norm": 0.037916265428066254,
-      "learning_rate": 3.944889585956746e-05,
-      "loss": 0.0075,
-      "step": 195
-    },
-    {
-      "epoch": 20.0,
-      "grad_norm": 0.031426794826984406,
-      "learning_rate": 3.468271570462235e-05,
-      "loss": 0.0036,
-      "step": 200
-    },
-    {
-      "epoch": 20.533333333333335,
-      "grad_norm": 0.1819785088300705,
-      "learning_rate": 3.016237505910272e-05,
-      "loss": 0.0062,
-      "step": 205
-    },
-    {
-      "epoch": 21.0,
-      "grad_norm": 0.09462948143482208,
-      "learning_rate": 2.5904887464504114e-05,
-      "loss": 0.0034,
-      "step": 210
-    },
-    {
-      "epoch": 21.533333333333335,
-      "grad_norm": 0.11672957241535187,
-      "learning_rate": 2.1926277142790552e-05,
-      "loss": 0.0025,
-      "step": 215
-    },
-    {
-      "epoch": 22.0,
-      "grad_norm": 0.04571106657385826,
-      "learning_rate": 1.824151868484164e-05,
-      "loss": 0.0042,
-      "step": 220
-    },
-    {
-      "epoch": 22.533333333333335,
-      "grad_norm": 0.019737839698791504,
-      "learning_rate": 1.486448068947348e-05,
-      "loss": 0.0021,
-      "step": 225
-    },
-    {
-      "epoch": 23.0,
-      "grad_norm": 0.03002820909023285,
-      "learning_rate": 1.1807873565164506e-05,
-      "loss": 0.0049,
-      "step": 230
-    },
-    {
-      "epoch": 23.533333333333335,
-      "grad_norm": 0.24355602264404297,
-      "learning_rate": 9.083201690947763e-06,
-      "loss": 0.004,
-      "step": 235
-    },
-    {
-      "epoch": 24.0,
-      "grad_norm": 0.035868410021066666,
-      "learning_rate": 6.700720116526116e-06,
-      "loss": 0.0024,
-      "step": 240
-    },
-    {
-      "epoch": 24.533333333333335,
-      "grad_norm": 0.029756512492895126,
-      "learning_rate": 4.669395964580614e-06,
-      "loss": 0.0025,
-      "step": 245
-    },
-    {
-      "epoch": 25.0,
-      "grad_norm": 0.2836012542247772,
-      "learning_rate": 2.996874680545603e-06,
-      "loss": 0.0059,
-      "step": 250
-    },
-    {
-      "epoch": 25.533333333333335,
-      "grad_norm": 0.2209901213645935,
-      "learning_rate": 1.6894512568783716e-06,
-      "loss": 0.004,
-      "step": 255
-    },
-    {
-      "epoch": 26.0,
-      "grad_norm": 0.023770242929458618,
-      "learning_rate": 7.520465401290033e-07,
-      "loss": 0.0033,
-      "step": 260
-    },
-    {
-      "epoch": 26.533333333333335,
-      "grad_norm": 0.02641889452934265,
-      "learning_rate": 1.8818870998508208e-07,
-      "loss": 0.0032,
-      "step": 265
-    },
-    {
-      "epoch": 27.0,
-      "grad_norm": 0.03176680952310562,
       "learning_rate": 0.0,
-      "loss": 0.0025,
-      "step": 270
     },
     {
-      "epoch": 27.0,
-      "step": 270,
-      "total_flos": 1.3785203655910195e+17,
-      "train_loss": 0.12853161850599226,
-      "train_runtime": 2497.1205,
-      "train_samples_per_second": 1.802,
-      "train_steps_per_second": 0.108
     }
   ],
   "logging_steps": 5,
-  "max_steps": 270,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 30,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -414,7 +106,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3785203655910195e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.5333333333333333,
+      "grad_norm": 0.21942713856697083,
+      "learning_rate": 0.000199107748815478,
+      "loss": 0.7714,
       "step": 5
     },
     {
       "epoch": 1.0,
+      "grad_norm": 0.27160170674324036,
+      "learning_rate": 0.00018925188358598813,
+      "loss": 0.6282,
       "step": 10
     },
     {
       "epoch": 1.5333333333333332,
+      "grad_norm": 0.16776053607463837,
+      "learning_rate": 0.00016951924276746425,
+      "loss": 0.5601,
       "step": 15
     },
     {
       "epoch": 2.0,
+      "grad_norm": 0.2320907711982727,
+      "learning_rate": 0.0001420934762428335,
+      "loss": 0.5471,
       "step": 20
     },
     {
       "epoch": 2.533333333333333,
+      "grad_norm": 0.15466105937957764,
+      "learning_rate": 0.00011000956916240985,
+      "loss": 0.4923,
       "step": 25
     },
     {
       "epoch": 3.0,
+      "grad_norm": 0.31745201349258423,
+      "learning_rate": 7.681798497324716e-05,
+      "loss": 0.4651,
       "step": 30
     },
     {
       "epoch": 3.533333333333333,
+      "grad_norm": 0.19490917026996613,
+      "learning_rate": 4.6191764683662744e-05,
+      "loss": 0.4367,
       "step": 35
     },
     {
       "epoch": 4.0,
+      "grad_norm": 0.3345278799533844,
+      "learning_rate": 2.1520061472133902e-05,
+      "loss": 0.4268,
       "step": 40
     },
     {
       "epoch": 4.533333333333333,
+      "grad_norm": 0.156062051653862,
+      "learning_rate": 5.533090839208133e-06,
+      "loss": 0.402,
       "step": 45
     },
     {
       "epoch": 5.0,
+      "grad_norm": 0.24310481548309326,
       "learning_rate": 0.0,
+      "loss": 0.4084,
+      "step": 50
     },
     {
+      "epoch": 5.0,
+      "step": 50,
+      "total_flos": 2.5642849233567744e+16,
+      "train_loss": 0.5138080072402954,
+      "train_runtime": 465.5561,
+      "train_samples_per_second": 1.718,
+      "train_steps_per_second": 0.107
     }
   ],
   "logging_steps": 5,
+  "max_steps": 50,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.5642849233567744e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a98014c58d55a004fcf6f633921d0efe54b1543b60e7ac340ed1a1b1f40acb7
 size 5624

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfc8b2f6195776ee0127d015bd85f458c29181888c317a3105ff4995afd4007f
 size 5624