Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

adapter_config.json +4 -4
adapter_model.safetensors +2 -2
optimizer.pt +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +274 -259
training_args.bin +2 -2

adapter_config.json CHANGED Viewed

@@ -9,15 +9,15 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "modules_to_save": null,
-  "n_frequency": 100000,
   "n_frequency_pattern": {},
   "peft_type": "FOURIERFT",
   "random_loc_seed": 777,
   "revision": null,
-  "scaling": 512.0,
   "target_modules": [
-    "v_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "layers_pattern": null,
   "layers_to_transform": null,
   "modules_to_save": null,
+  "n_frequency": 1000,
   "n_frequency_pattern": {},
   "peft_type": "FOURIERFT",
   "random_loc_seed": 777,
   "revision": null,
+  "scaling": 300.0,
   "target_modules": [
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e4fbb0015872578b2c8b2b7fe2c79c1e7f37bb2418b2364f89ecf07dbf522ed
-size 25608864

 version https://git-lfs.github.com/spec/v1
+oid sha256:4dccb0c0555ab8492aa0633409c9039f47c0f65dcacdaaf2a1d45e8b8334cb37
+size 264480

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f8fb2fe3b7a1f45e045e1abe8eb69243208b5ab9193023a6530575e206b3d01
-size 51254010

 version https://git-lfs.github.com/spec/v1
+oid sha256:03d541d47b76e3bb8e3f55a8abd2aa7f6078e58d48eaa9b18a4ecef64a6fbcb1
+size 561402

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e8f091fd805b9903d345d99c13640ce0c04978aa8df1b0b259f57dcc1650d70
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9db1c36a3fe626194b8016f36409bad40a8f19f4c1ea5186e4318edad327f17
 size 14180

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f6a8ea1b5c47dca8d6e3455f5f85a613048d99290bddfe4776319de664eba2f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:af6df9be5fec90cdc67071cd0bc07ba292c19bae637d054dfd010deb1ccf035e
 size 1064

tokenizer_config.json CHANGED Viewed

@@ -33,7 +33,7 @@
   "eos_token": "</s>",
   "extra_special_tokens": {},
   "legacy": false,
-  "model_max_length": 2048,
   "pad_token": "<unk>",
   "padding_side": "right",
   "sp_model_kwargs": {},

   "eos_token": "</s>",
   "extra_special_tokens": {},
   "legacy": false,
+  "model_max_length": 512,
   "pad_token": "<unk>",
   "padding_side": "right",
   "sp_model_kwargs": {},

trainer_state.json CHANGED Viewed

@@ -1,369 +1,384 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7727975270479135,
-  "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.015455950540958269,
-      "grad_norm": 0.357046514749527,
-      "learning_rate": 0.0003846153846153846,
-      "loss": 1.5383,
-      "step": 10
     },
     {
-      "epoch": 0.030911901081916538,
-      "grad_norm": 0.11198900640010834,
-      "learning_rate": 0.0007692307692307692,
-      "loss": 1.1091,
-      "step": 20
     },
     {
-      "epoch": 0.04636785162287481,
-      "grad_norm": 0.056582603603601456,
-      "learning_rate": 0.001153846153846154,
-      "loss": 0.7939,
-      "step": 30
     },
     {
-      "epoch": 0.061823802163833076,
-      "grad_norm": 0.0210476852953434,
-      "learning_rate": 0.0015384615384615385,
-      "loss": 0.6165,
-      "step": 40
     },
     {
-      "epoch": 0.07727975270479134,
-      "grad_norm": 0.012083015404641628,
-      "learning_rate": 0.0019230769230769232,
-      "loss": 0.5624,
-      "step": 50
     },
     {
-      "epoch": 0.09273570324574962,
-      "grad_norm": 0.008505144156515598,
-      "learning_rate": 0.002307692307692308,
-      "loss": 0.5269,
-      "step": 60
     },
     {
-      "epoch": 0.10819165378670788,
-      "grad_norm": 0.0063809980638325214,
-      "learning_rate": 0.0026923076923076926,
-      "loss": 0.5059,
-      "step": 70
     },
     {
-      "epoch": 0.12364760432766615,
-      "grad_norm": 0.005832094699144363,
-      "learning_rate": 0.0029950657894736842,
-      "loss": 0.5202,
-      "step": 80
     },
     {
-      "epoch": 0.1391035548686244,
-      "grad_norm": 0.004662094172090292,
-      "learning_rate": 0.0029703947368421055,
-      "loss": 0.5087,
-      "step": 90
     },
     {
-      "epoch": 0.1545595054095827,
-      "grad_norm": 0.004813206382095814,
-      "learning_rate": 0.0029457236842105267,
-      "loss": 0.4979,
-      "step": 100
     },
     {
-      "epoch": 0.17001545595054096,
-      "grad_norm": 0.003981301095336676,
-      "learning_rate": 0.0029210526315789475,
-      "loss": 0.4833,
-      "step": 110
     },
     {
-      "epoch": 0.18547140649149924,
-      "grad_norm": 0.0037942214403301477,
-      "learning_rate": 0.0028963815789473687,
-      "loss": 0.4842,
-      "step": 120
     },
     {
-      "epoch": 0.2009273570324575,
-      "grad_norm": 0.0041742450557649136,
-      "learning_rate": 0.0028717105263157895,
-      "loss": 0.4818,
-      "step": 130
     },
     {
-      "epoch": 0.21638330757341576,
-      "grad_norm": 0.005099099595099688,
-      "learning_rate": 0.0028470394736842108,
-      "loss": 0.4809,
-      "step": 140
     },
     {
-      "epoch": 0.23183925811437403,
-      "grad_norm": 0.0031047100201249123,
-      "learning_rate": 0.0028223684210526316,
-      "loss": 0.5016,
-      "step": 150
     },
     {
-      "epoch": 0.2472952086553323,
-      "grad_norm": 0.0036040199920535088,
-      "learning_rate": 0.002797697368421053,
-      "loss": 0.4775,
-      "step": 160
     },
     {
-      "epoch": 0.26275115919629055,
-      "grad_norm": 0.0033861789852380753,
-      "learning_rate": 0.0027730263157894736,
-      "loss": 0.4784,
-      "step": 170
     },
     {
-      "epoch": 0.2782071097372488,
-      "grad_norm": 0.003118926426395774,
-      "learning_rate": 0.002748355263157895,
-      "loss": 0.4962,
-      "step": 180
     },
     {
-      "epoch": 0.2936630602782071,
-      "grad_norm": 0.0035265563055872917,
-      "learning_rate": 0.002723684210526316,
-      "loss": 0.4829,
-      "step": 190
     },
     {
-      "epoch": 0.3091190108191654,
-      "grad_norm": 0.0035475995391607285,
-      "learning_rate": 0.002699013157894737,
-      "loss": 0.485,
-      "step": 200
     },
     {
-      "epoch": 0.32457496136012365,
-      "grad_norm": 0.0030264686793088913,
-      "learning_rate": 0.002674342105263158,
-      "loss": 0.4681,
-      "step": 210
     },
     {
-      "epoch": 0.3400309119010819,
-      "grad_norm": 0.0033854299690574408,
-      "learning_rate": 0.002649671052631579,
-      "loss": 0.4805,
-      "step": 220
     },
     {
-      "epoch": 0.3554868624420402,
-      "grad_norm": 0.0029569112230092287,
-      "learning_rate": 0.002625,
-      "loss": 0.4688,
-      "step": 230
     },
     {
-      "epoch": 0.37094281298299847,
-      "grad_norm": 0.0032272525131702423,
-      "learning_rate": 0.002600328947368421,
-      "loss": 0.4752,
-      "step": 240
     },
     {
-      "epoch": 0.38639876352395675,
-      "grad_norm": 0.003502602456137538,
-      "learning_rate": 0.002575657894736842,
-      "loss": 0.4699,
-      "step": 250
     },
     {
-      "epoch": 0.401854714064915,
-      "grad_norm": 0.0031522298231720924,
-      "learning_rate": 0.002550986842105263,
-      "loss": 0.4756,
-      "step": 260
     },
     {
-      "epoch": 0.41731066460587324,
-      "grad_norm": 0.003098264569416642,
-      "learning_rate": 0.0025263157894736842,
-      "loss": 0.4574,
-      "step": 270
     },
     {
-      "epoch": 0.4327666151468315,
-      "grad_norm": 0.0025676521472632885,
-      "learning_rate": 0.0025016447368421055,
-      "loss": 0.4779,
-      "step": 280
     },
     {
-      "epoch": 0.4482225656877898,
-      "grad_norm": 0.0034302272833883762,
-      "learning_rate": 0.0024769736842105263,
-      "loss": 0.4729,
-      "step": 290
     },
     {
-      "epoch": 0.46367851622874806,
-      "grad_norm": 0.003159865504130721,
-      "learning_rate": 0.0024523026315789475,
-      "loss": 0.4715,
-      "step": 300
     },
     {
-      "epoch": 0.47913446676970634,
-      "grad_norm": 0.003168923780322075,
-      "learning_rate": 0.0024276315789473683,
-      "loss": 0.4764,
-      "step": 310
     },
     {
-      "epoch": 0.4945904173106646,
-      "grad_norm": 0.0034859515726566315,
-      "learning_rate": 0.0024029605263157896,
-      "loss": 0.4652,
-      "step": 320
     },
     {
-      "epoch": 0.5100463678516228,
-      "grad_norm": 0.003067239187657833,
-      "learning_rate": 0.0023782894736842104,
-      "loss": 0.4648,
-      "step": 330
     },
     {
-      "epoch": 0.5255023183925811,
-      "grad_norm": 0.0032223982270807028,
-      "learning_rate": 0.0023536184210526316,
-      "loss": 0.4725,
-      "step": 340
     },
     {
-      "epoch": 0.5409582689335394,
-      "grad_norm": 0.0027090355288237333,
-      "learning_rate": 0.0023289473684210524,
-      "loss": 0.4704,
-      "step": 350
     },
     {
-      "epoch": 0.5564142194744977,
-      "grad_norm": 0.003484300570562482,
-      "learning_rate": 0.0023042763157894736,
-      "loss": 0.4616,
-      "step": 360
     },
     {
-      "epoch": 0.5718701700154559,
-      "grad_norm": 0.003339330432936549,
-      "learning_rate": 0.0022796052631578944,
-      "loss": 0.4665,
-      "step": 370
     },
     {
-      "epoch": 0.5873261205564142,
-      "grad_norm": 0.0029797593597322702,
-      "learning_rate": 0.002254934210526316,
-      "loss": 0.4573,
-      "step": 380
     },
     {
-      "epoch": 0.6027820710973725,
-      "grad_norm": 0.0030033981893211603,
-      "learning_rate": 0.002230263157894737,
-      "loss": 0.4618,
-      "step": 390
     },
     {
-      "epoch": 0.6182380216383307,
-      "grad_norm": 0.005113155115395784,
-      "learning_rate": 0.002205592105263158,
-      "loss": 0.4589,
-      "step": 400
     },
     {
-      "epoch": 0.633693972179289,
-      "grad_norm": 0.002975397277623415,
-      "learning_rate": 0.002180921052631579,
-      "loss": 0.4765,
-      "step": 410
     },
     {
-      "epoch": 0.6491499227202473,
-      "grad_norm": 0.004753004759550095,
-      "learning_rate": 0.00215625,
-      "loss": 0.4631,
-      "step": 420
     },
     {
-      "epoch": 0.6646058732612056,
-      "grad_norm": 0.003564928425475955,
-      "learning_rate": 0.002131578947368421,
-      "loss": 0.4488,
-      "step": 430
     },
     {
-      "epoch": 0.6800618238021638,
-      "grad_norm": 0.0032665496692061424,
-      "learning_rate": 0.0021069078947368422,
-      "loss": 0.457,
-      "step": 440
     },
     {
-      "epoch": 0.6955177743431221,
-      "grad_norm": 0.0030079709831625223,
-      "learning_rate": 0.002082236842105263,
-      "loss": 0.4667,
-      "step": 450
     },
     {
-      "epoch": 0.7109737248840804,
-      "grad_norm": 0.0025733078364282846,
-      "learning_rate": 0.0020575657894736843,
-      "loss": 0.4667,
-      "step": 460
     },
     {
-      "epoch": 0.7264296754250387,
-      "grad_norm": 0.00270587345585227,
-      "learning_rate": 0.0020328947368421055,
-      "loss": 0.4679,
-      "step": 470
     },
     {
-      "epoch": 0.7418856259659969,
-      "grad_norm": 0.00273908581584692,
-      "learning_rate": 0.0020082236842105263,
-      "loss": 0.4694,
-      "step": 480
     },
     {
-      "epoch": 0.7573415765069552,
-      "grad_norm": 0.002720112446695566,
-      "learning_rate": 0.0019835526315789475,
-      "loss": 0.4513,
-      "step": 490
     },
     {
-      "epoch": 0.7727975270479135,
-      "grad_norm": 0.0028910296969115734,
-      "learning_rate": 0.0019588815789473683,
-      "loss": 0.4592,
-      "step": 500
     }
   ],
-  "logging_steps": 10,
-  "max_steps": 1294,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
-  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -371,12 +386,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.1151645505224704e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 5,
+  "global_step": 2588,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.019319938176197836,
+      "grad_norm": 0.00098650180734694,
+      "learning_rate": 0.009615384615384616,
+      "loss": 0.9907,
+      "step": 50
     },
     {
+      "epoch": 0.03863987635239567,
+      "grad_norm": 0.000779022928327322,
+      "learning_rate": 0.019230769230769232,
+      "loss": 0.9647,
+      "step": 100
     },
     {
+      "epoch": 0.05795981452859351,
+      "grad_norm": 0.000611725845374167,
+      "learning_rate": 0.028846153846153844,
+      "loss": 0.9412,
+      "step": 150
     },
     {
+      "epoch": 0.07727975270479134,
+      "grad_norm": 0.0005838441429659724,
+      "learning_rate": 0.029457236842105262,
+      "loss": 0.9322,
+      "step": 200
     },
     {
+      "epoch": 0.09659969088098919,
+      "grad_norm": 0.0007691067876294255,
+      "learning_rate": 0.028840460526315788,
+      "loss": 0.9131,
+      "step": 250
     },
     {
+      "epoch": 0.11591962905718702,
+      "grad_norm": 0.0005935626104474068,
+      "learning_rate": 0.028223684210526314,
+      "loss": 0.9104,
+      "step": 300
     },
     {
+      "epoch": 0.13523956723338484,
+      "grad_norm": 0.0006890599033795297,
+      "learning_rate": 0.02760690789473684,
+      "loss": 0.9214,
+      "step": 350
     },
     {
+      "epoch": 0.1545595054095827,
+      "grad_norm": 0.0006042916793376207,
+      "learning_rate": 0.02699013157894737,
+      "loss": 0.9,
+      "step": 400
     },
     {
+      "epoch": 0.17387944358578053,
+      "grad_norm": 0.0005447549629025161,
+      "learning_rate": 0.026373355263157892,
+      "loss": 0.9097,
+      "step": 450
     },
     {
+      "epoch": 0.19319938176197837,
+      "grad_norm": 0.0004888740368187428,
+      "learning_rate": 0.02575657894736842,
+      "loss": 0.9037,
+      "step": 500
     },
     {
+      "epoch": 0.2125193199381762,
+      "grad_norm": 0.0008238813607022166,
+      "learning_rate": 0.025139802631578945,
+      "loss": 0.899,
+      "step": 550
     },
     {
+      "epoch": 0.23183925811437403,
+      "grad_norm": 0.000727724633179605,
+      "learning_rate": 0.024523026315789474,
+      "loss": 0.923,
+      "step": 600
     },
     {
+      "epoch": 0.2511591962905719,
+      "grad_norm": 0.0005605846527032554,
+      "learning_rate": 0.02390625,
+      "loss": 0.9031,
+      "step": 650
     },
     {
+      "epoch": 0.2704791344667697,
+      "grad_norm": 0.0007705381722189486,
+      "learning_rate": 0.023289473684210523,
+      "loss": 0.9013,
+      "step": 700
     },
     {
+      "epoch": 0.28979907264296756,
+      "grad_norm": 0.0007164838025346398,
+      "learning_rate": 0.022672697368421053,
+      "loss": 0.8971,
+      "step": 750
     },
     {
+      "epoch": 0.3091190108191654,
+      "grad_norm": 0.000717374321538955,
+      "learning_rate": 0.02205592105263158,
+      "loss": 0.8866,
+      "step": 800
     },
     {
+      "epoch": 0.3284389489953632,
+      "grad_norm": 0.0006394012016244233,
+      "learning_rate": 0.021439144736842105,
+      "loss": 0.899,
+      "step": 850
     },
     {
+      "epoch": 0.34775888717156106,
+      "grad_norm": 0.0006252205348573625,
+      "learning_rate": 0.02082236842105263,
+      "loss": 0.894,
+      "step": 900
     },
     {
+      "epoch": 0.3670788253477589,
+      "grad_norm": 0.0006903470493853092,
+      "learning_rate": 0.020205592105263157,
+      "loss": 0.8858,
+      "step": 950
     },
     {
+      "epoch": 0.38639876352395675,
+      "grad_norm": 0.0008341589127667248,
+      "learning_rate": 0.019588815789473683,
+      "loss": 0.9168,
+      "step": 1000
     },
     {
+      "epoch": 0.40571870170015456,
+      "grad_norm": 0.0005771280848421156,
+      "learning_rate": 0.01897203947368421,
+      "loss": 0.9117,
+      "step": 1050
     },
     {
+      "epoch": 0.4250386398763524,
+      "grad_norm": 0.000522978079970926,
+      "learning_rate": 0.018355263157894736,
+      "loss": 0.8939,
+      "step": 1100
     },
     {
+      "epoch": 0.44435857805255025,
+      "grad_norm": 0.0005450574099086225,
+      "learning_rate": 0.017738486842105265,
+      "loss": 0.9049,
+      "step": 1150
     },
     {
+      "epoch": 0.46367851622874806,
+      "grad_norm": 0.0005660468013957143,
+      "learning_rate": 0.017121710526315788,
+      "loss": 0.8944,
+      "step": 1200
     },
     {
+      "epoch": 0.48299845440494593,
+      "grad_norm": 0.0006663696258328855,
+      "learning_rate": 0.016504934210526314,
+      "loss": 0.8971,
+      "step": 1250
     },
     {
+      "epoch": 0.5023183925811437,
+      "grad_norm": 0.0005968479672446847,
+      "learning_rate": 0.01588815789473684,
+      "loss": 0.8917,
+      "step": 1300
     },
     {
+      "epoch": 0.5216383307573416,
+      "grad_norm": 0.0007491153082810342,
+      "learning_rate": 0.01527138157894737,
+      "loss": 0.8829,
+      "step": 1350
     },
     {
+      "epoch": 0.5409582689335394,
+      "grad_norm": 0.0006275599589571357,
+      "learning_rate": 0.014654605263157894,
+      "loss": 0.9058,
+      "step": 1400
     },
     {
+      "epoch": 0.5602782071097373,
+      "grad_norm": 0.0007617810624651611,
+      "learning_rate": 0.01403782894736842,
+      "loss": 0.9051,
+      "step": 1450
     },
     {
+      "epoch": 0.5795981452859351,
+      "grad_norm": 0.0006214394234120846,
+      "learning_rate": 0.013421052631578946,
+      "loss": 0.8879,
+      "step": 1500
     },
     {
+      "epoch": 0.5989180834621329,
+      "grad_norm": 0.0006560624460689723,
+      "learning_rate": 0.012804276315789473,
+      "loss": 0.8991,
+      "step": 1550
     },
     {
+      "epoch": 0.6182380216383307,
+      "grad_norm": 0.0007683933363296092,
+      "learning_rate": 0.0121875,
+      "loss": 0.9081,
+      "step": 1600
     },
     {
+      "epoch": 0.6375579598145286,
+      "grad_norm": 0.0005783849046565592,
+      "learning_rate": 0.011570723684210527,
+      "loss": 0.9067,
+      "step": 1650
     },
     {
+      "epoch": 0.6568778979907264,
+      "grad_norm": 0.0007958198548294604,
+      "learning_rate": 0.010953947368421053,
+      "loss": 0.885,
+      "step": 1700
     },
     {
+      "epoch": 0.6761978361669243,
+      "grad_norm": 0.0006095783319324255,
+      "learning_rate": 0.010337171052631579,
+      "loss": 0.8928,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6955177743431221,
+      "grad_norm": 0.000699816329870373,
+      "learning_rate": 0.009720394736842105,
+      "loss": 0.903,
+      "step": 1800
     },
     {
+      "epoch": 0.7148377125193199,
+      "grad_norm": 0.0008128538611344993,
+      "learning_rate": 0.009103618421052631,
+      "loss": 0.9036,
+      "step": 1850
     },
     {
+      "epoch": 0.7341576506955177,
+      "grad_norm": 0.0006495247362181544,
+      "learning_rate": 0.008486842105263157,
+      "loss": 0.8907,
+      "step": 1900
     },
     {
+      "epoch": 0.7534775888717156,
+      "grad_norm": 0.0005265743238851428,
+      "learning_rate": 0.007870065789473685,
+      "loss": 0.8843,
+      "step": 1950
     },
     {
+      "epoch": 0.7727975270479135,
+      "grad_norm": 0.0006601494387723505,
+      "learning_rate": 0.0072532894736842095,
+      "loss": 0.8925,
+      "step": 2000
     },
     {
+      "epoch": 0.7921174652241113,
+      "grad_norm": 0.0005823367391712964,
+      "learning_rate": 0.0066365131578947365,
+      "loss": 0.8954,
+      "step": 2050
     },
     {
+      "epoch": 0.8114374034003091,
+      "grad_norm": 0.0005229181842878461,
+      "learning_rate": 0.0060197368421052635,
+      "loss": 0.903,
+      "step": 2100
     },
     {
+      "epoch": 0.8307573415765069,
+      "grad_norm": 0.0005145368631929159,
+      "learning_rate": 0.00540296052631579,
+      "loss": 0.8923,
+      "step": 2150
     },
     {
+      "epoch": 0.8500772797527048,
+      "grad_norm": 0.0006071292445994914,
+      "learning_rate": 0.004786184210526316,
+      "loss": 0.8804,
+      "step": 2200
     },
     {
+      "epoch": 0.8693972179289027,
+      "grad_norm": 0.0006730407476425171,
+      "learning_rate": 0.004169407894736842,
+      "loss": 0.8919,
+      "step": 2250
     },
     {
+      "epoch": 0.8887171561051005,
+      "grad_norm": 0.0006455178954638541,
+      "learning_rate": 0.003552631578947368,
+      "loss": 0.896,
+      "step": 2300
     },
     {
+      "epoch": 0.9080370942812983,
+      "grad_norm": 0.0004997382056899369,
+      "learning_rate": 0.002935855263157895,
+      "loss": 0.8921,
+      "step": 2350
     },
     {
+      "epoch": 0.9273570324574961,
+      "grad_norm": 0.00045192165998741984,
+      "learning_rate": 0.002319078947368421,
+      "loss": 0.8839,
+      "step": 2400
     },
     {
+      "epoch": 0.9466769706336939,
+      "grad_norm": 0.0004822098126169294,
+      "learning_rate": 0.0017023026315789475,
+      "loss": 0.8988,
+      "step": 2450
     },
     {
+      "epoch": 0.9659969088098919,
+      "grad_norm": 0.0005721400957554579,
+      "learning_rate": 0.0010855263157894736,
+      "loss": 0.9045,
+      "step": 2500
     },
     {
+      "epoch": 0.9853168469860897,
+      "grad_norm": 0.0005698847235180438,
+      "learning_rate": 0.00046875,
+      "loss": 0.893,
+      "step": 2550
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.8954795002937317,
+      "eval_runtime": 1619.6937,
+      "eval_samples_per_second": 6.391,
+      "eval_steps_per_second": 0.799,
+      "step": 2588
     }
   ],
+  "logging_steps": 50,
+  "max_steps": 2588,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.394319248976609e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:751a143eac84ace0878962d5a6c61e00ae90313081b3e87461034dcd220797a8
-size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:e05980012924fba682aeee7b6335e0b05a33e8faf286e4879098fa0d40d4c691
+size 5496