End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +567 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: 316_globalbatchsize64_lr2e5_epochs15
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # 316_globalbatchsize64_lr2e5_epochs15
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description

 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: 316_globalbatchsize64_lr2e5_epochs15
 # 316_globalbatchsize64_lr2e5_epochs15
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/openthoughts_316 dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 15.0,
+    "total_flos": 9.86030068989952e+16,
+    "train_loss": 0.28399625040590765,
+    "train_runtime": 6242.4075,
+    "train_samples_per_second": 0.759,
+    "train_steps_per_second": 0.012
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 15.0,
+    "total_flos": 9.86030068989952e+16,
+    "train_loss": 0.28399625040590765,
+    "train_runtime": 6242.4075,
+    "train_samples_per_second": 0.759,
+    "train_steps_per_second": 0.012
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,567 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.2,
+      "grad_norm": 5.5820740662503425,
+      "learning_rate": 2.5e-06,
+      "loss": 0.832,
+      "step": 1
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 6.008099573926324,
+      "learning_rate": 5e-06,
+      "loss": 0.8909,
+      "step": 2
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 5.706742421629664,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.8975,
+      "step": 3
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 4.258416334954996,
+      "learning_rate": 1e-05,
+      "loss": 0.8647,
+      "step": 4
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.0857868158647546,
+      "learning_rate": 1.25e-05,
+      "loss": 0.7838,
+      "step": 5
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 5.647280764828131,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.8144,
+      "step": 6
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 8.569379397089753,
+      "learning_rate": 1.7500000000000002e-05,
+      "loss": 0.8566,
+      "step": 7
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 7.735280978633036,
+      "learning_rate": 2e-05,
+      "loss": 0.837,
+      "step": 8
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 4.574011055853512,
+      "learning_rate": 1.9989008914857115e-05,
+      "loss": 0.7907,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.2366031329888023,
+      "learning_rate": 1.9956059820218982e-05,
+      "loss": 0.7275,
+      "step": 10
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 2.351602239413264,
+      "learning_rate": 1.990122514534651e-05,
+      "loss": 0.7024,
+      "step": 11
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 1.824388674691251,
+      "learning_rate": 1.982462542875576e-05,
+      "loss": 0.6445,
+      "step": 12
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 1.2250296782921972,
+      "learning_rate": 1.972642905324813e-05,
+      "loss": 0.6175,
+      "step": 13
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 1.0940321158989026,
+      "learning_rate": 1.9606851875768404e-05,
+      "loss": 0.6367,
+      "step": 14
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.062560084444676,
+      "learning_rate": 1.9466156752904344e-05,
+      "loss": 0.607,
+      "step": 15
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.8418790140645667,
+      "learning_rate": 1.9304652963070868e-05,
+      "loss": 0.523,
+      "step": 16
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.8596411326553409,
+      "learning_rate": 1.9122695526648968e-05,
+      "loss": 0.5878,
+      "step": 17
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 2.683590177932973,
+      "learning_rate": 1.8920684425573865e-05,
+      "loss": 0.5315,
+      "step": 18
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 1.0546413161380288,
+      "learning_rate": 1.8699063724087905e-05,
+      "loss": 0.5265,
+      "step": 19
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.7205640524524933,
+      "learning_rate": 1.8458320592590976e-05,
+      "loss": 0.4572,
+      "step": 20
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.8440191277505408,
+      "learning_rate": 1.8198984236734246e-05,
+      "loss": 0.4667,
+      "step": 21
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 0.7832786494693724,
+      "learning_rate": 1.7921624734111292e-05,
+      "loss": 0.4186,
+      "step": 22
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 0.7726547610804203,
+      "learning_rate": 1.762685178110382e-05,
+      "loss": 0.4149,
+      "step": 23
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.7438101223901954,
+      "learning_rate": 1.731531335263669e-05,
+      "loss": 0.4219,
+      "step": 24
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.9344642511130782,
+      "learning_rate": 1.698769427778842e-05,
+      "loss": 0.399,
+      "step": 25
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 0.8455099974475572,
+      "learning_rate": 1.664471473438822e-05,
+      "loss": 0.3884,
+      "step": 26
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 0.7685465960306714,
+      "learning_rate": 1.628712866590885e-05,
+      "loss": 0.3403,
+      "step": 27
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.7248937279387117,
+      "learning_rate": 1.5915722124135227e-05,
+      "loss": 0.3294,
+      "step": 28
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 0.8361135069555778,
+      "learning_rate": 1.5531311541251995e-05,
+      "loss": 0.2918,
+      "step": 29
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.6214130974166043,
+      "learning_rate": 1.513474193514842e-05,
+      "loss": 0.2839,
+      "step": 30
+    },
+    {
+      "epoch": 6.2,
+      "grad_norm": 0.8149573400371725,
+      "learning_rate": 1.4726885051885654e-05,
+      "loss": 0.2609,
+      "step": 31
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 1.0360067290148387,
+      "learning_rate": 1.4308637449409705e-05,
+      "loss": 0.2481,
+      "step": 32
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 0.7209378246648206,
+      "learning_rate": 1.3880918526722497e-05,
+      "loss": 0.2202,
+      "step": 33
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.7516647486638462,
+      "learning_rate": 1.344466850284333e-05,
+      "loss": 0.2246,
+      "step": 34
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.5980564556653549,
+      "learning_rate": 1.300084635000341e-05,
+      "loss": 0.2106,
+      "step": 35
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 0.7302897741304413,
+      "learning_rate": 1.2550427685616767e-05,
+      "loss": 0.1665,
+      "step": 36
+    },
+    {
+      "epoch": 7.4,
+      "grad_norm": 0.7955427448866793,
+      "learning_rate": 1.2094402627661447e-05,
+      "loss": 0.1765,
+      "step": 37
+    },
+    {
+      "epoch": 7.6,
+      "grad_norm": 0.6947342716788957,
+      "learning_rate": 1.1633773618185302e-05,
+      "loss": 0.1476,
+      "step": 38
+    },
+    {
+      "epoch": 7.8,
+      "grad_norm": 0.666613403120678,
+      "learning_rate": 1.1169553219720828e-05,
+      "loss": 0.1556,
+      "step": 39
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.7246014420654745,
+      "learning_rate": 1.070276188945293e-05,
+      "loss": 0.1497,
+      "step": 40
+    },
+    {
+      "epoch": 8.2,
+      "grad_norm": 0.5352518212418655,
+      "learning_rate": 1.0234425736032607e-05,
+      "loss": 0.1054,
+      "step": 41
+    },
+    {
+      "epoch": 8.4,
+      "grad_norm": 0.582620097153326,
+      "learning_rate": 9.765574263967397e-06,
+      "loss": 0.1116,
+      "step": 42
+    },
+    {
+      "epoch": 8.6,
+      "grad_norm": 0.7722198112951959,
+      "learning_rate": 9.297238110547075e-06,
+      "loss": 0.1009,
+      "step": 43
+    },
+    {
+      "epoch": 8.8,
+      "grad_norm": 0.7769161831075575,
+      "learning_rate": 8.830446780279175e-06,
+      "loss": 0.1081,
+      "step": 44
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 0.5267516336891753,
+      "learning_rate": 8.366226381814698e-06,
+      "loss": 0.0924,
+      "step": 45
+    },
+    {
+      "epoch": 9.2,
+      "grad_norm": 0.6815118378301678,
+      "learning_rate": 7.905597372338558e-06,
+      "loss": 0.0788,
+      "step": 46
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 0.6096654056601541,
+      "learning_rate": 7.449572314383237e-06,
+      "loss": 0.0704,
+      "step": 47
+    },
+    {
+      "epoch": 9.6,
+      "grad_norm": 0.4975893608049929,
+      "learning_rate": 6.999153649996595e-06,
+      "loss": 0.0608,
+      "step": 48
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.5089023783497421,
+      "learning_rate": 6.555331497156671e-06,
+      "loss": 0.0637,
+      "step": 49
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.7005654148661791,
+      "learning_rate": 6.119081473277502e-06,
+      "loss": 0.0613,
+      "step": 50
+    },
+    {
+      "epoch": 10.2,
+      "grad_norm": 0.46142897491616885,
+      "learning_rate": 5.6913625505902966e-06,
+      "loss": 0.0411,
+      "step": 51
+    },
+    {
+      "epoch": 10.4,
+      "grad_norm": 0.3561622014470182,
+      "learning_rate": 5.273114948114346e-06,
+      "loss": 0.0455,
+      "step": 52
+    },
+    {
+      "epoch": 10.6,
+      "grad_norm": 0.35910813816061127,
+      "learning_rate": 4.865258064851579e-06,
+      "loss": 0.0448,
+      "step": 53
+    },
+    {
+      "epoch": 10.8,
+      "grad_norm": 0.3319709763565861,
+      "learning_rate": 4.468688458748006e-06,
+      "loss": 0.0374,
+      "step": 54
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 0.33083801499691196,
+      "learning_rate": 4.084277875864776e-06,
+      "loss": 0.0364,
+      "step": 55
+    },
+    {
+      "epoch": 11.2,
+      "grad_norm": 0.321908630777182,
+      "learning_rate": 3.712871334091154e-06,
+      "loss": 0.0324,
+      "step": 56
+    },
+    {
+      "epoch": 11.4,
+      "grad_norm": 0.28605671407511096,
+      "learning_rate": 3.355285265611784e-06,
+      "loss": 0.0277,
+      "step": 57
+    },
+    {
+      "epoch": 11.6,
+      "grad_norm": 0.2613134954891153,
+      "learning_rate": 3.0123057222115835e-06,
+      "loss": 0.0254,
+      "step": 58
+    },
+    {
+      "epoch": 11.8,
+      "grad_norm": 0.26765400526301425,
+      "learning_rate": 2.6846866473633126e-06,
+      "loss": 0.0252,
+      "step": 59
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.24309760367630573,
+      "learning_rate": 2.373148218896182e-06,
+      "loss": 0.0264,
+      "step": 60
+    },
+    {
+      "epoch": 12.2,
+      "grad_norm": 0.20368260468560698,
+      "learning_rate": 2.078375265888707e-06,
+      "loss": 0.0245,
+      "step": 61
+    },
+    {
+      "epoch": 12.4,
+      "grad_norm": 0.23111592875956644,
+      "learning_rate": 1.8010157632657544e-06,
+      "loss": 0.0217,
+      "step": 62
+    },
+    {
+      "epoch": 12.6,
+      "grad_norm": 0.21424368315616057,
+      "learning_rate": 1.5416794074090258e-06,
+      "loss": 0.02,
+      "step": 63
+    },
+    {
+      "epoch": 12.8,
+      "grad_norm": 0.23486365442991192,
+      "learning_rate": 1.300936275912098e-06,
+      "loss": 0.0204,
+      "step": 64
+    },
+    {
+      "epoch": 13.0,
+      "grad_norm": 0.1966413751838419,
+      "learning_rate": 1.0793155744261352e-06,
+      "loss": 0.0162,
+      "step": 65
+    },
+    {
+      "epoch": 13.2,
+      "grad_norm": 0.1590508395588302,
+      "learning_rate": 8.773044733510338e-07,
+      "loss": 0.0181,
+      "step": 66
+    },
+    {
+      "epoch": 13.4,
+      "grad_norm": 0.14626626011540572,
+      "learning_rate": 6.953470369291349e-07,
+      "loss": 0.0143,
+      "step": 67
+    },
+    {
+      "epoch": 13.6,
+      "grad_norm": 0.16591443824765065,
+      "learning_rate": 5.33843247095659e-07,
+      "loss": 0.0194,
+      "step": 68
+    },
+    {
+      "epoch": 13.8,
+      "grad_norm": 0.1709470448469729,
+      "learning_rate": 3.931481242315993e-07,
+      "loss": 0.0168,
+      "step": 69
+    },
+    {
+      "epoch": 14.0,
+      "grad_norm": 0.14577825961665566,
+      "learning_rate": 2.735709467518699e-07,
+      "loss": 0.0134,
+      "step": 70
+    },
+    {
+      "epoch": 14.2,
+      "grad_norm": 0.13945121547728515,
+      "learning_rate": 1.7537457124423896e-07,
+      "loss": 0.0134,
+      "step": 71
+    },
+    {
+      "epoch": 14.4,
+      "grad_norm": 0.14310398445495884,
+      "learning_rate": 9.877485465349057e-08,
+      "loss": 0.0152,
+      "step": 72
+    },
+    {
+      "epoch": 14.6,
+      "grad_norm": 0.1404230436657977,
+      "learning_rate": 4.394017978101905e-08,
+      "loss": 0.0137,
+      "step": 73
+    },
+    {
+      "epoch": 14.8,
+      "grad_norm": 0.16581805815771083,
+      "learning_rate": 1.099108514288627e-08,
+      "loss": 0.0161,
+      "step": 74
+    },
+    {
+      "epoch": 15.0,
+      "grad_norm": 0.14366857308957606,
+      "learning_rate": 0.0,
+      "loss": 0.0168,
+      "step": 75
+    },
+    {
+      "epoch": 15.0,
+      "step": 75,
+      "total_flos": 9.86030068989952e+16,
+      "train_loss": 0.28399625040590765,
+      "train_runtime": 6242.4075,
+      "train_samples_per_second": 0.759,
+      "train_steps_per_second": 0.012
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 75,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.86030068989952e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed