Model save

Browse files

Files changed (5) hide show

README.md +58 -0
all_results.json +8 -0
generation_config.json +9 -0
train_results.json +8 -0
trainer_state.json +539 -0

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: Qwen/Qwen2.5-Math-1.5B-Instruct
+library_name: transformers
+model_name: Qwen2.5-1.5B-Math-Instruct-LIMO
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for Qwen2.5-1.5B-Math-Instruct-LIMO
+This model is a fine-tuned version of [Qwen/Qwen2.5-Math-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Math-1.5B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="haoyuw/Qwen2.5-1.5B-Math-Instruct-LIMO", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+This model was trained with supervised fine-tuning (SFT).
+### Framework versions
+- TRL: 0.16.0.dev0
+- Transformers: 4.49.0.dev0
+- PyTorch: 2.5.1
+- Datasets: 3.2.0
+- Tokenizers: 0.21.0
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 20283251490816.0,
+    "train_loss": 0.7583397939968645,
+    "train_runtime": 1935.5651,
+    "train_samples": 817,
+    "train_samples_per_second": 11.753,
+    "train_steps_per_second": 0.184
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "bos_token_id": 151643,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "transformers_version": "4.49.0.dev0"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 20283251490816.0,
+    "train_loss": 0.7583397939968645,
+    "train_runtime": 1935.5651,
+    "train_samples": 817,
+    "train_samples_per_second": 11.753,
+    "train_steps_per_second": 0.184
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,539 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 356,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.056179775280898875,
+      "grad_norm": 1.6944109649364811,
+      "learning_rate": 5.555555555555557e-06,
+      "loss": 1.3057,
+      "step": 5
+    },
+    {
+      "epoch": 0.11235955056179775,
+      "grad_norm": 1.0734576025480336,
+      "learning_rate": 1.1111111111111113e-05,
+      "loss": 1.2523,
+      "step": 10
+    },
+    {
+      "epoch": 0.16853932584269662,
+      "grad_norm": 0.9680158843960001,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 1.1501,
+      "step": 15
+    },
+    {
+      "epoch": 0.2247191011235955,
+      "grad_norm": 0.8622223090177016,
+      "learning_rate": 1.9998445014500246e-05,
+      "loss": 1.0298,
+      "step": 20
+    },
+    {
+      "epoch": 0.2808988764044944,
+      "grad_norm": 0.7503899959880194,
+      "learning_rate": 1.998095759792783e-05,
+      "loss": 1.0131,
+      "step": 25
+    },
+    {
+      "epoch": 0.33707865168539325,
+      "grad_norm": 0.5972359554952426,
+      "learning_rate": 1.9944076920747774e-05,
+      "loss": 0.9485,
+      "step": 30
+    },
+    {
+      "epoch": 0.39325842696629215,
+      "grad_norm": 0.5365600358115739,
+      "learning_rate": 1.98878826221941e-05,
+      "loss": 0.9524,
+      "step": 35
+    },
+    {
+      "epoch": 0.449438202247191,
+      "grad_norm": 0.4710678595191818,
+      "learning_rate": 1.9812496046863375e-05,
+      "loss": 0.9382,
+      "step": 40
+    },
+    {
+      "epoch": 0.5056179775280899,
+      "grad_norm": 0.44555673749010527,
+      "learning_rate": 1.971807998268615e-05,
+      "loss": 0.91,
+      "step": 45
+    },
+    {
+      "epoch": 0.5617977528089888,
+      "grad_norm": 0.4474821677236859,
+      "learning_rate": 1.9604838309406668e-05,
+      "loss": 0.8664,
+      "step": 50
+    },
+    {
+      "epoch": 0.6179775280898876,
+      "grad_norm": 0.46993823824767494,
+      "learning_rate": 1.947301555832993e-05,
+      "loss": 0.8943,
+      "step": 55
+    },
+    {
+      "epoch": 0.6741573033707865,
+      "grad_norm": 0.40887692660248126,
+      "learning_rate": 1.932289638428676e-05,
+      "loss": 0.8427,
+      "step": 60
+    },
+    {
+      "epoch": 0.7303370786516854,
+      "grad_norm": 0.4178479209299752,
+      "learning_rate": 1.9154804950957096e-05,
+      "loss": 0.8765,
+      "step": 65
+    },
+    {
+      "epoch": 0.7865168539325843,
+      "grad_norm": 0.4358655706142232,
+      "learning_rate": 1.896910423087889e-05,
+      "loss": 0.8815,
+      "step": 70
+    },
+    {
+      "epoch": 0.8426966292134831,
+      "grad_norm": 0.4759614764741585,
+      "learning_rate": 1.8766195221654038e-05,
+      "loss": 0.9062,
+      "step": 75
+    },
+    {
+      "epoch": 0.898876404494382,
+      "grad_norm": 0.40941475449933445,
+      "learning_rate": 1.8546516080043956e-05,
+      "loss": 0.8525,
+      "step": 80
+    },
+    {
+      "epoch": 0.9550561797752809,
+      "grad_norm": 0.46163800722604575,
+      "learning_rate": 1.831054117582457e-05,
+      "loss": 0.8511,
+      "step": 85
+    },
+    {
+      "epoch": 1.0112359550561798,
+      "grad_norm": 0.4144691948002747,
+      "learning_rate": 1.805878006744379e-05,
+      "loss": 0.8351,
+      "step": 90
+    },
+    {
+      "epoch": 1.0674157303370786,
+      "grad_norm": 0.41318674441336783,
+      "learning_rate": 1.7791776401693455e-05,
+      "loss": 0.7575,
+      "step": 95
+    },
+    {
+      "epoch": 1.1235955056179776,
+      "grad_norm": 0.4140804142512328,
+      "learning_rate": 1.7510106739771698e-05,
+      "loss": 0.7629,
+      "step": 100
+    },
+    {
+      "epoch": 1.1797752808988764,
+      "grad_norm": 0.3888169360375874,
+      "learning_rate": 1.7214379312270786e-05,
+      "loss": 0.7725,
+      "step": 105
+    },
+    {
+      "epoch": 1.2359550561797752,
+      "grad_norm": 0.39830672831462005,
+      "learning_rate": 1.690523270577883e-05,
+      "loss": 0.7957,
+      "step": 110
+    },
+    {
+      "epoch": 1.2921348314606742,
+      "grad_norm": 0.4025052364368211,
+      "learning_rate": 1.6583334483931465e-05,
+      "loss": 0.8151,
+      "step": 115
+    },
+    {
+      "epoch": 1.348314606741573,
+      "grad_norm": 0.40954244286961317,
+      "learning_rate": 1.6249379745891277e-05,
+      "loss": 0.7625,
+      "step": 120
+    },
+    {
+      "epoch": 1.404494382022472,
+      "grad_norm": 0.38363991825348154,
+      "learning_rate": 1.590408962536759e-05,
+      "loss": 0.768,
+      "step": 125
+    },
+    {
+      "epoch": 1.4606741573033708,
+      "grad_norm": 0.4202167638100542,
+      "learning_rate": 1.5548209733417974e-05,
+      "loss": 0.7333,
+      "step": 130
+    },
+    {
+      "epoch": 1.5168539325842696,
+      "grad_norm": 0.39549093654722417,
+      "learning_rate": 1.5182508548393917e-05,
+      "loss": 0.7561,
+      "step": 135
+    },
+    {
+      "epoch": 1.5730337078651684,
+      "grad_norm": 0.38558554907475995,
+      "learning_rate": 1.4807775756507423e-05,
+      "loss": 0.7659,
+      "step": 140
+    },
+    {
+      "epoch": 1.6292134831460674,
+      "grad_norm": 0.40811098318454503,
+      "learning_rate": 1.4424820546601892e-05,
+      "loss": 0.7592,
+      "step": 145
+    },
+    {
+      "epoch": 1.6853932584269664,
+      "grad_norm": 0.4075116805060462,
+      "learning_rate": 1.4034469862809474e-05,
+      "loss": 0.7738,
+      "step": 150
+    },
+    {
+      "epoch": 1.7415730337078652,
+      "grad_norm": 0.4178748191013212,
+      "learning_rate": 1.363756661886812e-05,
+      "loss": 0.7677,
+      "step": 155
+    },
+    {
+      "epoch": 1.797752808988764,
+      "grad_norm": 0.41587042744707226,
+      "learning_rate": 1.32349678779542e-05,
+      "loss": 0.7555,
+      "step": 160
+    },
+    {
+      "epoch": 1.8539325842696628,
+      "grad_norm": 0.4186538870477955,
+      "learning_rate": 1.282754300196122e-05,
+      "loss": 0.7731,
+      "step": 165
+    },
+    {
+      "epoch": 1.9101123595505618,
+      "grad_norm": 0.40473262826860434,
+      "learning_rate": 1.2416171774220976e-05,
+      "loss": 0.7644,
+      "step": 170
+    },
+    {
+      "epoch": 1.9662921348314608,
+      "grad_norm": 0.38178573402418625,
+      "learning_rate": 1.2001742499720888e-05,
+      "loss": 0.7687,
+      "step": 175
+    },
+    {
+      "epoch": 2.0224719101123596,
+      "grad_norm": 0.3819894080787757,
+      "learning_rate": 1.1585150086919896e-05,
+      "loss": 0.7067,
+      "step": 180
+    },
+    {
+      "epoch": 2.0786516853932584,
+      "grad_norm": 0.394658925096467,
+      "learning_rate": 1.1167294115304992e-05,
+      "loss": 0.6749,
+      "step": 185
+    },
+    {
+      "epoch": 2.134831460674157,
+      "grad_norm": 0.39767908125309154,
+      "learning_rate": 1.0749076892861227e-05,
+      "loss": 0.6544,
+      "step": 190
+    },
+    {
+      "epoch": 2.191011235955056,
+      "grad_norm": 0.42673180505937613,
+      "learning_rate": 1.0331401507649868e-05,
+      "loss": 0.6896,
+      "step": 195
+    },
+    {
+      "epoch": 2.247191011235955,
+      "grad_norm": 0.408239645067392,
+      "learning_rate": 9.915169877702096e-06,
+      "loss": 0.726,
+      "step": 200
+    },
+    {
+      "epoch": 2.303370786516854,
+      "grad_norm": 0.39464002130518294,
+      "learning_rate": 9.501280803439204e-06,
+      "loss": 0.6655,
+      "step": 205
+    },
+    {
+      "epoch": 2.359550561797753,
+      "grad_norm": 0.40978323940865435,
+      "learning_rate": 9.090628026824941e-06,
+      "loss": 0.6764,
+      "step": 210
+    },
+    {
+      "epoch": 2.4157303370786516,
+      "grad_norm": 0.38292295583739444,
+      "learning_rate": 8.684098301440903e-06,
+      "loss": 0.6719,
+      "step": 215
+    },
+    {
+      "epoch": 2.4719101123595504,
+      "grad_norm": 0.38693445253625824,
+      "learning_rate": 8.2825694776525e-06,
+      "loss": 0.6584,
+      "step": 220
+    },
+    {
+      "epoch": 2.5280898876404496,
+      "grad_norm": 0.3994341136157858,
+      "learning_rate": 7.886908607000321e-06,
+      "loss": 0.6971,
+      "step": 225
+    },
+    {
+      "epoch": 2.5842696629213484,
+      "grad_norm": 0.40129558631958756,
+      "learning_rate": 7.497970069910192e-06,
+      "loss": 0.6747,
+      "step": 230
+    },
+    {
+      "epoch": 2.640449438202247,
+      "grad_norm": 0.39014270480667235,
+      "learning_rate": 7.116593730764929e-06,
+      "loss": 0.6796,
+      "step": 235
+    },
+    {
+      "epoch": 2.696629213483146,
+      "grad_norm": 0.36998791500930744,
+      "learning_rate": 6.743603124321712e-06,
+      "loss": 0.6902,
+      "step": 240
+    },
+    {
+      "epoch": 2.752808988764045,
+      "grad_norm": 0.3970482683621501,
+      "learning_rate": 6.379803677391223e-06,
+      "loss": 0.6934,
+      "step": 245
+    },
+    {
+      "epoch": 2.808988764044944,
+      "grad_norm": 0.3861644580574831,
+      "learning_rate": 6.02598096961865e-06,
+      "loss": 0.6614,
+      "step": 250
+    },
+    {
+      "epoch": 2.865168539325843,
+      "grad_norm": 0.3941319315860406,
+      "learning_rate": 5.682899037122178e-06,
+      "loss": 0.6839,
+      "step": 255
+    },
+    {
+      "epoch": 2.9213483146067416,
+      "grad_norm": 0.3898881332551341,
+      "learning_rate": 5.351298722652064e-06,
+      "loss": 0.6667,
+      "step": 260
+    },
+    {
+      "epoch": 2.9775280898876404,
+      "grad_norm": 0.40076378703515086,
+      "learning_rate": 5.031896075832846e-06,
+      "loss": 0.6708,
+      "step": 265
+    },
+    {
+      "epoch": 3.033707865168539,
+      "grad_norm": 0.41791394545484606,
+      "learning_rate": 4.725380806943299e-06,
+      "loss": 0.6575,
+      "step": 270
+    },
+    {
+      "epoch": 3.0898876404494384,
+      "grad_norm": 0.41375267265638194,
+      "learning_rate": 4.432414797572894e-06,
+      "loss": 0.6318,
+      "step": 275
+    },
+    {
+      "epoch": 3.146067415730337,
+      "grad_norm": 0.4218475144549854,
+      "learning_rate": 4.153630671370821e-06,
+      "loss": 0.6342,
+      "step": 280
+    },
+    {
+      "epoch": 3.202247191011236,
+      "grad_norm": 0.4164065437650473,
+      "learning_rate": 3.889630427973951e-06,
+      "loss": 0.6274,
+      "step": 285
+    },
+    {
+      "epoch": 3.258426966292135,
+      "grad_norm": 0.41742766148238786,
+      "learning_rate": 3.6409841430635166e-06,
+      "loss": 0.6078,
+      "step": 290
+    },
+    {
+      "epoch": 3.3146067415730336,
+      "grad_norm": 0.4472934678743176,
+      "learning_rate": 3.408228737357575e-06,
+      "loss": 0.6137,
+      "step": 295
+    },
+    {
+      "epoch": 3.370786516853933,
+      "grad_norm": 0.3983175026213215,
+      "learning_rate": 3.191866817197539e-06,
+      "loss": 0.6268,
+      "step": 300
+    },
+    {
+      "epoch": 3.4269662921348316,
+      "grad_norm": 0.41447625127416166,
+      "learning_rate": 2.9923655892323144e-06,
+      "loss": 0.635,
+      "step": 305
+    },
+    {
+      "epoch": 3.4831460674157304,
+      "grad_norm": 0.3967828745027063,
+      "learning_rate": 2.8101558515436506e-06,
+      "loss": 0.6181,
+      "step": 310
+    },
+    {
+      "epoch": 3.539325842696629,
+      "grad_norm": 0.39144720476319333,
+      "learning_rate": 2.645631063391285e-06,
+      "loss": 0.6002,
+      "step": 315
+    },
+    {
+      "epoch": 3.595505617977528,
+      "grad_norm": 0.40314596605224257,
+      "learning_rate": 2.4991464955866314e-06,
+      "loss": 0.6046,
+      "step": 320
+    },
+    {
+      "epoch": 3.6516853932584272,
+      "grad_norm": 0.3979575371097757,
+      "learning_rate": 2.371018463329651e-06,
+      "loss": 0.6091,
+      "step": 325
+    },
+    {
+      "epoch": 3.7078651685393256,
+      "grad_norm": 0.4108961213792489,
+      "learning_rate": 2.261523643165532e-06,
+      "loss": 0.6445,
+      "step": 330
+    },
+    {
+      "epoch": 3.764044943820225,
+      "grad_norm": 0.40810633042253025,
+      "learning_rate": 2.1708984755361205e-06,
+      "loss": 0.6311,
+      "step": 335
+    },
+    {
+      "epoch": 3.8202247191011236,
+      "grad_norm": 0.39820144430518867,
+      "learning_rate": 2.0993386542161944e-06,
+      "loss": 0.6154,
+      "step": 340
+    },
+    {
+      "epoch": 3.8764044943820224,
+      "grad_norm": 0.4070293846963812,
+      "learning_rate": 2.0469987037371005e-06,
+      "loss": 0.6193,
+      "step": 345
+    },
+    {
+      "epoch": 3.932584269662921,
+      "grad_norm": 0.40297827384808366,
+      "learning_rate": 2.013991645710262e-06,
+      "loss": 0.6471,
+      "step": 350
+    },
+    {
+      "epoch": 3.98876404494382,
+      "grad_norm": 0.4066161050267969,
+      "learning_rate": 2.0003887547710647e-06,
+      "loss": 0.6469,
+      "step": 355
+    },
+    {
+      "epoch": 4.0,
+      "step": 356,
+      "total_flos": 20283251490816.0,
+      "train_loss": 0.7583397939968645,
+      "train_runtime": 1935.5651,
+      "train_samples_per_second": 11.753,
+      "train_steps_per_second": 0.184
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 356,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 20283251490816.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}