Model save

Browse files

Files changed (4) hide show

README.md +57 -0
all_results.json +9 -0
train_results.json +9 -0
trainer_state.json +511 -0

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+base_model: Meta-Llama/Meta-Llama-3.1-8B
+library_name: transformers
+model_name: llama-3-1-8b-math-orca-qlora-10k-ep1-frac-100-seed-432
+tags:
+- generated_from_trainer
+- trl
+- sft
+license: llama3.1
+---
+# Model Card for llama-3-1-8b-math-orca-qlora-10k-ep1-frac-100-seed-432
+This model is a fine-tuned version of [Meta-Llama/Meta-Llama-3.1-8B](https://huggingface.co/Meta-Llama/Meta-Llama-3.1-8B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="kxnguyen/llama-3-1-8b-math-orca-qlora-10k-ep1-frac-100-seed-432", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+This model was trained with supervised fine-tuning (SFT).
+### Framework versions
+- TRL: 0.12.1
+- Transformers: 4.49.0
+- PyTorch: 2.5.1
+- Datasets: 3.1.0
+- Tokenizers: 0.21.1
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9985228951255539,
+    "total_flos": 2.6821312997071258e+17,
+    "train_loss": 0.3007896387365443,
+    "train_runtime": 3313.8561,
+    "train_samples": 10000,
+    "train_samples_per_second": 1.632,
+    "train_steps_per_second": 0.102
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9985228951255539,
+    "total_flos": 2.6821312997071258e+17,
+    "train_loss": 0.3007896387365443,
+    "train_runtime": 3313.8561,
+    "train_samples": 10000,
+    "train_samples_per_second": 1.632,
+    "train_steps_per_second": 0.102
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,511 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9985228951255539,
+  "eval_steps": 500,
+  "global_step": 338,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014771048744460856,
+      "grad_norm": 2.5938093662261963,
+      "learning_rate": 0.0002,
+      "loss": 0.9809,
+      "step": 5
+    },
+    {
+      "epoch": 0.029542097488921712,
+      "grad_norm": 0.9637813568115234,
+      "learning_rate": 0.0002,
+      "loss": 0.4769,
+      "step": 10
+    },
+    {
+      "epoch": 0.04431314623338257,
+      "grad_norm": 1.666123628616333,
+      "learning_rate": 0.0002,
+      "loss": 0.4077,
+      "step": 15
+    },
+    {
+      "epoch": 0.059084194977843424,
+      "grad_norm": 0.5881379246711731,
+      "learning_rate": 0.0002,
+      "loss": 0.4035,
+      "step": 20
+    },
+    {
+      "epoch": 0.07385524372230429,
+      "grad_norm": 0.5366687774658203,
+      "learning_rate": 0.0002,
+      "loss": 0.3695,
+      "step": 25
+    },
+    {
+      "epoch": 0.08862629246676514,
+      "grad_norm": 0.5429549217224121,
+      "learning_rate": 0.0002,
+      "loss": 0.3579,
+      "step": 30
+    },
+    {
+      "epoch": 0.103397341211226,
+      "grad_norm": 0.5302016735076904,
+      "learning_rate": 0.0002,
+      "loss": 0.3623,
+      "step": 35
+    },
+    {
+      "epoch": 0.11816838995568685,
+      "grad_norm": 0.4706576466560364,
+      "learning_rate": 0.0002,
+      "loss": 0.3423,
+      "step": 40
+    },
+    {
+      "epoch": 0.1329394387001477,
+      "grad_norm": 0.47507619857788086,
+      "learning_rate": 0.0002,
+      "loss": 0.3301,
+      "step": 45
+    },
+    {
+      "epoch": 0.14771048744460857,
+      "grad_norm": 0.487821102142334,
+      "learning_rate": 0.0002,
+      "loss": 0.3262,
+      "step": 50
+    },
+    {
+      "epoch": 0.16248153618906944,
+      "grad_norm": 0.46188947558403015,
+      "learning_rate": 0.0002,
+      "loss": 0.3381,
+      "step": 55
+    },
+    {
+      "epoch": 0.17725258493353027,
+      "grad_norm": 0.49672871828079224,
+      "learning_rate": 0.0002,
+      "loss": 0.3474,
+      "step": 60
+    },
+    {
+      "epoch": 0.19202363367799113,
+      "grad_norm": 0.45688968896865845,
+      "learning_rate": 0.0002,
+      "loss": 0.3356,
+      "step": 65
+    },
+    {
+      "epoch": 0.206794682422452,
+      "grad_norm": 0.5083580017089844,
+      "learning_rate": 0.0002,
+      "loss": 0.317,
+      "step": 70
+    },
+    {
+      "epoch": 0.22156573116691286,
+      "grad_norm": 0.4326242506504059,
+      "learning_rate": 0.0002,
+      "loss": 0.3107,
+      "step": 75
+    },
+    {
+      "epoch": 0.2363367799113737,
+      "grad_norm": 0.7657620906829834,
+      "learning_rate": 0.0002,
+      "loss": 0.3055,
+      "step": 80
+    },
+    {
+      "epoch": 0.2511078286558346,
+      "grad_norm": 0.4073372483253479,
+      "learning_rate": 0.0002,
+      "loss": 0.3041,
+      "step": 85
+    },
+    {
+      "epoch": 0.2658788774002954,
+      "grad_norm": 0.4194050431251526,
+      "learning_rate": 0.0002,
+      "loss": 0.3121,
+      "step": 90
+    },
+    {
+      "epoch": 0.28064992614475626,
+      "grad_norm": 0.4937780499458313,
+      "learning_rate": 0.0002,
+      "loss": 0.3065,
+      "step": 95
+    },
+    {
+      "epoch": 0.29542097488921715,
+      "grad_norm": 0.39246585965156555,
+      "learning_rate": 0.0002,
+      "loss": 0.3081,
+      "step": 100
+    },
+    {
+      "epoch": 0.310192023633678,
+      "grad_norm": 0.4153652787208557,
+      "learning_rate": 0.0002,
+      "loss": 0.3074,
+      "step": 105
+    },
+    {
+      "epoch": 0.3249630723781389,
+      "grad_norm": 0.39885184168815613,
+      "learning_rate": 0.0002,
+      "loss": 0.3016,
+      "step": 110
+    },
+    {
+      "epoch": 0.3397341211225997,
+      "grad_norm": 0.3999512195587158,
+      "learning_rate": 0.0002,
+      "loss": 0.302,
+      "step": 115
+    },
+    {
+      "epoch": 0.35450516986706054,
+      "grad_norm": 0.40937578678131104,
+      "learning_rate": 0.0002,
+      "loss": 0.2964,
+      "step": 120
+    },
+    {
+      "epoch": 0.36927621861152143,
+      "grad_norm": 1.0849940776824951,
+      "learning_rate": 0.0002,
+      "loss": 0.3098,
+      "step": 125
+    },
+    {
+      "epoch": 0.38404726735598227,
+      "grad_norm": 0.36466699838638306,
+      "learning_rate": 0.0002,
+      "loss": 0.2964,
+      "step": 130
+    },
+    {
+      "epoch": 0.3988183161004431,
+      "grad_norm": 0.32518795132637024,
+      "learning_rate": 0.0002,
+      "loss": 0.2788,
+      "step": 135
+    },
+    {
+      "epoch": 0.413589364844904,
+      "grad_norm": 0.3508060872554779,
+      "learning_rate": 0.0002,
+      "loss": 0.2758,
+      "step": 140
+    },
+    {
+      "epoch": 0.42836041358936483,
+      "grad_norm": 0.34023162722587585,
+      "learning_rate": 0.0002,
+      "loss": 0.2955,
+      "step": 145
+    },
+    {
+      "epoch": 0.4431314623338257,
+      "grad_norm": 0.3429297208786011,
+      "learning_rate": 0.0002,
+      "loss": 0.2812,
+      "step": 150
+    },
+    {
+      "epoch": 0.45790251107828656,
+      "grad_norm": 0.3394342064857483,
+      "learning_rate": 0.0002,
+      "loss": 0.2751,
+      "step": 155
+    },
+    {
+      "epoch": 0.4726735598227474,
+      "grad_norm": 0.3172396421432495,
+      "learning_rate": 0.0002,
+      "loss": 0.2813,
+      "step": 160
+    },
+    {
+      "epoch": 0.4874446085672083,
+      "grad_norm": 0.5636305809020996,
+      "learning_rate": 0.0002,
+      "loss": 0.2714,
+      "step": 165
+    },
+    {
+      "epoch": 0.5022156573116692,
+      "grad_norm": 0.33329370617866516,
+      "learning_rate": 0.0002,
+      "loss": 0.2759,
+      "step": 170
+    },
+    {
+      "epoch": 0.51698670605613,
+      "grad_norm": 0.34862470626831055,
+      "learning_rate": 0.0002,
+      "loss": 0.2875,
+      "step": 175
+    },
+    {
+      "epoch": 0.5317577548005908,
+      "grad_norm": 0.41521379351615906,
+      "learning_rate": 0.0002,
+      "loss": 0.2744,
+      "step": 180
+    },
+    {
+      "epoch": 0.5465288035450517,
+      "grad_norm": 0.3359523117542267,
+      "learning_rate": 0.0002,
+      "loss": 0.282,
+      "step": 185
+    },
+    {
+      "epoch": 0.5612998522895125,
+      "grad_norm": 0.3089170455932617,
+      "learning_rate": 0.0002,
+      "loss": 0.2628,
+      "step": 190
+    },
+    {
+      "epoch": 0.5760709010339734,
+      "grad_norm": 0.36551329493522644,
+      "learning_rate": 0.0002,
+      "loss": 0.2776,
+      "step": 195
+    },
+    {
+      "epoch": 0.5908419497784343,
+      "grad_norm": 0.32992231845855713,
+      "learning_rate": 0.0002,
+      "loss": 0.2599,
+      "step": 200
+    },
+    {
+      "epoch": 0.6056129985228951,
+      "grad_norm": 0.3119284510612488,
+      "learning_rate": 0.0002,
+      "loss": 0.2699,
+      "step": 205
+    },
+    {
+      "epoch": 0.620384047267356,
+      "grad_norm": 0.2953311800956726,
+      "learning_rate": 0.0002,
+      "loss": 0.2705,
+      "step": 210
+    },
+    {
+      "epoch": 0.6351550960118169,
+      "grad_norm": 0.3757329285144806,
+      "learning_rate": 0.0002,
+      "loss": 0.2918,
+      "step": 215
+    },
+    {
+      "epoch": 0.6499261447562777,
+      "grad_norm": 0.36705055832862854,
+      "learning_rate": 0.0002,
+      "loss": 0.2545,
+      "step": 220
+    },
+    {
+      "epoch": 0.6646971935007385,
+      "grad_norm": 0.3092058002948761,
+      "learning_rate": 0.0002,
+      "loss": 0.2624,
+      "step": 225
+    },
+    {
+      "epoch": 0.6794682422451994,
+      "grad_norm": 0.31742286682128906,
+      "learning_rate": 0.0002,
+      "loss": 0.2602,
+      "step": 230
+    },
+    {
+      "epoch": 0.6942392909896603,
+      "grad_norm": 0.2955617308616638,
+      "learning_rate": 0.0002,
+      "loss": 0.256,
+      "step": 235
+    },
+    {
+      "epoch": 0.7090103397341211,
+      "grad_norm": 0.3345969617366791,
+      "learning_rate": 0.0002,
+      "loss": 0.2687,
+      "step": 240
+    },
+    {
+      "epoch": 0.723781388478582,
+      "grad_norm": 0.2796613276004791,
+      "learning_rate": 0.0002,
+      "loss": 0.2526,
+      "step": 245
+    },
+    {
+      "epoch": 0.7385524372230429,
+      "grad_norm": 0.5415365695953369,
+      "learning_rate": 0.0002,
+      "loss": 0.2545,
+      "step": 250
+    },
+    {
+      "epoch": 0.7533234859675036,
+      "grad_norm": 0.3844436705112457,
+      "learning_rate": 0.0002,
+      "loss": 0.2599,
+      "step": 255
+    },
+    {
+      "epoch": 0.7680945347119645,
+      "grad_norm": 0.3186696171760559,
+      "learning_rate": 0.0002,
+      "loss": 0.2477,
+      "step": 260
+    },
+    {
+      "epoch": 0.7828655834564254,
+      "grad_norm": 0.38170936703681946,
+      "learning_rate": 0.0002,
+      "loss": 0.2582,
+      "step": 265
+    },
+    {
+      "epoch": 0.7976366322008862,
+      "grad_norm": 0.29369300603866577,
+      "learning_rate": 0.0002,
+      "loss": 0.2505,
+      "step": 270
+    },
+    {
+      "epoch": 0.8124076809453471,
+      "grad_norm": 0.29856300354003906,
+      "learning_rate": 0.0002,
+      "loss": 0.2675,
+      "step": 275
+    },
+    {
+      "epoch": 0.827178729689808,
+      "grad_norm": 0.2721855342388153,
+      "learning_rate": 0.0002,
+      "loss": 0.2489,
+      "step": 280
+    },
+    {
+      "epoch": 0.8419497784342689,
+      "grad_norm": 0.3029973804950714,
+      "learning_rate": 0.0002,
+      "loss": 0.2575,
+      "step": 285
+    },
+    {
+      "epoch": 0.8567208271787297,
+      "grad_norm": 0.2983309030532837,
+      "learning_rate": 0.0002,
+      "loss": 0.2628,
+      "step": 290
+    },
+    {
+      "epoch": 0.8714918759231906,
+      "grad_norm": 0.5093730092048645,
+      "learning_rate": 0.0002,
+      "loss": 0.2552,
+      "step": 295
+    },
+    {
+      "epoch": 0.8862629246676514,
+      "grad_norm": 0.28230157494544983,
+      "learning_rate": 0.0002,
+      "loss": 0.2592,
+      "step": 300
+    },
+    {
+      "epoch": 0.9010339734121122,
+      "grad_norm": 0.371902197599411,
+      "learning_rate": 0.0002,
+      "loss": 0.2596,
+      "step": 305
+    },
+    {
+      "epoch": 0.9158050221565731,
+      "grad_norm": 0.3786104619503021,
+      "learning_rate": 0.0002,
+      "loss": 0.25,
+      "step": 310
+    },
+    {
+      "epoch": 0.930576070901034,
+      "grad_norm": 0.4518865942955017,
+      "learning_rate": 0.0002,
+      "loss": 0.2546,
+      "step": 315
+    },
+    {
+      "epoch": 0.9453471196454948,
+      "grad_norm": 0.29951682686805725,
+      "learning_rate": 0.0002,
+      "loss": 0.2433,
+      "step": 320
+    },
+    {
+      "epoch": 0.9601181683899557,
+      "grad_norm": 0.2999703884124756,
+      "learning_rate": 0.0002,
+      "loss": 0.2419,
+      "step": 325
+    },
+    {
+      "epoch": 0.9748892171344166,
+      "grad_norm": 0.2904799282550812,
+      "learning_rate": 0.0002,
+      "loss": 0.2474,
+      "step": 330
+    },
+    {
+      "epoch": 0.9896602658788775,
+      "grad_norm": 0.28127652406692505,
+      "learning_rate": 0.0002,
+      "loss": 0.2458,
+      "step": 335
+    },
+    {
+      "epoch": 0.9985228951255539,
+      "step": 338,
+      "total_flos": 2.6821312997071258e+17,
+      "train_loss": 0.3007896387365443,
+      "train_runtime": 3313.8561,
+      "train_samples_per_second": 1.632,
+      "train_steps_per_second": 0.102
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 338,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.6821312997071258e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}