End of training

Browse files

Files changed (13) hide show

README.md +120 -0
adapter_config.json +39 -0
adapter_model.safetensors +3 -0
added_tokens.json +19 -0
config.json +51 -0
generation_config.json +6 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer_config.json +161 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,120 @@

+---
+library_name: peft
+base_model: roneneldan/TinyStories-1M
+tags:
+- generated_from_trainer
+model-index:
+- name: test_1M_1-2025-02-16-18-59
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# test_1M_1-2025-02-16-18-59
+This model is a fine-tuned version of [roneneldan/TinyStories-1M](https://huggingface.co/roneneldan/TinyStories-1M) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 2.3658
+## Model description
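+A LoRA adapter (PEFT, rank 64, alpha 128, rank-stabilized scaling, dropout 0.05) for the GPT-Neo based TinyStories-1M causal language model. The tokenizer is extended with 17 additional tokens, `<cluster_0>` through `<cluster_16>` (vocabulary size 50274), and the `wte` token-embedding module is saved in full alongside the adapter weights. More information needed on the purpose of the cluster tokens.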
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2.5e-05
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
+- optimizer: OptimizerNames.PAGED_ADAMW_8BIT (paged 8-bit AdamW) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1
+- num_epochs: 30
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss |
+|:-------------:|:-----:|:-----:|:---------------:|
+| 2.5392        | 0.5   | 297   | 2.4605          |
+| 2.4817        | 1.0   | 594   | 2.4445          |
+| 2.4207        | 1.5   | 891   | 2.4344          |
+| 2.4092        | 2.0   | 1188  | 2.4328          |
+| 2.4385        | 2.5   | 1485  | 2.4275          |
+| 2.5104        | 3.0   | 1782  | 2.4149          |
+| 2.3552        | 3.5   | 2079  | 2.4131          |
+| 2.402         | 4.0   | 2376  | 2.4120          |
+| 2.4328        | 4.5   | 2673  | 2.4143          |
+| 2.4508        | 5.0   | 2970  | 2.4052          |
+| 2.2452        | 5.5   | 3267  | 2.4064          |
+| 2.5212        | 6.0   | 3564  | 2.4137          |
+| 2.3123        | 6.5   | 3861  | 2.4038          |
+| 2.3935        | 7.0   | 4158  | 2.4001          |
+| 2.2864        | 7.5   | 4455  | 2.3967          |
+| 2.3657        | 8.0   | 4752  | 2.3980          |
+| 2.5036        | 8.5   | 5049  | 2.4018          |
+| 2.3336        | 9.0   | 5346  | 2.3965          |
+| 2.3799        | 9.5   | 5643  | 2.3916          |
+| 2.478         | 10.0  | 5940  | 2.3979          |
+| 2.3376        | 10.5  | 6237  | 2.3923          |
+| 2.3039        | 11.0  | 6534  | 2.3923          |
+| 2.3658        | 11.5  | 6831  | 2.3900          |
+| 2.473         | 12.0  | 7128  | 2.3901          |
+| 2.3923        | 12.5  | 7425  | 2.3869          |
+| 2.4122        | 13.0  | 7722  | 2.3867          |
+| 2.4238        | 13.5  | 8019  | 2.3870          |
+| 2.4234        | 14.0  | 8316  | 2.3843          |
+| 2.4062        | 14.5  | 8613  | 2.3869          |
+| 2.3188        | 15.0  | 8910  | 2.3813          |
+| 2.2888        | 15.5  | 9207  | 2.3835          |
+| 2.3326        | 16.0  | 9504  | 2.3779          |
+| 2.3273        | 16.5  | 9801  | 2.3807          |
+| 2.3338        | 17.0  | 10098 | 2.3788          |
+| 2.4337        | 17.5  | 10395 | 2.3792          |
+| 2.3396        | 18.0  | 10692 | 2.3800          |
+| 2.3172        | 18.5  | 10989 | 2.3806          |
+| 2.3586        | 19.0  | 11286 | 2.3807          |
+| 2.3708        | 19.5  | 11583 | 2.3789          |
+| 2.449         | 20.0  | 11880 | 2.3762          |
+| 2.3071        | 20.5  | 12177 | 2.3786          |
+| 2.2589        | 21.0  | 12474 | 2.3750          |
+| 2.2423        | 21.5  | 12771 | 2.3749          |
+| 2.2852        | 22.0  | 13068 | 2.3737          |
+| 2.2754        | 22.5  | 13365 | 2.3750          |
+| 2.2977        | 23.0  | 13662 | 2.3737          |
+| 2.2701        | 23.5  | 13959 | 2.3701          |
+| 2.2638        | 24.0  | 14256 | 2.3726          |
+| 2.377         | 24.5  | 14553 | 2.3733          |
+| 2.3774        | 25.0  | 14850 | 2.3725          |
+| 2.2137        | 25.5  | 15147 | 2.3722          |
+| 2.3267        | 26.0  | 15444 | 2.3681          |
+| 2.2415        | 26.5  | 15741 | 2.3706          |
+| 2.2957        | 27.0  | 16038 | 2.3687          |
+| 2.3003        | 27.5  | 16335 | 2.3678          |
+| 2.3662        | 28.0  | 16632 | 2.3678          |
+| 2.305         | 28.5  | 16929 | 2.3673          |
+| 2.2603        | 29.0  | 17226 | 2.3667          |
+| 2.2806        | 29.5  | 17523 | 2.3665          |
+| 2.2674        | 30.0  | 17820 | 2.3658          |
+### Framework versions
+- PEFT 0.14.0
+- Transformers 4.48.1
+- Pytorch 2.5.1+cu124
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "roneneldan/TinyStories-1M",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "wte"
+  ],
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "q_proj",
+    "v_proj",
+    "down_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": true
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ccbaacd517e47540304e3bcb92cb2e1d6e5d6d7c60ec23f1b1ad0eb2dbd6eac4
+size 25740544

added_tokens.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "<cluster_0>": 50268,
+  "<cluster_10>": 50264,
+  "<cluster_11>": 50261,
+  "<cluster_12>": 50257,
+  "<cluster_13>": 50265,
+  "<cluster_14>": 50262,
+  "<cluster_15>": 50272,
+  "<cluster_16>": 50273,
+  "<cluster_1>": 50260,
+  "<cluster_2>": 50259,
+  "<cluster_3>": 50269,
+  "<cluster_4>": 50267,
+  "<cluster_5>": 50270,
+  "<cluster_6>": 50266,
+  "<cluster_7>": 50263,
+  "<cluster_8>": 50271,
+  "<cluster_9>": 50258
+}

config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "roneneldan/TinyStories-1M",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTNeoForCausalLM"
+  ],
+  "attention_dropout": 0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      4
+    ]
+  ],
+  "bos_token_id": 50256,
+  "classifier_dropout": 0.1,
+  "embed_dropout": 0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "hidden_size": 64,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neo",
+  "num_heads": 16,
+  "num_layers": 8,
+  "resid_dropout": 0,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_cache": false,
+  "vocab_size": 50274,
+  "window_size": 256
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.48.1"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2197d13e61209616f27150ee83fffdafe0827f40978ece11b61a5a582cc61ee6
+size 27869656

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,161 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": true,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "<cluster_12>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "<cluster_9>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "<cluster_2>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "<cluster_1>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "<cluster_11>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "<cluster_14>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "<cluster_7>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "<cluster_10>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "<cluster_13>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "<cluster_6>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "<cluster_4>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "<cluster_0>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "<cluster_3>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "<cluster_5>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "<cluster_8>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "<cluster_15>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "<cluster_16>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9733dd4c13ec18a95179d81062f4cbde8cae7871ec337b1ff77318e2d80bb239
+size 5368

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff