danielhanchen committed · Commit 8de9338 · verified · 1 Parent(s): d4836ab

Add files using upload-large-folder tool

README.md CHANGED
```diff
@@ -14,13 +14,13 @@ language:
 - tl
 - vi
 base_model:
-- meta-llama/Llama-4-Scout-17B-16E-Instruct
+- meta-llama/Llama-4-Scout-17B-16E
 tags:
 - facebook
 - meta
 - pytorch
 - llama
-- llama-4
+- llama4
 extra_gated_prompt: >-
   **LLAMA 4 COMMUNITY LICENSE AGREEMENT**
@@ -97,44 +97,9 @@ extra_gated_heading: "Please be sure to provide your full legal name, date of bi
 license: other
 license_name: llama4
 ---
-<div>
-<p style="margin-bottom: 0; margin-top: 0;">
-<strong>This is the original 16-bit Llama 4 Scout model by Meta. <br> See <a href="https://huggingface.co/collections/unsloth/llama-4-67f19503d764b0f3a2a868d2">our collection</a> for versions of Llama 4 including GGUF & 4-bit formats.</strong>
-</p>
-<p style="margin-bottom: 0;">
-<em><a href="https://docs.unsloth.ai/basics/tutorial-how-to-run-and-fine-tune-llama-4">Read our Guide</a> to see how to Fine-tune & Run Llama 4 correctly.</em>
-</p>
-<div style="display: flex; gap: 5px; align-items: center; ">
-<a href="https://github.com/unslothai/unsloth/">
-<img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
-</a>
-<a href="https://discord.gg/unsloth">
-<img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
-</a>
-<a href="https://docs.unsloth.ai/basics/tutorials-how-to-fine-tune-and-run-llms">
-<img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
-</a>
-</div>
-<h1 style="margin-top: 0rem;">✨ Fine-tune Llama 4 with Unsloth!</h1>
-</div>
-
-- Fine-tune Llama-4-Scout on a single H100 80GB GPU using Unsloth!
-- Read our Blog about Llama 4 support: [unsloth.ai/blog/llama4](https://unsloth.ai/blog/llama4)
-- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).
-- Export your fine-tuned model to GGUF, Ollama, llama.cpp, vLLM or 🤗HF.
-
-| Unsloth supports | Free Notebooks | Performance | Memory use |
-|-----------------|----------------|-------------|------------|
-| **GRPO with Llama 3.1 (8B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb) | 2x faster | 80% less |
-| **Llama-3.2 (3B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |
-| **Llama-3.2 (11B vision)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |
-| **Qwen2.5 (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |
-| **Phi-4 (14B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Phi_4-Conversational.ipynb) | 2x faster | 50% less |
-| **Mistral (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Mistral_v0.3_(7B)-Conversational.ipynb) | 2.2x faster | 62% less |
-
-<br>
-
-# Llama 4 model details
+
+
+## Model Information
 
 The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
```
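Since the card's description covers loading this checkpoint, here is a minimal, hedged sketch (assuming `transformers` >= 4.51, which added Llama 4 support, and enough memory for the 16-bit weights; the repo id below is the `base_model` named in the card's front matter, used purely for illustration):

```python
# Minimal sketch of loading a Llama 4 Scout checkpoint with transformers.
# Assumes transformers >= 4.51; the repo id is the base_model referenced
# in the README front matter, not necessarily this repository itself.
from transformers import AutoProcessor, Llama4ForConditionalGeneration

model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

processor = AutoProcessor.from_pretrained(model_id)
model = Llama4ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype="bfloat16",  # matches "torch_dtype" in config.json
    device_map="auto",       # shard across available devices
)
```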
config.json CHANGED
```diff
@@ -3,26 +3,20 @@
     "Llama4ForConditionalGeneration"
   ],
   "boi_token_index": 200080,
-  "bos_token_id": 200000,
   "eoi_token_index": 200081,
-  "eos_token_id": 200008,
   "image_token_index": 200092,
   "model_type": "llama4",
-  "pad_token_id": 200018,
   "text_config": {
     "_attn_implementation_autoset": true,
     "attention_bias": false,
     "attention_chunk_size": 8192,
     "attention_dropout": 0.0,
-    "attn_scale": 0.1,
-    "attn_temperature_tuning": 4,
     "bos_token_id": 200000,
     "eos_token_id": [
       200001,
       200007,
       200008
     ],
-    "floor_scale": 8192,
     "for_llm_compressor": false,
     "head_dim": 128,
     "hidden_act": "silu",
@@ -33,106 +27,7 @@
     "intermediate_size_mlp": 16384,
     "max_position_embeddings": 10485760,
     "model_type": "llama4_text",
-    "moe_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
-    "no_rope_layers": [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0],
+    "no_rope_layers": [],
     "num_attention_heads": 40,
     "num_experts_per_tok": 1,
     "num_hidden_layers": 48,
@@ -156,10 +51,8 @@
     "use_qk_norm": true,
     "vocab_size": 202048
   },
-  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.0",
-  "unsloth_fixed": true,
+  "transformers_version": "4.51.0.dev0",
   "vision_config": {
     "_attn_implementation_autoset": true,
     "attention_dropout": 0.0,
@@ -180,7 +73,6 @@
     "projector_input_dim": 4096,
     "projector_output_dim": 4096,
     "rope_theta": 10000,
-    "torch_dtype": "bfloat16",
     "vision_feature_layer": -1,
     "vision_feature_select_strategy": "default",
     "vision_output_dim": 4096
```
generation_config.json CHANGED
```diff
@@ -9,5 +9,5 @@
   "pad_token_id": 200018,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.51.0"
+  "transformers_version": "4.51.0.dev0"
 }
```
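Only `transformers_version` changes here; the decoding defaults (`temperature` 0.6, `top_p` 0.9) and `pad_token_id` 200018 are untouched, and they are what `model.generate()` picks up when no overrides are passed. A small hedged sketch of reading them directly (repo id illustrative):

```python
# Sketch: read the decoding defaults that generation_config.json defines.
# Repo id is illustrative; substitute the repository this commit belongs to.
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("meta-llama/Llama-4-Scout-17B-16E-Instruct")
print(gen.temperature)   # 0.6
print(gen.top_p)         # 0.9
print(gen.pad_token_id)  # 200018
```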
special_tokens_map.json CHANGED
```diff
@@ -1,23 +1,5 @@
 {
-  "bos_token": {
-    "content": "<|begin_of_text|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|eot|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|finetune_right_pad|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<|begin_of_text|>",
+  "eos_token": "<|eot|>",
+  "pad_token": "<|finetune_right_pad|>"
 }
```
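The dict form spells out per-token `AddedToken` flags (`lstrip`, `normalized`, `rstrip`, `single_word`), while the plain-string form relies on the defaults for those flags; the token strings themselves are identical. A hedged check (repo id illustrative):

```python
# Sketch: both serializations resolve to the same special-token strings.
# Repo id is illustrative; substitute the repository this commit belongs to.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Llama-4-Scout-17B-16E-Instruct")
print(tok.bos_token)  # <|begin_of_text|>
print(tok.eos_token)  # <|eot|>
print(tok.pad_token)  # <|finetune_right_pad|>
```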
tokenizer_config.json CHANGED
```diff
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": true,
   "added_tokens_decoder": {
     "200000": {
       "content": "<|begin_of_text|>",
@@ -9072,14 +9071,6 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "201134": {
-      "content": "<|finetune_right_pad_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<|begin_of_text|>",
@@ -9093,8 +9084,6 @@
   ],
   "model_max_length": 10485760,
   "pad_token": "<|finetune_right_pad|>",
-  "padding_side": "left",
   "processor_class": "Llama4Processor",
-  "tokenizer_class": "PreTrainedTokenizer",
-  "unk_token": null
+  "tokenizer_class": "PreTrainedTokenizer"
 }
```
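With `"padding_side": "left"` removed, a freshly loaded tokenizer falls back to the class default (right padding) unless the caller overrides it. A hedged sketch of restoring left padding where a workflow depends on it (repo id illustrative):

```python
# Sketch: padding_side is no longer pinned in tokenizer_config.json, so
# the class default (right) applies; override at load time if needed.
# Repo id is illustrative.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    padding_side="left",
)
print(tok.padding_side)  # left
```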