Add files using upload-large-folder tool
Browse files- README.md +5 -40
- config.json +2 -110
- generation_config.json +1 -1
- special_tokens_map.json +3 -21
- tokenizer_config.json +1 -12
README.md
CHANGED
@@ -14,13 +14,13 @@ language:
|
|
14 |
- tl
|
15 |
- vi
|
16 |
base_model:
|
17 |
-
- meta-llama/Llama-4-Scout-17B-16E
|
18 |
tags:
|
19 |
- facebook
|
20 |
- meta
|
21 |
- pytorch
|
22 |
- llama
|
23 |
-
-
|
24 |
extra_gated_prompt: >-
|
25 |
**LLAMA 4 COMMUNITY LICENSE AGREEMENT**
|
26 |
|
@@ -97,44 +97,9 @@ extra_gated_heading: "Please be sure to provide your full legal name, date of bi
|
|
97 |
license: other
|
98 |
license_name: llama4
|
99 |
---
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
</p>
|
104 |
-
<p style="margin-bottom: 0;">
|
105 |
-
<em><a href="https://docs.unsloth.ai/basics/tutorial-how-to-run-and-fine-tune-llama-4">Read our Guide</a> to see how to Fine-tune & Run Llama 4 correctly.</em>
|
106 |
-
</p>
|
107 |
-
<div style="display: flex; gap: 5px; align-items: center; ">
|
108 |
-
<a href="https://github.com/unslothai/unsloth/">
|
109 |
-
<img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
|
110 |
-
</a>
|
111 |
-
<a href="https://discord.gg/unsloth">
|
112 |
-
<img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
|
113 |
-
</a>
|
114 |
-
<a href="https://docs.unsloth.ai/basics/tutorials-how-to-fine-tune-and-run-llms">
|
115 |
-
<img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
|
116 |
-
</a>
|
117 |
-
</div>
|
118 |
-
<h1 style="margin-top: 0rem;">✨ Fine-tune Llama 4 with Unsloth!</h1>
|
119 |
-
</div>
|
120 |
-
|
121 |
-
- Fine-tune Llama-4-Scout on a single H100 80GB GPU using Unsloth!
|
122 |
-
- Read our Blog about Llama 4 support: [unsloth.ai/blog/llama4](https://unsloth.ai/blog/llama4)
|
123 |
-
- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).
|
124 |
-
- Export your fine-tuned model to GGUF, Ollama, llama.cpp, vLLM or 🤗HF.
|
125 |
-
|
126 |
-
| Unsloth supports | Free Notebooks | Performance | Memory use |
|
127 |
-
|-----------------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------|
|
128 |
-
| **GRPO with Llama 3.1 (8B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb) | 2x faster | 80% less |
|
129 |
-
| **Llama-3.2 (3B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |
|
130 |
-
| **Llama-3.2 (11B vision)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |
|
131 |
-
| **Qwen2.5 (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |
|
132 |
-
| **Phi-4 (14B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Phi_4-Conversational.ipynb) | 2x faster | 50% less |
|
133 |
-
| **Mistral (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Mistral_v0.3_(7B)-Conversational.ipynb) | 2.2x faster | 62% less |
|
134 |
-
|
135 |
-
<br>
|
136 |
-
|
137 |
-
# Llama 4 model details
|
138 |
|
139 |
The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
|
140 |
|
|
|
14 |
- tl
|
15 |
- vi
|
16 |
base_model:
|
17 |
+
- meta-llama/Llama-4-Scout-17B-16E
|
18 |
tags:
|
19 |
- facebook
|
20 |
- meta
|
21 |
- pytorch
|
22 |
- llama
|
23 |
+
- llama4
|
24 |
extra_gated_prompt: >-
|
25 |
**LLAMA 4 COMMUNITY LICENSE AGREEMENT**
|
26 |
|
|
|
97 |
license: other
|
98 |
license_name: llama4
|
99 |
---
|
100 |
+
|
101 |
+
|
102 |
+
## Model Information
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
|
105 |
|
config.json
CHANGED
@@ -3,26 +3,20 @@
|
|
3 |
"Llama4ForConditionalGeneration"
|
4 |
],
|
5 |
"boi_token_index": 200080,
|
6 |
-
"bos_token_id": 200000,
|
7 |
"eoi_token_index": 200081,
|
8 |
-
"eos_token_id": 200008,
|
9 |
"image_token_index": 200092,
|
10 |
"model_type": "llama4",
|
11 |
-
"pad_token_id": 200018,
|
12 |
"text_config": {
|
13 |
"_attn_implementation_autoset": true,
|
14 |
"attention_bias": false,
|
15 |
"attention_chunk_size": 8192,
|
16 |
"attention_dropout": 0.0,
|
17 |
-
"attn_scale": 0.1,
|
18 |
-
"attn_temperature_tuning": 4,
|
19 |
"bos_token_id": 200000,
|
20 |
"eos_token_id": [
|
21 |
200001,
|
22 |
200007,
|
23 |
200008
|
24 |
],
|
25 |
-
"floor_scale": 8192,
|
26 |
"for_llm_compressor": false,
|
27 |
"head_dim": 128,
|
28 |
"hidden_act": "silu",
|
@@ -33,106 +27,7 @@
|
|
33 |
"intermediate_size_mlp": 16384,
|
34 |
"max_position_embeddings": 10485760,
|
35 |
"model_type": "llama4_text",
|
36 |
-
"
|
37 |
-
0,
|
38 |
-
1,
|
39 |
-
2,
|
40 |
-
3,
|
41 |
-
4,
|
42 |
-
5,
|
43 |
-
6,
|
44 |
-
7,
|
45 |
-
8,
|
46 |
-
9,
|
47 |
-
10,
|
48 |
-
11,
|
49 |
-
12,
|
50 |
-
13,
|
51 |
-
14,
|
52 |
-
15,
|
53 |
-
16,
|
54 |
-
17,
|
55 |
-
18,
|
56 |
-
19,
|
57 |
-
20,
|
58 |
-
21,
|
59 |
-
22,
|
60 |
-
23,
|
61 |
-
24,
|
62 |
-
25,
|
63 |
-
26,
|
64 |
-
27,
|
65 |
-
28,
|
66 |
-
29,
|
67 |
-
30,
|
68 |
-
31,
|
69 |
-
32,
|
70 |
-
33,
|
71 |
-
34,
|
72 |
-
35,
|
73 |
-
36,
|
74 |
-
37,
|
75 |
-
38,
|
76 |
-
39,
|
77 |
-
40,
|
78 |
-
41,
|
79 |
-
42,
|
80 |
-
43,
|
81 |
-
44,
|
82 |
-
45,
|
83 |
-
46,
|
84 |
-
47
|
85 |
-
],
|
86 |
-
"no_rope_layers": [
|
87 |
-
1,
|
88 |
-
1,
|
89 |
-
1,
|
90 |
-
0,
|
91 |
-
1,
|
92 |
-
1,
|
93 |
-
1,
|
94 |
-
0,
|
95 |
-
1,
|
96 |
-
1,
|
97 |
-
1,
|
98 |
-
0,
|
99 |
-
1,
|
100 |
-
1,
|
101 |
-
1,
|
102 |
-
0,
|
103 |
-
1,
|
104 |
-
1,
|
105 |
-
1,
|
106 |
-
0,
|
107 |
-
1,
|
108 |
-
1,
|
109 |
-
1,
|
110 |
-
0,
|
111 |
-
1,
|
112 |
-
1,
|
113 |
-
1,
|
114 |
-
0,
|
115 |
-
1,
|
116 |
-
1,
|
117 |
-
1,
|
118 |
-
0,
|
119 |
-
1,
|
120 |
-
1,
|
121 |
-
1,
|
122 |
-
0,
|
123 |
-
1,
|
124 |
-
1,
|
125 |
-
1,
|
126 |
-
0,
|
127 |
-
1,
|
128 |
-
1,
|
129 |
-
1,
|
130 |
-
0,
|
131 |
-
1,
|
132 |
-
1,
|
133 |
-
1,
|
134 |
-
0
|
135 |
-
],
|
136 |
"num_attention_heads": 40,
|
137 |
"num_experts_per_tok": 1,
|
138 |
"num_hidden_layers": 48,
|
@@ -156,10 +51,8 @@
|
|
156 |
"use_qk_norm": true,
|
157 |
"vocab_size": 202048
|
158 |
},
|
159 |
-
"tie_word_embeddings": false,
|
160 |
"torch_dtype": "bfloat16",
|
161 |
-
"transformers_version": "4.51.0",
|
162 |
-
"unsloth_fixed": true,
|
163 |
"vision_config": {
|
164 |
"_attn_implementation_autoset": true,
|
165 |
"attention_dropout": 0.0,
|
@@ -180,7 +73,6 @@
|
|
180 |
"projector_input_dim": 4096,
|
181 |
"projector_output_dim": 4096,
|
182 |
"rope_theta": 10000,
|
183 |
-
"torch_dtype": "bfloat16",
|
184 |
"vision_feature_layer": -1,
|
185 |
"vision_feature_select_strategy": "default",
|
186 |
"vision_output_dim": 4096
|
|
|
3 |
"Llama4ForConditionalGeneration"
|
4 |
],
|
5 |
"boi_token_index": 200080,
|
|
|
6 |
"eoi_token_index": 200081,
|
|
|
7 |
"image_token_index": 200092,
|
8 |
"model_type": "llama4",
|
|
|
9 |
"text_config": {
|
10 |
"_attn_implementation_autoset": true,
|
11 |
"attention_bias": false,
|
12 |
"attention_chunk_size": 8192,
|
13 |
"attention_dropout": 0.0,
|
|
|
|
|
14 |
"bos_token_id": 200000,
|
15 |
"eos_token_id": [
|
16 |
200001,
|
17 |
200007,
|
18 |
200008
|
19 |
],
|
|
|
20 |
"for_llm_compressor": false,
|
21 |
"head_dim": 128,
|
22 |
"hidden_act": "silu",
|
|
|
27 |
"intermediate_size_mlp": 16384,
|
28 |
"max_position_embeddings": 10485760,
|
29 |
"model_type": "llama4_text",
|
30 |
+
"no_rope_layers": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
"num_attention_heads": 40,
|
32 |
"num_experts_per_tok": 1,
|
33 |
"num_hidden_layers": 48,
|
|
|
51 |
"use_qk_norm": true,
|
52 |
"vocab_size": 202048
|
53 |
},
|
|
|
54 |
"torch_dtype": "bfloat16",
|
55 |
+
"transformers_version": "4.51.0.dev0",
|
|
|
56 |
"vision_config": {
|
57 |
"_attn_implementation_autoset": true,
|
58 |
"attention_dropout": 0.0,
|
|
|
73 |
"projector_input_dim": 4096,
|
74 |
"projector_output_dim": 4096,
|
75 |
"rope_theta": 10000,
|
|
|
76 |
"vision_feature_layer": -1,
|
77 |
"vision_feature_select_strategy": "default",
|
78 |
"vision_output_dim": 4096
|
generation_config.json
CHANGED
@@ -9,5 +9,5 @@
|
|
9 |
"pad_token_id": 200018,
|
10 |
"temperature": 0.6,
|
11 |
"top_p": 0.9,
|
12 |
-
"transformers_version": "4.51.0"
|
13 |
}
|
|
|
9 |
"pad_token_id": 200018,
|
10 |
"temperature": 0.6,
|
11 |
"top_p": 0.9,
|
12 |
+
"transformers_version": "4.51.0.dev0"
|
13 |
}
|
special_tokens_map.json
CHANGED
@@ -1,23 +1,5 @@
|
|
1 |
{
|
2 |
-
"bos_token": {
|
3 |
-
"content": "<|begin_of_text|>",
|
4 |
-
"lstrip": false,
|
5 |
-
"normalized": false,
|
6 |
-
"rstrip": false,
|
7 |
-
"single_word": false
|
8 |
-
},
|
9 |
-
"eos_token": {
|
10 |
-
"content": "<|eot|>",
|
11 |
-
"lstrip": false,
|
12 |
-
"normalized": false,
|
13 |
-
"rstrip": false,
|
14 |
-
"single_word": false
|
15 |
-
},
|
16 |
-
"pad_token": {
|
17 |
-
"content": "<|finetune_right_pad|>",
|
18 |
-
"lstrip": false,
|
19 |
-
"normalized": false,
|
20 |
-
"rstrip": false,
|
21 |
-
"single_word": false
|
22 |
-
}
|
23 |
}
|
|
|
1 |
{
|
2 |
+
"bos_token": "<|begin_of_text|>",
|
3 |
+
"eos_token": "<|eot|>",
|
4 |
+
"pad_token": "<|finetune_right_pad|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
}
|
tokenizer_config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"add_bos_token": true,
|
3 |
"added_tokens_decoder": {
|
4 |
"200000": {
|
5 |
"content": "<|begin_of_text|>",
|
@@ -9072,14 +9071,6 @@
|
|
9072 |
"rstrip": false,
|
9073 |
"single_word": false,
|
9074 |
"special": true
|
9075 |
-
},
|
9076 |
-
"201134": {
|
9077 |
-
"content": "<|finetune_right_pad_id|>",
|
9078 |
-
"lstrip": false,
|
9079 |
-
"normalized": false,
|
9080 |
-
"rstrip": false,
|
9081 |
-
"single_word": false,
|
9082 |
-
"special": true
|
9083 |
}
|
9084 |
},
|
9085 |
"bos_token": "<|begin_of_text|>",
|
@@ -9093,8 +9084,6 @@
|
|
9093 |
],
|
9094 |
"model_max_length": 10485760,
|
9095 |
"pad_token": "<|finetune_right_pad|>",
|
9096 |
-
"padding_side": "left",
|
9097 |
"processor_class": "Llama4Processor",
|
9098 |
-
"tokenizer_class": "PreTrainedTokenizer",
|
9099 |
-
"unk_token": null
|
9100 |
}
|
|
|
1 |
{
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"200000": {
|
4 |
"content": "<|begin_of_text|>",
|
|
|
9071 |
"rstrip": false,
|
9072 |
"single_word": false,
|
9073 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9074 |
}
|
9075 |
},
|
9076 |
"bos_token": "<|begin_of_text|>",
|
|
|
9084 |
],
|
9085 |
"model_max_length": 10485760,
|
9086 |
"pad_token": "<|finetune_right_pad|>",
|
|
|
9087 |
"processor_class": "Llama4Processor",
|
9088 |
+
"tokenizer_class": "PreTrainedTokenizer"
|
|
|
9089 |
}
|