danielhanchen committed · Commit 8de9338 · verified · 1 Parent(s): d4836ab

Add files using upload-large-folder tool

README.md CHANGED
```diff
@@ -14,13 +14,13 @@ language:
 - tl
 - vi
 base_model:
-- meta-llama/Llama-4-Scout-17B-16E-Instruct
+- meta-llama/Llama-4-Scout-17B-16E
 tags:
 - facebook
 - meta
 - pytorch
 - llama
-- llama-4
+- llama4
 extra_gated_prompt: >-
   **LLAMA 4 COMMUNITY LICENSE AGREEMENT**
@@ -97,44 +97,9 @@ extra_gated_heading: "Please be sure to provide your full legal name, date of bi
 license: other
 license_name: llama4
 ---
-<div>
-<p style="margin-bottom: 0; margin-top: 0;">
-<strong>This is the original 16-bit Llama 4 Scout model by Meta. <br> See <a href="https://huggingface.co/collections/unsloth/llama-4-67f19503d764b0f3a2a868d2">our collection</a> for versions of Llama 4 including GGUF & 4-bit formats.</strong>
-</p>
-<p style="margin-bottom: 0;">
-<em><a href="https://docs.unsloth.ai/basics/tutorial-how-to-run-and-fine-tune-llama-4">Read our Guide</a> to see how to Fine-tune & Run Llama 4 correctly.</em>
-</p>
-<div style="display: flex; gap: 5px; align-items: center; ">
-<a href="https://github.com/unslothai/unsloth/">
-<img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
-</a>
-<a href="https://discord.gg/unsloth">
-<img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
-</a>
-<a href="https://docs.unsloth.ai/basics/tutorials-how-to-fine-tune-and-run-llms">
-<img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
-</a>
-</div>
-<h1 style="margin-top: 0rem;">✨ Fine-tune Llama 4 with Unsloth!</h1>
-</div>
-
-- Fine-tune Llama-4-Scout on a single H100 80GB GPU using Unsloth!
-- Read our Blog about Llama 4 support: [unsloth.ai/blog/llama4](https://unsloth.ai/blog/llama4)
-- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).
-- Export your fine-tuned model to GGUF, Ollama, llama.cpp, vLLM or 🤗HF.
-
-| Unsloth supports | Free Notebooks | Performance | Memory use |
-|-----------------|----------------|-------------|------------|
-| **GRPO with Llama 3.1 (8B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb) | 2x faster | 80% less |
-| **Llama-3.2 (3B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |
-| **Llama-3.2 (11B vision)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |
-| **Qwen2.5 (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |
-| **Phi-4 (14B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Phi_4-Conversational.ipynb) | 2x faster | 50% less |
-| **Mistral (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Mistral_v0.3_(7B)-Conversational.ipynb) | 2.2x faster | 62% less |
-
-<br>
-
-# Llama 4 model details
+
+
+## Model Information
 
 The Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.
```
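Since the card's description covers loading this checkpoint, here is a minimal, hedged sketch (assuming `transformers` >= 4.51, which added Llama 4 support, and enough memory for the 16-bit weights; the repo id below is the `base_model` named in the card's front matter, used purely for illustration):

```python
# Minimal sketch of loading a Llama 4 Scout checkpoint with transformers.
# Assumes transformers >= 4.51; the repo id is the base_model referenced
# in the README front matter, not necessarily this repository itself.
from transformers import AutoProcessor, Llama4ForConditionalGeneration

model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

processor = AutoProcessor.from_pretrained(model_id)
model = Llama4ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype="bfloat16",  # matches "torch_dtype" in config.json
    device_map="auto",       # shard across available devices
)
```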
config.json CHANGED
```diff
@@ -3,26 +3,20 @@
     "Llama4ForConditionalGeneration"
   ],
   "boi_token_index": 200080,
-  "bos_token_id": 200000,
   "eoi_token_index": 200081,
-  "eos_token_id": 200008,
   "image_token_index": 200092,
   "model_type": "llama4",
-  "pad_token_id": 200018,
   "text_config": {
     "_attn_implementation_autoset": true,
     "attention_bias": false,
     "attention_chunk_size": 8192,
     "attention_dropout": 0.0,
-    "attn_scale": 0.1,
-    "attn_temperature_tuning": 4,
     "bos_token_id": 200000,
     "eos_token_id": [
       200001,
       200007,
       200008
     ],
-    "floor_scale": 8192,
     "for_llm_compressor": false,
     "head_dim": 128,
     "hidden_act": "silu",
@@ -33,106 +27,7 @@
     "intermediate_size_mlp": 16384,
     "max_position_embeddings": 10485760,
     "model_type": "llama4_text",
-    "moe_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
-    "no_rope_layers": [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0],
+    "no_rope_layers": [],
     "num_attention_heads": 40,
     "num_experts_per_tok": 1,
     "num_hidden_layers": 48,
@@ -156,10 +51,8 @@
     "use_qk_norm": true,
     "vocab_size": 202048
   },
-  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.0",
-  "unsloth_fixed": true,
+  "transformers_version": "4.51.0.dev0",
   "vision_config": {
     "_attn_implementation_autoset": true,
     "attention_dropout": 0.0,
@@ -180,7 +73,6 @@
     "projector_input_dim": 4096,
     "projector_output_dim": 4096,
     "rope_theta": 10000,
-    "torch_dtype": "bfloat16",
     "vision_feature_layer": -1,
     "vision_feature_select_strategy": "default",
     "vision_output_dim": 4096
```
generation_config.json CHANGED
```diff
@@ -9,5 +9,5 @@
   "pad_token_id": 200018,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.51.0"
+  "transformers_version": "4.51.0.dev0"
 }
```
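Only `transformers_version` changes here; the decoding defaults (`temperature` 0.6, `top_p` 0.9) and `pad_token_id` 200018 are untouched, and they are what `model.generate()` picks up when no overrides are passed. A small hedged sketch of reading them directly (repo id illustrative):

```python
# Sketch: read the decoding defaults that generation_config.json defines.
# Repo id is illustrative; substitute the repository this commit belongs to.
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("meta-llama/Llama-4-Scout-17B-16E-Instruct")
print(gen.temperature)   # 0.6
print(gen.top_p)         # 0.9
print(gen.pad_token_id)  # 200018
```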
special_tokens_map.json CHANGED
```diff
@@ -1,23 +1,5 @@
 {
-  "bos_token": {
-    "content": "<|begin_of_text|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|eot|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|finetune_right_pad|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<|begin_of_text|>",
+  "eos_token": "<|eot|>",
+  "pad_token": "<|finetune_right_pad|>"
 }
```
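The dict form spells out per-token `AddedToken` flags (`lstrip`, `normalized`, `rstrip`, `single_word`), while the plain-string form relies on the defaults for those flags; the token strings themselves are identical. A hedged check (repo id illustrative):

```python
# Sketch: both serializations resolve to the same special-token strings.
# Repo id is illustrative; substitute the repository this commit belongs to.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Llama-4-Scout-17B-16E-Instruct")
print(tok.bos_token)  # <|begin_of_text|>
print(tok.eos_token)  # <|eot|>
print(tok.pad_token)  # <|finetune_right_pad|>
```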
tokenizer_config.json CHANGED
```diff
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": true,
   "added_tokens_decoder": {
     "200000": {
       "content": "<|begin_of_text|>",
@@ -9072,14 +9071,6 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "201134": {
-      "content": "<|finetune_right_pad_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<|begin_of_text|>",
@@ -9093,8 +9084,6 @@
   ],
   "model_max_length": 10485760,
   "pad_token": "<|finetune_right_pad|>",
-  "padding_side": "left",
   "processor_class": "Llama4Processor",
-  "tokenizer_class": "PreTrainedTokenizer",
-  "unk_token": null
+  "tokenizer_class": "PreTrainedTokenizer"
 }
```
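With `"padding_side": "left"` removed, a freshly loaded tokenizer falls back to the class default (right padding) unless the caller overrides it. A hedged sketch of restoring left padding where a workflow depends on it (repo id illustrative):

```python
# Sketch: padding_side is no longer pinned in tokenizer_config.json, so
# the class default (right) applies; override at load time if needed.
# Repo id is illustrative.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    padding_side="left",
)
print(tok.padding_side)  # left
```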