Upload folder using huggingface_hub
Browse files- .gitattributes +8 -0
- README.md +28 -3
- config.json +80 -0
- generation_config.json +13 -0
- llama4-scout-instruct-q4k-0.uqff +3 -0
- llama4-scout-instruct-q4k-1.uqff +3 -0
- llama4-scout-instruct-q4k-2.uqff +3 -0
- llama4-scout-instruct-q4k-3.uqff +3 -0
- llama4-scout-instruct-q4k-4.uqff +3 -0
- llama4-scout-instruct-q4k-5.uqff +3 -0
- llama4-scout-instruct-q4k-6.uqff +3 -0
- preprocessor_config.json +33 -0
- processor_config.json +6 -0
- residual.safetensors +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
llama4-scout-instruct-q4k-0.uqff filter=lfs diff=lfs merge=lfs -text
|
37 |
+
llama4-scout-instruct-q4k-1.uqff filter=lfs diff=lfs merge=lfs -text
|
38 |
+
llama4-scout-instruct-q4k-2.uqff filter=lfs diff=lfs merge=lfs -text
|
39 |
+
llama4-scout-instruct-q4k-3.uqff filter=lfs diff=lfs merge=lfs -text
|
40 |
+
llama4-scout-instruct-q4k-4.uqff filter=lfs diff=lfs merge=lfs -text
|
41 |
+
llama4-scout-instruct-q4k-5.uqff filter=lfs diff=lfs merge=lfs -text
|
42 |
+
llama4-scout-instruct-q4k-6.uqff filter=lfs diff=lfs merge=lfs -text
|
43 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- uqff
|
4 |
+
- mistral.rs
|
5 |
+
base_model: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
6 |
+
base_model_relation: quantized
|
7 |
+
---
|
8 |
+
|
9 |
+
<!-- Autogenerated from user input. -->
|
10 |
+
|
11 |
+
# `meta-llama/Llama-4-Scout-17B-16E-Instruct`, UQFF quantization
|
12 |
+
|
13 |
+
|
14 |
+
Run with [mistral.rs](https://github.com/EricLBuehler/mistral.rs). Documentation: [UQFF docs](https://github.com/EricLBuehler/mistral.rs/blob/master/docs/UQFF.md).
|
15 |
+
|
16 |
+
1) **Flexible** 🌀: Multiple quantization formats in *one* file format with *one* framework to run them all.
|
17 |
+
2) **Reliable** 🔒: Compatibility ensured with *embedded* and *checked* semantic versioning information from day 1.
|
18 |
+
3) **Easy** 🤗: Download UQFF models *easily* and *quickly* from Hugging Face, or use a local file.
|
19 |
+
4) **Customizable** 🛠️: Make and publish your own UQFF files in minutes.
|
20 |
+
|
21 |
+
## Examples
|
22 |
+
|
23 |
+
Note: If you are using an Apple Silicon device (on Metal), prefer using an 🔥 AFQ quantization for optimum performance!
|
24 |
+
|
25 |
+
|Quantization type(s)|Example|
|
26 |
+
|--|--|
|
27 |
+
|Q4K|`./mistralrs-server -i vision-plain -m EricB/Llama-4-Scout-17B-16E-Instruct-UQFF -a llama4 --from-uqff "llama4-scout-instruct-q4k-0.uqff;llama4-scout-instruct-q4k-1.uqff;llama4-scout-instruct-q4k-2.uqff;llama4-scout-instruct-q4k-3.uqff;llama4-scout-instruct-q4k-4.uqff;llama4-scout-instruct-q4k-5.uqff;llama4-scout-instruct-q4k-6.uqff"`|
|
28 |
+
|AFQ4 |Coming soon!|
|
config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Llama4ForConditionalGeneration"
|
4 |
+
],
|
5 |
+
"boi_token_index": 200080,
|
6 |
+
"eoi_token_index": 200081,
|
7 |
+
"image_token_index": 200092,
|
8 |
+
"model_type": "llama4",
|
9 |
+
"text_config": {
|
10 |
+
"_attn_implementation_autoset": true,
|
11 |
+
"attention_bias": false,
|
12 |
+
"attention_chunk_size": 8192,
|
13 |
+
"attention_dropout": 0.0,
|
14 |
+
"bos_token_id": 200000,
|
15 |
+
"eos_token_id": [
|
16 |
+
200001,
|
17 |
+
200007,
|
18 |
+
200008
|
19 |
+
],
|
20 |
+
"for_llm_compressor": false,
|
21 |
+
"head_dim": 128,
|
22 |
+
"hidden_act": "silu",
|
23 |
+
"hidden_size": 5120,
|
24 |
+
"initializer_range": 0.02,
|
25 |
+
"interleave_moe_layer_step": 1,
|
26 |
+
"intermediate_size": 8192,
|
27 |
+
"intermediate_size_mlp": 16384,
|
28 |
+
"max_position_embeddings": 10485760,
|
29 |
+
"model_type": "llama4_text",
|
30 |
+
"no_rope_layers": [],
|
31 |
+
"num_attention_heads": 40,
|
32 |
+
"num_experts_per_tok": 1,
|
33 |
+
"num_hidden_layers": 48,
|
34 |
+
"num_key_value_heads": 8,
|
35 |
+
"num_local_experts": 16,
|
36 |
+
"output_router_logits": false,
|
37 |
+
"pad_token_id": 200018,
|
38 |
+
"rms_norm_eps": 1e-05,
|
39 |
+
"rope_scaling": {
|
40 |
+
"factor": 16.0,
|
41 |
+
"high_freq_factor": 1.0,
|
42 |
+
"low_freq_factor": 1.0,
|
43 |
+
"original_max_position_embeddings": 8192,
|
44 |
+
"rope_type": "llama3"
|
45 |
+
},
|
46 |
+
"rope_theta": 500000.0,
|
47 |
+
"router_aux_loss_coef": 0.001,
|
48 |
+
"router_jitter_noise": 0.0,
|
49 |
+
"torch_dtype": "bfloat16",
|
50 |
+
"use_cache": true,
|
51 |
+
"use_qk_norm": true,
|
52 |
+
"vocab_size": 202048
|
53 |
+
},
|
54 |
+
"torch_dtype": "bfloat16",
|
55 |
+
"transformers_version": "4.51.0.dev0",
|
56 |
+
"vision_config": {
|
57 |
+
"_attn_implementation_autoset": true,
|
58 |
+
"attention_dropout": 0.0,
|
59 |
+
"hidden_act": "gelu",
|
60 |
+
"hidden_size": 1408,
|
61 |
+
"image_size": 336,
|
62 |
+
"initializer_range": 0.02,
|
63 |
+
"intermediate_size": 5632,
|
64 |
+
"model_type": "llama4_vision_model",
|
65 |
+
"multi_modal_projector_bias": false,
|
66 |
+
"norm_eps": 1e-05,
|
67 |
+
"num_attention_heads": 16,
|
68 |
+
"num_channels": 3,
|
69 |
+
"num_hidden_layers": 34,
|
70 |
+
"patch_size": 14,
|
71 |
+
"pixel_shuffle_ratio": 0.5,
|
72 |
+
"projector_dropout": 0.0,
|
73 |
+
"projector_input_dim": 4096,
|
74 |
+
"projector_output_dim": 4096,
|
75 |
+
"rope_theta": 10000,
|
76 |
+
"vision_feature_layer": -1,
|
77 |
+
"vision_feature_select_strategy": "default",
|
78 |
+
"vision_output_dim": 4096
|
79 |
+
}
|
80 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 200000,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": [
|
5 |
+
200001,
|
6 |
+
200007,
|
7 |
+
200008
|
8 |
+
],
|
9 |
+
"pad_token_id": 200018,
|
10 |
+
"temperature": 0.6,
|
11 |
+
"top_p": 0.9,
|
12 |
+
"transformers_version": "4.51.0.dev0"
|
13 |
+
}
|
llama4-scout-instruct-q4k-0.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6cad10ce6f77ac322176c5caadfb4b2a2c5ef9e7a0b1134214a90cc36ef33f0
|
3 |
+
size 11187399004
|
llama4-scout-instruct-q4k-1.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec14fb86ff33941f4bb0b0084e2156da3f9f9031f1a63123939d28d7feb1b92f
|
3 |
+
size 10629093732
|
llama4-scout-instruct-q4k-2.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02f7d19d8fdd825fbbc26c5164d39fe04dd031527204e17561c23cd0a5a9043b
|
3 |
+
size 10711623460
|
llama4-scout-instruct-q4k-3.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a00012bbe2e499d6742ca557140e530f0f0936b1649371c18efa2cdcc01d844
|
3 |
+
size 10629094212
|
llama4-scout-instruct-q4k-4.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6d50da30540606a3380e5b500d106880c7b5d88f5dc0d7432e046a3d21b4427
|
3 |
+
size 10702776140
|
llama4-scout-instruct-q4k-5.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efe27f9e454c98fafed7a546324c70e5bc4a40dc1651e7a297f9c5cc918ffd78
|
3 |
+
size 6664625396
|
llama4-scout-instruct-q4k-6.uqff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a230dfd04529c488df5a3ec1b7292f0eaf71f2ac284c61f2796a285a6e040f15
|
3 |
+
size 26879814
|
preprocessor_config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": null,
|
3 |
+
"data_format": "channels_first",
|
4 |
+
"default_to_square": true,
|
5 |
+
"device": null,
|
6 |
+
"do_center_crop": null,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
0.5,
|
13 |
+
0.5,
|
14 |
+
0.5
|
15 |
+
],
|
16 |
+
"image_processor_type": "Llama4ImageProcessorFast",
|
17 |
+
"image_std": [
|
18 |
+
0.5,
|
19 |
+
0.5,
|
20 |
+
0.5
|
21 |
+
],
|
22 |
+
"input_data_format": null,
|
23 |
+
"max_patches": 16,
|
24 |
+
"processor_class": "Llama4Processor",
|
25 |
+
"resample": 2,
|
26 |
+
"rescale_factor": 0.00392156862745098,
|
27 |
+
"resize_to_max_canvas": false,
|
28 |
+
"return_tensors": null,
|
29 |
+
"size": {
|
30 |
+
"height": 336,
|
31 |
+
"width": 336
|
32 |
+
}
|
33 |
+
}
|
processor_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"fake_image_token": "<|image|>",
|
3 |
+
"image_token": "<|image|>",
|
4 |
+
"patch_size": 14,
|
5 |
+
"processor_class": "Llama4Processor"
|
6 |
+
}
|
residual.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2530ffaf3baab074f01c4b0adb4c8127fa98af17b2ee0627482ee68e6b77f93d
|
3 |
+
size 2115616688
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d98b584d0de067c4f8865975052b632d10e2e2aff6fcff80e8ec2f8e7990545f
|
3 |
+
size 28011016
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|