medmekk (HF Staff) committed · Commit 20906f2 · verified · 1 Parent(s): 8d9a8b6

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,24 @@
+ ---
+ base_model:
+ - medmekk/Llama-3.3-70B-Instruct-BNB-INT4
+ ---
+
+ # medmekk/Llama-3.3-70B-Instruct-BNB-INT4 (Quantized)
+
+ ## Description
+ This model is a quantized version of the original model `medmekk/Llama-3.3-70B-Instruct-BNB-INT4`. It has been quantized to int4 using bitsandbytes.
+
+ ## Quantization Details
+ - **Quantization Type**: int4
+ - **bnb_4bit_quant_type**: fp4
+ - **bnb_4bit_use_double_quant**: False
+ - **bnb_4bit_compute_dtype**: float32
+ - **bnb_4bit_quant_storage**: uint8
+
+ ## Usage
+ You can use this model in your applications by loading it directly from the Hugging Face Hub:
+ ```python
+ from transformers import AutoModel
+
+ model = AutoModel.from_pretrained("medmekk/Llama-3.3-70B-Instruct-BNB-INT4")
+ ```
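The card's snippet loads only the model weights. A slightly fuller sketch is below; it is not part of the model card, and it assumes that `accelerate` is installed (required for `device_map="auto"`) and that the checkpoint exposes the `LlamaModel` backbone listed in `config.json`, so a forward pass returns hidden states rather than logits.

```python
# Sketch only (not from the model card): load the int4 checkpoint together
# with its tokenizer and run a forward pass. device_map="auto" requires
# `accelerate` and spreads the ~40 GB of shards across available devices.
from transformers import AutoModel, AutoTokenizer

repo_id = "medmekk/Llama-3.3-70B-Instruct-BNB-INT4"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModel.from_pretrained(repo_id, device_map="auto")

inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
outputs = model(**inputs)
# config.json lists the LlamaModel backbone, so the output is a tensor of
# hidden states with hidden_size=8192, not next-token logits.
print(outputs.last_hidden_state.shape)
```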
config.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "architectures": [
+     "LlamaModel"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": [
+     128001,
+     128008,
+     128009
+   ],
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 28672,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 80,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "_load_in_4bit": true,
+     "_load_in_8bit": false,
+     "bnb_4bit_compute_dtype": "float32",
+     "bnb_4bit_quant_storage": "uint8",
+     "bnb_4bit_quant_type": "fp4",
+     "bnb_4bit_use_double_quant": false,
+     "llm_int8_enable_fp32_cpu_offload": false,
+     "llm_int8_has_fp16_weight": false,
+     "llm_int8_skip_modules": null,
+     "llm_int8_threshold": 6.0,
+     "load_in_4bit": true,
+     "load_in_8bit": false,
+     "quant_method": "bitsandbytes"
+   },
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.50.0.dev0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
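For reference, the `quantization_config` block above maps line by line onto a `BitsAndBytesConfig` in transformers. The sketch below is illustrative only; in particular the source repository name is a placeholder, since the card does not say which full-precision checkpoint the int4 weights were produced from.

```python
# Illustrative mapping of the quantization_config above onto BitsAndBytesConfig.
# The source checkpoint id below is a placeholder, not taken from this repo.
import torch
from transformers import AutoModel, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # "load_in_4bit": true
    bnb_4bit_quant_type="fp4",             # "bnb_4bit_quant_type": "fp4"
    bnb_4bit_use_double_quant=False,       # "bnb_4bit_use_double_quant": false
    bnb_4bit_compute_dtype=torch.float32,  # "bnb_4bit_compute_dtype": "float32"
    bnb_4bit_quant_storage=torch.uint8,    # "bnb_4bit_quant_storage": "uint8"
)

model = AutoModel.from_pretrained(
    "some-org/full-precision-llama-3.3-70b",  # placeholder source checkpoint
    quantization_config=bnb_config,
    device_map="auto",
)
```

Note that loading this already-quantized repository directly does not require passing a `BitsAndBytesConfig`; transformers picks the settings up from the `quantization_config` stored in `config.json`.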
model-00001-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49f157482e4e762d7eeed4fe1748243cc7cca51016d770856dc87f27f19aee4e
+ size 4989348020
model-00002-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edbb4a7499922947099c7ba87ee22a4d426ed34d0418ea079deee4b2991ee600
+ size 4898273256
model-00003-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b03d3f67b89fc2c72d2bd6602535a45b08555da7ad35a0855f9327b152e1484
+ size 4945457462
model-00004-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c73815eafb85f2841db0c941a303a2f046ace29fbcd9f9e412bbe1072c3d7c3a
+ size 4945457454
model-00005-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:228bfa8560d476000bf1c14b4c9ed2800b0f249125e6e315a67947a3d1d67ec7
+ size 4992678209
model-00006-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:075e5aa70e073d356a07e665ac1f9e5db1ad3f2e7474656517b11850449149e5
+ size 4983206815
model-00007-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cb60decb404ba0a741c9ae8ed795ac6a4cf61709a70780c8f4f3b6b4780000f
+ size 4945457454
model-00008-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9167fec454cb2da31e446c43fe22ce0d6835291a5c8c3ab03f49f6f953640e00
+ size 4992678209
model-00009-of-00009.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06baff3f50a79729a6aa4d10a62ad0533ec68c7a34ef9c73d46e4c44481cad9e
+ size 915495921
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
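Each weight shard above is stored as a git-lfs pointer file: the pointer records the LFS spec version, the SHA-256 of the real file (`oid sha256:...`), and its size in bytes. Below is a small sketch, not part of the repository, for checking a downloaded shard against its pointer; the local file path is an assumption for illustration.

```python
# Verify a downloaded shard against its git-lfs pointer: the byte count must
# match the pointer's "size" line and the SHA-256 digest its "oid" line.
# The expected values here are taken from model-00009-of-00009.safetensors above;
# the local path is assumed for illustration.
import hashlib
from pathlib import Path

def verify_shard(shard_path: str, expected_sha256: str, expected_size: int) -> bool:
    path = Path(shard_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in 1 MiB chunks to avoid loading multi-GB shards into memory.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

print(verify_shard(
    "model-00009-of-00009.safetensors",
    "06baff3f50a79729a6aa4d10a62ad0533ec68c7a34ef9c73d46e4c44481cad9e",
    915495921,
))
```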