jsaizant committed
Commit 68611a4 · 2 Parent(s): 5a0c4ac 4b87213

Merging version 1.1

config.json CHANGED
@@ -1,20 +1,34 @@
 {
-  "_name_or_path": "/home/jsaiz/storage5/gpfs/projects/bsc88/text/models/salamandra_quantized_speculative/salamandra-7b-base-fp8",
+  "_name_or_path": "/gpfs/projects/bsc88/text/models/salamandra_quantized_speculative/V1.1/salamandra-7b-base-fp8",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "compression_config": {
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
     "config_groups": {
       "group_0": {
         "input_activations": {
+          "actorder": null,
           "block_structure": null,
           "dynamic": true,
           "group_size": null,
           "num_bits": 8,
-          "observer": "memoryless",
+          "observer": null,
           "observer_kwargs": {},
           "strategy": "token",
           "symmetric": true,
@@ -25,6 +39,7 @@
         "Linear"
       ],
       "weights": {
+        "actorder": null,
         "block_structure": null,
         "dynamic": false,
         "group_size": null,
@@ -38,33 +53,21 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.2392940378575874,
+    "global_compression_ratio": 1.4589662622052346,
     "ignore": [
       "lm_head"
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "frozen"
+    "quantization_status": "compressed",
+    "sparsity_config": {}
   },
-  "eos_token_id": 2,
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 8192,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-06,
+  "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.46.1",
+  "torch_dtype": "float16",
+  "transformers_version": "4.49.0",
   "use_cache": true,
   "vocab_size": 256000
 }
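
For context, the rename of the top-level key from "compression_config" to "quantization_config" (now reporting status "compressed" and format "float-quantized") is what downstream loaders inspect to detect the compressed-tensors FP8 scheme. Below is a minimal sketch of checking the merged fields; it is not part of this commit, and the path is a placeholder for a local checkout of this repository.

import json

# Read the merged config.json from a local copy of the checkpoint
# (placeholder path; adjust to wherever the repository was downloaded).
with open("salamandra-7b-base-fp8/config.json") as f:
    cfg = json.load(f)

qcfg = cfg["quantization_config"]        # was "compression_config" before v1.1
print(qcfg["quant_method"])              # compressed-tensors
print(qcfg["format"])                    # float-quantized
print(qcfg["quantization_status"])       # compressed
print(cfg["torch_dtype"])                # float16 (was float32)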
generation_config.json CHANGED
@@ -7,5 +7,5 @@
   "repetition_penalty": 1.2,
   "temperature": 0.1,
   "top_p": 0.95,
-  "transformers_version": "4.46.1"
+  "transformers_version": "4.49.0"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6105385784c5789c2f40fef1fa1ac3c5976b511b7891ea845a2f9f72b7687e9f
-size 4975921944
+oid sha256:1f81e23cf67fa0a03f32b8d9b4bb38a77092118c10cd21a35df57fac3162dd88
+size 4975943928
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f99296721901abd10d05c66515bdd7e25cf3f5000b131f45dd8d13c7d7610c27
-size 4892001400
+oid sha256:2b220b56f7b5417ab4edebf55c0633baea339ccc9dddeeec55f5cc9c9a28f4cf
+size 4891979120
model.safetensors.index.json CHANGED
@@ -137,7 +137,7 @@
   "model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.16.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
   "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.16.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+  "model.layers.16.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
   "model.layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.16.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
   "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:990527d1e7b98c027d386c742250b2f8517bd3adf98c46cc6c1c2f35b234c224
-size 37007559
+oid sha256:2e90b85b3e3b3ebfc6b9bafeb954b37f2435eed595738337e53f2a746d23d5a2
+size 37007416
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa490e57cebce5cb1a0a5b1a5d3fa4de05aee53dc3a44791f1c3401db44d802d
-size 4813274
+oid sha256:ab94ddf46d14f0279254858d53770c5319c5129d47291ee2bada530271cb1292
+size 4813276
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": null,
+  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -1087,8 +1087,13 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "legacy": true,
+  "extra_special_tokens": {},
+  "legacy": false,
+  "local_files_only": true,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false