Merging version 1.1
Changed files:
- config.json +24 -21
- generation_config.json +1 -1
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +1 -1
- tokenizer.json +2 -2
- tokenizer.model +2 -2
- tokenizer_config.json +7 -2
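The diffs below read most naturally against a local checkout of the repository at this commit. As a minimal sketch of pinning a download to a specific revision with huggingface_hub (the repo id and revision are placeholders, not taken from this page):

    # Hypothetical sketch: pin a snapshot to this merge commit.
    # repo_id and revision are placeholders; the page does not name them.
    from huggingface_hub import snapshot_download

    local_dir = snapshot_download(
        repo_id="org/salamandra-7b-base-fp8",  # placeholder
        revision="main",                       # or this commit's hash
    )
    print(local_dir)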
config.json
CHANGED
@@ -1,20 +1,34 @@
 {
-  "_name_or_path": "/
+  "_name_or_path": "/gpfs/projects/bsc88/text/models/salamandra_quantized_speculative/V1.1/salamandra-7b-base-fp8",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
     "config_groups": {
       "group_0": {
         "input_activations": {
+          "actorder": null,
           "block_structure": null,
           "dynamic": true,
           "group_size": null,
           "num_bits": 8,
-          "observer":
+          "observer": null,
           "observer_kwargs": {},
           "strategy": "token",
           "symmetric": true,
@@ -25,6 +39,7 @@
           "Linear"
         ],
         "weights": {
+          "actorder": null,
           "block_structure": null,
           "dynamic": false,
           "group_size": null,
@@ -38,33 +53,21 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.
+    "global_compression_ratio": 1.4589662622052346,
     "ignore": [
       "lm_head"
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "
+    "quantization_status": "compressed",
+    "sparsity_config": {}
   },
-  "
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 8192,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-06,
+  "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "
-  "transformers_version": "4.
+  "torch_dtype": "float16",
+  "transformers_version": "4.49.0",
   "use_cache": true,
   "vocab_size": 256000
 }
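The substantive change here is that the compressed-tensors metadata now lives under a top-level "quantization_config" key: FP8 ("float-quantized") weights with static scales, dynamic per-token activation quantization, and lm_head left unquantized. A minimal sketch for inspecting the merged scheme from a local checkout, using only the standard library:

    # Sketch: summarize the merged quantization scheme from config.json.
    import json

    with open("config.json") as f:
        cfg = json.load(f)

    quant = cfg["quantization_config"]
    group = quant["config_groups"]["group_0"]
    print(quant["quant_method"], quant["format"])  # compressed-tensors float-quantized
    print("weights dynamic:", group["weights"]["dynamic"])           # False (static scales)
    print("activations:", group["input_activations"]["strategy"],
          "dynamic =", group["input_activations"]["dynamic"])        # token, True
    print("unquantized modules:", quant["ignore"])  # ['lm_head']

A checkpoint in this format is normally consumed by a runtime that understands compressed-tensors (for example vLLM, or transformers with the compressed-tensors package installed) rather than dequantized by hand.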
generation_config.json
CHANGED
@@ -7,5 +7,5 @@
   "repetition_penalty": 1.2,
   "temperature": 0.1,
   "top_p": 0.95,
-  "transformers_version": "4.
+  "transformers_version": "4.49.0"
 }
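The only change here is the pinned transformers version. As a sketch, the shipped sampling defaults can be read back with transformers' GenerationConfig (the path is assumed to be a local checkout):

    # Sketch: load the repo's generation defaults from a local checkout.
    from transformers import GenerationConfig

    gen = GenerationConfig.from_pretrained(".")
    print(gen.repetition_penalty, gen.temperature, gen.top_p)  # 1.2 0.1 0.95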
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1f81e23cf67fa0a03f32b8d9b4bb38a77092118c10cd21a35df57fac3162dd88
+size 4975943928
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2b220b56f7b5417ab4edebf55c0633baea339ccc9dddeeec55f5cc9c9a28f4cf
+size 4891979120
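Both weight shards are tracked with Git LFS, so what the diff actually shows are three-line pointer files: the spec version, the blob's sha256 oid, and its byte size. A minimal sketch for verifying a downloaded shard against its pointer (file paths illustrative):

    # Sketch: verify a downloaded blob against a git-lfs pointer file.
    import hashlib
    import os

    def verify_lfs(pointer_path: str, blob_path: str) -> bool:
        meta = {}
        with open(pointer_path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                meta[key] = value
        if os.path.getsize(blob_path) != int(meta["size"]):
            return False
        digest = hashlib.sha256()
        with open(blob_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest() == meta["oid"].removeprefix("sha256:")

    print(verify_lfs("model-00001-of-00002.safetensors.pointer",  # illustrative path
                     "model-00001-of-00002.safetensors"))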
model.safetensors.index.json
CHANGED
@@ -137,7 +137,7 @@
     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.16.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.16.mlp.gate_proj.weight_scale": "model-
+    "model.layers.16.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.16.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
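The index maps every tensor, including the FP8 weight_scale tensors, to the shard that stores it; this edit repoints one scale to shard 1 while its neighbours stay on shard 2. A sketch of resolving a tensor's shard through the index's standard "weight_map":

    # Sketch: look up which shard stores a given tensor.
    import json

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    name = "model.layers.16.mlp.gate_proj.weight_scale"
    print(name, "->", index["weight_map"][name])  # model-00001-of-00002.safetensors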
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2e90b85b3e3b3ebfc6b9bafeb954b37f2435eed595738337e53f2a746d23d5a2
+size 37007416
tokenizer.model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ab94ddf46d14f0279254858d53770c5319c5129d47291ee2bada530271cb1292
+size 4813276
tokenizer_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space":
+  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -1087,8 +1087,13 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "
+  "extra_special_tokens": {},
+  "legacy": false,
+  "local_files_only": true,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
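Beyond the new bookkeeping keys, the visible behavioural settings are add_prefix_space now reading true and legacy false. A sketch of loading the updated tokenizer from a local checkout and confirming the BOS handling (the sample string is arbitrary):

    # Sketch: load the tokenizer locally and confirm BOS handling.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".", local_files_only=True)
    print(tok.add_bos_token, tok.add_eos_token)  # True False
    ids = tok("Hola món").input_ids
    print(ids[0] == tok.bos_token_id)            # True: BOS is prepended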