jw2yang committed
Commit 4446406 · 1 Parent(s): ee95aa9

remove unused keys

config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "microsoft/Magma-8B",
   "architectures": [
     "MagmaForCausalLM"
   ],
@@ -8,7 +7,7 @@
   "auto_map": {
     "AutoConfig": "microsoft/Magma-8B--configuration_magma.MagmaConfig",
     "AutoModelForCausalLM": "microsoft/Magma-8B--modeling_magma.MagmaForCausalLM"
-  },
+  },
   "hidden_act": "silu",
   "hidden_size": 4096,
   "image_token_id": 128257,
@@ -33,89 +32,36 @@
   "spatial_quant_size": 256,
   "text_config": {
     "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "add_cross_attention": false,
     "architectures": [
       "LlamaForCausalLM"
     ],
     "attention_bias": false,
     "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
     "bos_token_id": 128000,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
     "eos_token_id": 128009,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
+    "head_dim": 128,
     "hidden_act": "silu",
     "hidden_size": 4096,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
     "initializer_range": 0.02,
     "intermediate_size": 14336,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "length_penalty": 1.0,
-    "max_length": 20,
     "max_position_embeddings": 8192,
-    "min_length": 0,
     "mlp_bias": false,
     "model_type": "llama",
-    "no_repeat_ngram_size": 0,
     "num_attention_heads": 32,
-    "num_beam_groups": 1,
-    "num_beams": 1,
     "num_hidden_layers": 32,
     "num_key_value_heads": 8,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
     "pad_token_id": 128256,
-    "prefix": null,
     "pretraining_tp": 1,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
     "rms_norm_eps": 1e-05,
     "rope_scaling": null,
     "rope_theta": 500000.0,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": false,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
     "torch_dtype": "bfloat16",
-    "torchscript": false,
-    "typical_p": 1.0,
-    "use_bfloat16": false,
     "use_cache": true,
     "vocab_size": 128261
   },
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.44.1",
+  "transformers_version": "4.52.0.dev0",
   "use_cache": false,
   "vision_config": {
     "attention_bias": false,
@@ -138,9 +84,10 @@
     "mm_projector_type": "mlp2x_gelu",
     "mm_use_im_patch_token": false,
     "mm_use_im_start_end": false,
+    "mm_use_row_seperator": true,
     "mm_vision_select_feature": "patch",
     "mm_vision_select_layer": -2,
-    "mm_use_row_seperator": true,
+    "model_type": "magma_vision",
     "num_attention_heads": 32,
     "num_hidden_layers": 32,
     "num_key_value_heads": 8,
@@ -153,7 +100,6 @@
     "tokenizer_model_max_length": 4096,
     "tokenizer_padding_side": "right",
     "torch_dtype": "bfloat16",
-    "transformers_version": "4.36.2",
     "tune_mm_mlp_adapter": false,
     "tune_vision_tokenizer": "all",
     "use_cache": true,
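Note: this commit drops the generation-time defaults that an older transformers version had serialized into the nested text_config (do_sample, temperature, top_k, id2label, and so on), makes "head_dim": 128 explicit (4096 hidden size over 32 attention heads), and records the vision sub-config's model_type. A minimal way to sanity-check the cleaned file is to pull the raw config.json and inspect the text_config block; the sketch below assumes the commit is on the repo's default branch and that huggingface_hub is installed.

import json
from huggingface_hub import hf_hub_download

# Fetch the raw config.json from the repo and inspect the nested text_config block.
path = hf_hub_download("microsoft/Magma-8B", "config.json")
with open(path) as f:
    cfg = json.load(f)

text_cfg = cfg["text_config"]
# head_dim is now explicit and equals hidden_size / num_attention_heads (4096 / 32 = 128).
assert text_cfg["head_dim"] == text_cfg["hidden_size"] // text_cfg["num_attention_heads"]

# The generation defaults removed by this commit should no longer be present.
for key in ("do_sample", "top_k", "temperature", "id2label", "max_length"):
    assert key not in text_cfg, f"{key} still present"
print("text_config keys:", sorted(text_cfg))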
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 128000,
   "eos_token_id": 128009,
   "pad_token_id": 128256,
-  "transformers_version": "4.44.1"
+  "transformers_version": "4.52.0.dev0"
 }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf018c291b927b63801b3d036b96f553a351f085852db2b4c62025323e6e2bee
-size 4983726904
+oid sha256:1a66e191d251086f7d69a9429369335d1a1fdab095a1c092663ee68b3c1617c9
+size 4977422784
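Note: the shard itself lives in Git LFS, so the commit only rewrites the pointer: a new sha256 and a slightly smaller size, consistent with tensors being dropped from this shard. A minimal sketch for checking a locally downloaded shard against the new pointer values (the local filename is assumed to match the repo filename):

import hashlib
import os

def verify_lfs_pointer(path, expected_sha256, expected_size):
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    assert os.path.getsize(path) == expected_size, "size mismatch"
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
    assert h.hexdigest() == expected_sha256, "sha256 mismatch"

verify_lfs_pointer(
    "model-00001-of-00004.safetensors",
    "1a66e191d251086f7d69a9429369335d1a1fdab095a1c092663ee68b3c1617c9",
    4977422784,
)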
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 17812436736
+    "total_size": 17806132992
   },
   "weight_map": {
     "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
@@ -299,9 +299,6 @@
   "multi_modal_projector.proj.2.bias": "model-00001-of-00004.safetensors",
   "multi_modal_projector.proj.2.weight": "model-00001-of-00004.safetensors",
   "multi_modal_projector.row_seperator": "model-00001-of-00004.safetensors",
-  "vision_tower.clip_vision_model.head.proj.weight": "model-00001-of-00004.safetensors",
-  "vision_tower.clip_vision_model.trunk.head.norm.bias": "model-00001-of-00004.safetensors",
-  "vision_tower.clip_vision_model.trunk.head.norm.weight": "model-00001-of-00004.safetensors",
   "vision_tower.clip_vision_model.trunk.stages.0.blocks.0.conv_dw.bias": "model-00001-of-00004.safetensors",
   "vision_tower.clip_vision_model.trunk.stages.0.blocks.0.conv_dw.weight": "model-00001-of-00004.safetensors",
   "vision_tower.clip_vision_model.trunk.stages.0.blocks.0.gamma": "model-00001-of-00004.safetensors",