123lxb committed on
Commit
dd4486d
·
verified ·
1 Parent(s): ca94c52

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +35 -53
config.json CHANGED
@@ -1,60 +1,42 @@
1
  {
2
- "model_type": "llama",
3
- "architectures": ["LlamaForCausalLM"],
4
- "hidden_size": 2048,
5
- "intermediate_size": 5632,
6
- "max_position_embeddings": 16384,
7
- "num_attention_heads": 16,
8
- "num_hidden_layers": 24,
9
- "num_key_value_heads": 16,
10
- "vocab_size": 102400,
11
  "torch_dtype": "bfloat16",
12
- "transformers_version": "4.33.1",
13
- "aligner_config": {
14
- "cls": "MlpProjector",
15
- "model_type": "aligner",
16
- "params": {
17
- "depth": 2,
18
- "input_dim": 1024,
19
- "n_embed": 2048,
20
- "projector_type": "mlp_gelu"
21
- }
 
 
22
  },
23
- "gen_aligner_config": {
24
- "cls": "MlpProjector",
25
- "model_type": "gen_aligner",
26
- "params": {
27
- "depth": 2,
28
- "input_dim": 8,
29
- "n_embed": 2048,
30
- "projector_type": "mlp_gelu"
31
- }
 
32
  },
33
- "gen_head_config": {
34
- "cls": "vision_head",
35
- "model_type": "gen_head",
36
- "params": {
37
- "image_token_embed": 2048,
38
- "image_token_size": 16384,
39
- "n_embed": 2048
40
- }
41
  },
42
- "gen_vision_config": {
43
- "cls": "VQ-16",
44
- "model_type": "gen_vision",
45
- "params": {
46
- "image_token_size": 16384,
47
- "n_embed": 8
48
- }
49
- },
50
- "vision_config": {
51
- "cls": "CLIPVisionTower",
52
- "model_type": "vision",
53
- "params": {
54
- "image_size": 384,
55
- "model_name": "siglip_large_patch16_384",
56
- "select_feature": "same",
57
- "select_layer": -1
58
- }
59
  }
60
  }
 
1
  {
2
+ "architectures": ["MultiModalityCausalLM"],
3
+ "model_type": "multi_modality",
 
 
 
 
 
 
 
4
  "torch_dtype": "bfloat16",
5
+
6
+ "llama_config": {
7
+ "architectures": ["LlamaForCausalLM"],
8
+ "hidden_size": 2048,
9
+ "intermediate_size": 5632,
10
+ "max_position_embeddings": 16384,
11
+ "num_attention_heads": 16,
12
+ "num_hidden_layers": 24,
13
+ "num_key_value_heads": 16,
14
+ "rms_norm_eps": 1e-6,
15
+ "vocab_size": 102400,
16
+ "torch_dtype": "bfloat16"
17
  },
18
+
19
+ "clip_vision_config": {
20
+ "architectures": ["CLIPVisionModel"],
21
+ "hidden_size": 1024,
22
+ "image_size": 384,
23
+ "patch_size": 16,
24
+ "projection_dim": 2048,
25
+ "num_hidden_layers": 24,
26
+ "num_attention_heads": 16,
27
+ "intermediate_size": 4096
28
  },
29
+
30
+ "alignment_config": {
31
+ "projector_type": "mlp_gelu",
32
+ "depth": 2,
33
+ "input_dim": 1024,
34
+ "n_embed": 2048
 
 
35
  },
36
+
37
+ "generation_config": {
38
+ "image_token_size": 16384,
39
+ "vq_codebook_size": 8,
40
+ "image_token_embed": 2048
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  }