Bingsu commited on
Commit
ddfbe00
·
verified ·
1 Parent(s): c8f5175

Upload HCXVisionForCausalLM

Browse files
config.json CHANGED
@@ -5,18 +5,123 @@
5
  ],
6
  "auto_map": {
7
  "AutoConfig": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--configuration_hyperclovax.HCXVisionConfig",
8
- "AutoModelForCausalLM": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--modeling_hyperclovax.HCXVisionForCausalLM",
9
- "AutoProcessor": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--preprocessor.HCXVisionProcessor"
10
  },
11
  "decoder_max_length": 16384,
 
12
  "freeze_decoder": false,
13
  "freeze_encoder": true,
14
  "freeze_mm_projector": false,
15
  "hidden_size": 3072,
16
  "ignore_index": -100,
17
- "img_start_id": 100271,
18
- "language_config": {
19
- "_attn_implementation_autoset": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "_name_or_path": "",
21
  "add_cross_attention": false,
22
  "architectures": [
@@ -97,26 +202,18 @@
97
  "top_p": 1.0,
98
  "torch_dtype": "bfloat16",
99
  "torchscript": false,
100
- "transformers_version": "4.51.3",
101
  "typical_p": 1.0,
102
  "use_bfloat16": false,
103
  "use_cache": true,
104
  "vocab_size": 110592
105
  },
106
- "max_image_cnt": 12,
107
- "max_num_grids": 9,
108
- "model_type": "hyperclovax_vlm",
109
- "num_queries_vis_abstractor": 81,
110
- "proj_pos_emb": true,
111
- "proj_prenorm": false,
112
- "q_former_model_name_or_path": null,
113
  "torch_dtype": "bfloat16",
114
- "transformers_version": "4.51.3",
115
  "unpad": true,
116
  "use_1x1_grid": true,
117
  "use_nth_layer": -2,
 
118
  "vision_config": {
119
- "_attn_implementation_autoset": true,
120
  "_name_or_path": "",
121
  "add_cross_attention": false,
122
  "anyres": true,
@@ -192,7 +289,6 @@
192
  "top_p": 1.0,
193
  "torch_dtype": "bfloat16",
194
  "torchscript": false,
195
- "transformers_version": "4.51.3",
196
  "typical_p": 1.0,
197
  "use_bfloat16": true
198
  }
 
5
  ],
6
  "auto_map": {
7
  "AutoConfig": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--configuration_hyperclovax.HCXVisionConfig",
8
+ "AutoModelForCausalLM": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--modeling_hyperclovax.HCXVisionForCausalLM"
 
9
  },
10
  "decoder_max_length": 16384,
11
+ "first_last_frames_slow": false,
12
  "freeze_decoder": false,
13
  "freeze_encoder": true,
14
  "freeze_mm_projector": false,
15
  "hidden_size": 3072,
16
  "ignore_index": -100,
17
+ "image_token_id": 100271,
18
+ "img_start_id": 100009,
19
+ "max_image_cnt": 12,
20
+ "max_num_grids": 9,
21
+ "mm_projector_type": "cabstractor",
22
+ "model_type": "hyperclovax_vlm",
23
+ "num_queries_vis_abstractor": -1,
24
+ "num_queries_vis_abstractor_image": 81,
25
+ "num_queries_vis_abstractor_video_fast": 9,
26
+ "num_queries_vis_abstractor_video_slow": 81,
27
+ "possible_resolutions": [
28
+ [
29
+ 378,
30
+ 378
31
+ ],
32
+ [
33
+ 378,
34
+ 756
35
+ ],
36
+ [
37
+ 378,
38
+ 1134
39
+ ],
40
+ [
41
+ 378,
42
+ 1512
43
+ ],
44
+ [
45
+ 378,
46
+ 1890
47
+ ],
48
+ [
49
+ 378,
50
+ 2268
51
+ ],
52
+ [
53
+ 378,
54
+ 2646
55
+ ],
56
+ [
57
+ 378,
58
+ 3024
59
+ ],
60
+ [
61
+ 378,
62
+ 3402
63
+ ],
64
+ [
65
+ 756,
66
+ 378
67
+ ],
68
+ [
69
+ 756,
70
+ 756
71
+ ],
72
+ [
73
+ 756,
74
+ 1134
75
+ ],
76
+ [
77
+ 756,
78
+ 1512
79
+ ],
80
+ [
81
+ 1134,
82
+ 378
83
+ ],
84
+ [
85
+ 1134,
86
+ 756
87
+ ],
88
+ [
89
+ 1134,
90
+ 1134
91
+ ],
92
+ [
93
+ 1512,
94
+ 378
95
+ ],
96
+ [
97
+ 1512,
98
+ 756
99
+ ],
100
+ [
101
+ 1890,
102
+ 378
103
+ ],
104
+ [
105
+ 2268,
106
+ 378
107
+ ],
108
+ [
109
+ 2646,
110
+ 378
111
+ ],
112
+ [
113
+ 3024,
114
+ 378
115
+ ],
116
+ [
117
+ 3402,
118
+ 378
119
+ ]
120
+ ],
121
+ "proj_pos_emb": true,
122
+ "proj_prenorm": false,
123
+ "q_former_model_name_or_path": null,
124
+ "text_config": {
125
  "_name_or_path": "",
126
  "add_cross_attention": false,
127
  "architectures": [
 
202
  "top_p": 1.0,
203
  "torch_dtype": "bfloat16",
204
  "torchscript": false,
 
205
  "typical_p": 1.0,
206
  "use_bfloat16": false,
207
  "use_cache": true,
208
  "vocab_size": 110592
209
  },
 
 
 
 
 
 
 
210
  "torch_dtype": "bfloat16",
211
+ "transformers_version": "4.52.4",
212
  "unpad": true,
213
  "use_1x1_grid": true,
214
  "use_nth_layer": -2,
215
+ "video_token_id": 100270,
216
  "vision_config": {
 
217
  "_name_or_path": "",
218
  "add_cross_attention": false,
219
  "anyres": true,
 
289
  "top_p": 1.0,
290
  "torch_dtype": "bfloat16",
291
  "torchscript": false,
 
292
  "typical_p": 1.0,
293
  "use_bfloat16": true
294
  }
generation_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "transformers_version": "4.51.3"
 
 
 
4
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": 100257,
5
+ "pad_token_id": 100257,
6
+ "transformers_version": "4.52.4"
7
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ddce2bbbb5980a812ba9bc83cceeaf00122a95a1f490f89b2aa87c23fcfeab8
3
- size 4958791776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7c1d6739129ff1e9b43102c088a466faa5b12fbe9f4f5d370a95255974c4eed
3
+ size 4982442712
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f809fc19c3aa1811e5467c6533865cfe912f222ae9684a2a04f5573b8bbbdb89
3
- size 2483804336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c185a170438b51a9ce587a16166873b5759349738975a5b40767c0147a7ff4
3
+ size 2460153400
model.safetensors.index.json CHANGED
@@ -98,7 +98,7 @@
98
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
99
  "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
100
  "language_model.model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
- "language_model.model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
102
  "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
103
  "language_model.model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
  "language_model.model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -294,89 +294,89 @@
294
  "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
295
  "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
296
  "language_model.model.norm.weight": "model-00002-of-00002.safetensors",
297
- "mm_projector.net.0.b1.conv1.bn.bias": "model-00002-of-00002.safetensors",
298
- "mm_projector.net.0.b1.conv1.bn.weight": "model-00002-of-00002.safetensors",
299
- "mm_projector.net.0.b1.conv1.conv.weight": "model-00002-of-00002.safetensors",
300
- "mm_projector.net.0.b1.conv2.bn.bias": "model-00002-of-00002.safetensors",
301
- "mm_projector.net.0.b1.conv2.bn.weight": "model-00002-of-00002.safetensors",
302
- "mm_projector.net.0.b1.conv2.conv.weight": "model-00002-of-00002.safetensors",
303
- "mm_projector.net.0.b1.conv3.bn.bias": "model-00002-of-00002.safetensors",
304
- "mm_projector.net.0.b1.conv3.bn.weight": "model-00002-of-00002.safetensors",
305
- "mm_projector.net.0.b1.conv3.conv.weight": "model-00002-of-00002.safetensors",
306
- "mm_projector.net.0.b1.se.fc1.bias": "model-00002-of-00002.safetensors",
307
- "mm_projector.net.0.b1.se.fc1.weight": "model-00002-of-00002.safetensors",
308
- "mm_projector.net.0.b1.se.fc2.bias": "model-00002-of-00002.safetensors",
309
- "mm_projector.net.0.b1.se.fc2.weight": "model-00002-of-00002.safetensors",
310
- "mm_projector.net.0.b2.conv1.bn.bias": "model-00002-of-00002.safetensors",
311
- "mm_projector.net.0.b2.conv1.bn.weight": "model-00002-of-00002.safetensors",
312
- "mm_projector.net.0.b2.conv1.conv.weight": "model-00002-of-00002.safetensors",
313
- "mm_projector.net.0.b2.conv2.bn.bias": "model-00002-of-00002.safetensors",
314
- "mm_projector.net.0.b2.conv2.bn.weight": "model-00002-of-00002.safetensors",
315
- "mm_projector.net.0.b2.conv2.conv.weight": "model-00002-of-00002.safetensors",
316
- "mm_projector.net.0.b2.conv3.bn.bias": "model-00002-of-00002.safetensors",
317
- "mm_projector.net.0.b2.conv3.bn.weight": "model-00002-of-00002.safetensors",
318
- "mm_projector.net.0.b2.conv3.conv.weight": "model-00002-of-00002.safetensors",
319
- "mm_projector.net.0.b2.se.fc1.bias": "model-00002-of-00002.safetensors",
320
- "mm_projector.net.0.b2.se.fc1.weight": "model-00002-of-00002.safetensors",
321
- "mm_projector.net.0.b2.se.fc2.bias": "model-00002-of-00002.safetensors",
322
- "mm_projector.net.0.b2.se.fc2.weight": "model-00002-of-00002.safetensors",
323
- "mm_projector.net.0.b3.conv1.bn.bias": "model-00002-of-00002.safetensors",
324
- "mm_projector.net.0.b3.conv1.bn.weight": "model-00002-of-00002.safetensors",
325
- "mm_projector.net.0.b3.conv1.conv.weight": "model-00002-of-00002.safetensors",
326
- "mm_projector.net.0.b3.conv2.bn.bias": "model-00002-of-00002.safetensors",
327
- "mm_projector.net.0.b3.conv2.bn.weight": "model-00002-of-00002.safetensors",
328
- "mm_projector.net.0.b3.conv2.conv.weight": "model-00002-of-00002.safetensors",
329
- "mm_projector.net.0.b3.conv3.bn.bias": "model-00002-of-00002.safetensors",
330
- "mm_projector.net.0.b3.conv3.bn.weight": "model-00002-of-00002.safetensors",
331
- "mm_projector.net.0.b3.conv3.conv.weight": "model-00002-of-00002.safetensors",
332
- "mm_projector.net.0.b3.se.fc1.bias": "model-00002-of-00002.safetensors",
333
- "mm_projector.net.0.b3.se.fc1.weight": "model-00002-of-00002.safetensors",
334
- "mm_projector.net.0.b3.se.fc2.bias": "model-00002-of-00002.safetensors",
335
- "mm_projector.net.0.b3.se.fc2.weight": "model-00002-of-00002.safetensors",
336
- "mm_projector.net.2.b1.conv1.bn.bias": "model-00002-of-00002.safetensors",
337
- "mm_projector.net.2.b1.conv1.bn.weight": "model-00002-of-00002.safetensors",
338
- "mm_projector.net.2.b1.conv1.conv.weight": "model-00002-of-00002.safetensors",
339
- "mm_projector.net.2.b1.conv2.bn.bias": "model-00002-of-00002.safetensors",
340
- "mm_projector.net.2.b1.conv2.bn.weight": "model-00002-of-00002.safetensors",
341
- "mm_projector.net.2.b1.conv2.conv.weight": "model-00002-of-00002.safetensors",
342
- "mm_projector.net.2.b1.conv3.bn.bias": "model-00002-of-00002.safetensors",
343
- "mm_projector.net.2.b1.conv3.bn.weight": "model-00002-of-00002.safetensors",
344
- "mm_projector.net.2.b1.conv3.conv.weight": "model-00002-of-00002.safetensors",
345
- "mm_projector.net.2.b1.se.fc1.bias": "model-00002-of-00002.safetensors",
346
- "mm_projector.net.2.b1.se.fc1.weight": "model-00002-of-00002.safetensors",
347
- "mm_projector.net.2.b1.se.fc2.bias": "model-00002-of-00002.safetensors",
348
- "mm_projector.net.2.b1.se.fc2.weight": "model-00002-of-00002.safetensors",
349
- "mm_projector.net.2.b2.conv1.bn.bias": "model-00002-of-00002.safetensors",
350
- "mm_projector.net.2.b2.conv1.bn.weight": "model-00002-of-00002.safetensors",
351
- "mm_projector.net.2.b2.conv1.conv.weight": "model-00002-of-00002.safetensors",
352
- "mm_projector.net.2.b2.conv2.bn.bias": "model-00002-of-00002.safetensors",
353
- "mm_projector.net.2.b2.conv2.bn.weight": "model-00002-of-00002.safetensors",
354
- "mm_projector.net.2.b2.conv2.conv.weight": "model-00002-of-00002.safetensors",
355
- "mm_projector.net.2.b2.conv3.bn.bias": "model-00002-of-00002.safetensors",
356
- "mm_projector.net.2.b2.conv3.bn.weight": "model-00002-of-00002.safetensors",
357
- "mm_projector.net.2.b2.conv3.conv.weight": "model-00002-of-00002.safetensors",
358
- "mm_projector.net.2.b2.se.fc1.bias": "model-00002-of-00002.safetensors",
359
- "mm_projector.net.2.b2.se.fc1.weight": "model-00002-of-00002.safetensors",
360
- "mm_projector.net.2.b2.se.fc2.bias": "model-00002-of-00002.safetensors",
361
- "mm_projector.net.2.b2.se.fc2.weight": "model-00002-of-00002.safetensors",
362
- "mm_projector.net.2.b3.conv1.bn.bias": "model-00002-of-00002.safetensors",
363
- "mm_projector.net.2.b3.conv1.bn.weight": "model-00002-of-00002.safetensors",
364
- "mm_projector.net.2.b3.conv1.conv.weight": "model-00002-of-00002.safetensors",
365
- "mm_projector.net.2.b3.conv2.bn.bias": "model-00002-of-00002.safetensors",
366
- "mm_projector.net.2.b3.conv2.bn.weight": "model-00002-of-00002.safetensors",
367
- "mm_projector.net.2.b3.conv2.conv.weight": "model-00002-of-00002.safetensors",
368
- "mm_projector.net.2.b3.conv3.bn.bias": "model-00002-of-00002.safetensors",
369
- "mm_projector.net.2.b3.conv3.bn.weight": "model-00002-of-00002.safetensors",
370
- "mm_projector.net.2.b3.conv3.conv.weight": "model-00002-of-00002.safetensors",
371
- "mm_projector.net.2.b3.se.fc1.bias": "model-00002-of-00002.safetensors",
372
- "mm_projector.net.2.b3.se.fc1.weight": "model-00002-of-00002.safetensors",
373
- "mm_projector.net.2.b3.se.fc2.bias": "model-00002-of-00002.safetensors",
374
- "mm_projector.net.2.b3.se.fc2.weight": "model-00002-of-00002.safetensors",
375
- "mm_projector.pos_emb": "model-00002-of-00002.safetensors",
376
- "mm_projector.readout.0.bias": "model-00002-of-00002.safetensors",
377
- "mm_projector.readout.0.weight": "model-00002-of-00002.safetensors",
378
- "mm_projector.readout.2.bias": "model-00002-of-00002.safetensors",
379
- "mm_projector.readout.2.weight": "model-00002-of-00002.safetensors",
380
  "vision_model.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
381
  "vision_model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
382
  "vision_model.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
 
98
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
99
  "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
100
  "language_model.model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
102
  "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
103
  "language_model.model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
  "language_model.model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
294
  "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
295
  "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
296
  "language_model.model.norm.weight": "model-00002-of-00002.safetensors",
297
+ "mm_projector.net.0.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
298
+ "mm_projector.net.0.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
299
+ "mm_projector.net.0.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
300
+ "mm_projector.net.0.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
301
+ "mm_projector.net.0.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
302
+ "mm_projector.net.0.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
303
+ "mm_projector.net.0.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
304
+ "mm_projector.net.0.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
305
+ "mm_projector.net.0.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
306
+ "mm_projector.net.0.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
307
+ "mm_projector.net.0.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
308
+ "mm_projector.net.0.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
309
+ "mm_projector.net.0.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
310
+ "mm_projector.net.0.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
311
+ "mm_projector.net.0.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
312
+ "mm_projector.net.0.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
313
+ "mm_projector.net.0.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
314
+ "mm_projector.net.0.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
315
+ "mm_projector.net.0.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
316
+ "mm_projector.net.0.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
317
+ "mm_projector.net.0.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
318
+ "mm_projector.net.0.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
319
+ "mm_projector.net.0.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
320
+ "mm_projector.net.0.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
321
+ "mm_projector.net.0.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
322
+ "mm_projector.net.0.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
323
+ "mm_projector.net.0.b3.conv1.bn.bias": "model-00001-of-00002.safetensors",
324
+ "mm_projector.net.0.b3.conv1.bn.weight": "model-00001-of-00002.safetensors",
325
+ "mm_projector.net.0.b3.conv1.conv.weight": "model-00001-of-00002.safetensors",
326
+ "mm_projector.net.0.b3.conv2.bn.bias": "model-00001-of-00002.safetensors",
327
+ "mm_projector.net.0.b3.conv2.bn.weight": "model-00001-of-00002.safetensors",
328
+ "mm_projector.net.0.b3.conv2.conv.weight": "model-00001-of-00002.safetensors",
329
+ "mm_projector.net.0.b3.conv3.bn.bias": "model-00001-of-00002.safetensors",
330
+ "mm_projector.net.0.b3.conv3.bn.weight": "model-00001-of-00002.safetensors",
331
+ "mm_projector.net.0.b3.conv3.conv.weight": "model-00001-of-00002.safetensors",
332
+ "mm_projector.net.0.b3.se.fc1.bias": "model-00001-of-00002.safetensors",
333
+ "mm_projector.net.0.b3.se.fc1.weight": "model-00001-of-00002.safetensors",
334
+ "mm_projector.net.0.b3.se.fc2.bias": "model-00001-of-00002.safetensors",
335
+ "mm_projector.net.0.b3.se.fc2.weight": "model-00001-of-00002.safetensors",
336
+ "mm_projector.net.2.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
337
+ "mm_projector.net.2.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
338
+ "mm_projector.net.2.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
339
+ "mm_projector.net.2.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
340
+ "mm_projector.net.2.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
341
+ "mm_projector.net.2.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
342
+ "mm_projector.net.2.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
343
+ "mm_projector.net.2.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
344
+ "mm_projector.net.2.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
345
+ "mm_projector.net.2.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
346
+ "mm_projector.net.2.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
347
+ "mm_projector.net.2.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
348
+ "mm_projector.net.2.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
349
+ "mm_projector.net.2.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
350
+ "mm_projector.net.2.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
351
+ "mm_projector.net.2.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
352
+ "mm_projector.net.2.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
353
+ "mm_projector.net.2.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
354
+ "mm_projector.net.2.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
355
+ "mm_projector.net.2.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
356
+ "mm_projector.net.2.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
357
+ "mm_projector.net.2.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
358
+ "mm_projector.net.2.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
359
+ "mm_projector.net.2.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
360
+ "mm_projector.net.2.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
361
+ "mm_projector.net.2.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
362
+ "mm_projector.net.2.b3.conv1.bn.bias": "model-00001-of-00002.safetensors",
363
+ "mm_projector.net.2.b3.conv1.bn.weight": "model-00001-of-00002.safetensors",
364
+ "mm_projector.net.2.b3.conv1.conv.weight": "model-00001-of-00002.safetensors",
365
+ "mm_projector.net.2.b3.conv2.bn.bias": "model-00001-of-00002.safetensors",
366
+ "mm_projector.net.2.b3.conv2.bn.weight": "model-00001-of-00002.safetensors",
367
+ "mm_projector.net.2.b3.conv2.conv.weight": "model-00001-of-00002.safetensors",
368
+ "mm_projector.net.2.b3.conv3.bn.bias": "model-00001-of-00002.safetensors",
369
+ "mm_projector.net.2.b3.conv3.bn.weight": "model-00001-of-00002.safetensors",
370
+ "mm_projector.net.2.b3.conv3.conv.weight": "model-00001-of-00002.safetensors",
371
+ "mm_projector.net.2.b3.se.fc1.bias": "model-00001-of-00002.safetensors",
372
+ "mm_projector.net.2.b3.se.fc1.weight": "model-00001-of-00002.safetensors",
373
+ "mm_projector.net.2.b3.se.fc2.bias": "model-00001-of-00002.safetensors",
374
+ "mm_projector.net.2.b3.se.fc2.weight": "model-00001-of-00002.safetensors",
375
+ "mm_projector.pos_emb": "model-00001-of-00002.safetensors",
376
+ "mm_projector.readout.0.bias": "model-00001-of-00002.safetensors",
377
+ "mm_projector.readout.0.weight": "model-00001-of-00002.safetensors",
378
+ "mm_projector.readout.2.bias": "model-00001-of-00002.safetensors",
379
+ "mm_projector.readout.2.weight": "model-00001-of-00002.safetensors",
380
  "vision_model.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
381
  "vision_model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
382
  "vision_model.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",