Upload HCXVisionForCausalLM
Browse files- config.json +112 -16
- generation_config.json +4 -1
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +84 -84
config.json
CHANGED
@@ -5,18 +5,123 @@
|
|
5 |
],
|
6 |
"auto_map": {
|
7 |
"AutoConfig": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--configuration_hyperclovax.HCXVisionConfig",
|
8 |
-
"AutoModelForCausalLM": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--modeling_hyperclovax.HCXVisionForCausalLM"
|
9 |
-
"AutoProcessor": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--preprocessor.HCXVisionProcessor"
|
10 |
},
|
11 |
"decoder_max_length": 16384,
|
|
|
12 |
"freeze_decoder": false,
|
13 |
"freeze_encoder": true,
|
14 |
"freeze_mm_projector": false,
|
15 |
"hidden_size": 3072,
|
16 |
"ignore_index": -100,
|
17 |
-
"
|
18 |
-
"
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
"_name_or_path": "",
|
21 |
"add_cross_attention": false,
|
22 |
"architectures": [
|
@@ -97,26 +202,18 @@
|
|
97 |
"top_p": 1.0,
|
98 |
"torch_dtype": "bfloat16",
|
99 |
"torchscript": false,
|
100 |
-
"transformers_version": "4.51.3",
|
101 |
"typical_p": 1.0,
|
102 |
"use_bfloat16": false,
|
103 |
"use_cache": true,
|
104 |
"vocab_size": 110592
|
105 |
},
|
106 |
-
"max_image_cnt": 12,
|
107 |
-
"max_num_grids": 9,
|
108 |
-
"model_type": "hyperclovax_vlm",
|
109 |
-
"num_queries_vis_abstractor": 81,
|
110 |
-
"proj_pos_emb": true,
|
111 |
-
"proj_prenorm": false,
|
112 |
-
"q_former_model_name_or_path": null,
|
113 |
"torch_dtype": "bfloat16",
|
114 |
-
"transformers_version": "4.
|
115 |
"unpad": true,
|
116 |
"use_1x1_grid": true,
|
117 |
"use_nth_layer": -2,
|
|
|
118 |
"vision_config": {
|
119 |
-
"_attn_implementation_autoset": true,
|
120 |
"_name_or_path": "",
|
121 |
"add_cross_attention": false,
|
122 |
"anyres": true,
|
@@ -192,7 +289,6 @@
|
|
192 |
"top_p": 1.0,
|
193 |
"torch_dtype": "bfloat16",
|
194 |
"torchscript": false,
|
195 |
-
"transformers_version": "4.51.3",
|
196 |
"typical_p": 1.0,
|
197 |
"use_bfloat16": true
|
198 |
}
|
|
|
5 |
],
|
6 |
"auto_map": {
|
7 |
"AutoConfig": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--configuration_hyperclovax.HCXVisionConfig",
|
8 |
+
"AutoModelForCausalLM": "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B--modeling_hyperclovax.HCXVisionForCausalLM"
|
|
|
9 |
},
|
10 |
"decoder_max_length": 16384,
|
11 |
+
"first_last_frames_slow": false,
|
12 |
"freeze_decoder": false,
|
13 |
"freeze_encoder": true,
|
14 |
"freeze_mm_projector": false,
|
15 |
"hidden_size": 3072,
|
16 |
"ignore_index": -100,
|
17 |
+
"image_token_id": 100271,
|
18 |
+
"img_start_id": 100009,
|
19 |
+
"max_image_cnt": 12,
|
20 |
+
"max_num_grids": 9,
|
21 |
+
"mm_projector_type": "cabstractor",
|
22 |
+
"model_type": "hyperclovax_vlm",
|
23 |
+
"num_queries_vis_abstractor": -1,
|
24 |
+
"num_queries_vis_abstractor_image": 81,
|
25 |
+
"num_queries_vis_abstractor_video_fast": 9,
|
26 |
+
"num_queries_vis_abstractor_video_slow": 81,
|
27 |
+
"possible_resolutions": [
|
28 |
+
[
|
29 |
+
378,
|
30 |
+
378
|
31 |
+
],
|
32 |
+
[
|
33 |
+
378,
|
34 |
+
756
|
35 |
+
],
|
36 |
+
[
|
37 |
+
378,
|
38 |
+
1134
|
39 |
+
],
|
40 |
+
[
|
41 |
+
378,
|
42 |
+
1512
|
43 |
+
],
|
44 |
+
[
|
45 |
+
378,
|
46 |
+
1890
|
47 |
+
],
|
48 |
+
[
|
49 |
+
378,
|
50 |
+
2268
|
51 |
+
],
|
52 |
+
[
|
53 |
+
378,
|
54 |
+
2646
|
55 |
+
],
|
56 |
+
[
|
57 |
+
378,
|
58 |
+
3024
|
59 |
+
],
|
60 |
+
[
|
61 |
+
378,
|
62 |
+
3402
|
63 |
+
],
|
64 |
+
[
|
65 |
+
756,
|
66 |
+
378
|
67 |
+
],
|
68 |
+
[
|
69 |
+
756,
|
70 |
+
756
|
71 |
+
],
|
72 |
+
[
|
73 |
+
756,
|
74 |
+
1134
|
75 |
+
],
|
76 |
+
[
|
77 |
+
756,
|
78 |
+
1512
|
79 |
+
],
|
80 |
+
[
|
81 |
+
1134,
|
82 |
+
378
|
83 |
+
],
|
84 |
+
[
|
85 |
+
1134,
|
86 |
+
756
|
87 |
+
],
|
88 |
+
[
|
89 |
+
1134,
|
90 |
+
1134
|
91 |
+
],
|
92 |
+
[
|
93 |
+
1512,
|
94 |
+
378
|
95 |
+
],
|
96 |
+
[
|
97 |
+
1512,
|
98 |
+
756
|
99 |
+
],
|
100 |
+
[
|
101 |
+
1890,
|
102 |
+
378
|
103 |
+
],
|
104 |
+
[
|
105 |
+
2268,
|
106 |
+
378
|
107 |
+
],
|
108 |
+
[
|
109 |
+
2646,
|
110 |
+
378
|
111 |
+
],
|
112 |
+
[
|
113 |
+
3024,
|
114 |
+
378
|
115 |
+
],
|
116 |
+
[
|
117 |
+
3402,
|
118 |
+
378
|
119 |
+
]
|
120 |
+
],
|
121 |
+
"proj_pos_emb": true,
|
122 |
+
"proj_prenorm": false,
|
123 |
+
"q_former_model_name_or_path": null,
|
124 |
+
"text_config": {
|
125 |
"_name_or_path": "",
|
126 |
"add_cross_attention": false,
|
127 |
"architectures": [
|
|
|
202 |
"top_p": 1.0,
|
203 |
"torch_dtype": "bfloat16",
|
204 |
"torchscript": false,
|
|
|
205 |
"typical_p": 1.0,
|
206 |
"use_bfloat16": false,
|
207 |
"use_cache": true,
|
208 |
"vocab_size": 110592
|
209 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
"torch_dtype": "bfloat16",
|
211 |
+
"transformers_version": "4.52.4",
|
212 |
"unpad": true,
|
213 |
"use_1x1_grid": true,
|
214 |
"use_nth_layer": -2,
|
215 |
+
"video_token_id": 100270,
|
216 |
"vision_config": {
|
|
|
217 |
"_name_or_path": "",
|
218 |
"add_cross_attention": false,
|
219 |
"anyres": true,
|
|
|
289 |
"top_p": 1.0,
|
290 |
"torch_dtype": "bfloat16",
|
291 |
"torchscript": false,
|
|
|
292 |
"typical_p": 1.0,
|
293 |
"use_bfloat16": true
|
294 |
}
|
generation_config.json
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
-
"
|
|
|
|
|
|
|
4 |
}
|
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
+
"bos_token_id": 100257,
|
4 |
+
"eos_token_id": 100257,
|
5 |
+
"pad_token_id": 100257,
|
6 |
+
"transformers_version": "4.52.4"
|
7 |
}
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7c1d6739129ff1e9b43102c088a466faa5b12fbe9f4f5d370a95255974c4eed
|
3 |
+
size 4982442712
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8c185a170438b51a9ce587a16166873b5759349738975a5b40767c0147a7ff4
|
3 |
+
size 2460153400
|
model.safetensors.index.json
CHANGED
@@ -98,7 +98,7 @@
|
|
98 |
"language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
99 |
"language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
100 |
"language_model.model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
101 |
-
"language_model.model.layers.18.mlp.up_proj.weight": "model-
|
102 |
"language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
103 |
"language_model.model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
104 |
"language_model.model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
@@ -294,89 +294,89 @@
|
|
294 |
"language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
295 |
"language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
296 |
"language_model.model.norm.weight": "model-00002-of-00002.safetensors",
|
297 |
-
"mm_projector.net.0.b1.conv1.bn.bias": "model-
|
298 |
-
"mm_projector.net.0.b1.conv1.bn.weight": "model-
|
299 |
-
"mm_projector.net.0.b1.conv1.conv.weight": "model-
|
300 |
-
"mm_projector.net.0.b1.conv2.bn.bias": "model-
|
301 |
-
"mm_projector.net.0.b1.conv2.bn.weight": "model-
|
302 |
-
"mm_projector.net.0.b1.conv2.conv.weight": "model-
|
303 |
-
"mm_projector.net.0.b1.conv3.bn.bias": "model-
|
304 |
-
"mm_projector.net.0.b1.conv3.bn.weight": "model-
|
305 |
-
"mm_projector.net.0.b1.conv3.conv.weight": "model-
|
306 |
-
"mm_projector.net.0.b1.se.fc1.bias": "model-
|
307 |
-
"mm_projector.net.0.b1.se.fc1.weight": "model-
|
308 |
-
"mm_projector.net.0.b1.se.fc2.bias": "model-
|
309 |
-
"mm_projector.net.0.b1.se.fc2.weight": "model-
|
310 |
-
"mm_projector.net.0.b2.conv1.bn.bias": "model-
|
311 |
-
"mm_projector.net.0.b2.conv1.bn.weight": "model-
|
312 |
-
"mm_projector.net.0.b2.conv1.conv.weight": "model-
|
313 |
-
"mm_projector.net.0.b2.conv2.bn.bias": "model-
|
314 |
-
"mm_projector.net.0.b2.conv2.bn.weight": "model-
|
315 |
-
"mm_projector.net.0.b2.conv2.conv.weight": "model-
|
316 |
-
"mm_projector.net.0.b2.conv3.bn.bias": "model-
|
317 |
-
"mm_projector.net.0.b2.conv3.bn.weight": "model-
|
318 |
-
"mm_projector.net.0.b2.conv3.conv.weight": "model-
|
319 |
-
"mm_projector.net.0.b2.se.fc1.bias": "model-
|
320 |
-
"mm_projector.net.0.b2.se.fc1.weight": "model-
|
321 |
-
"mm_projector.net.0.b2.se.fc2.bias": "model-
|
322 |
-
"mm_projector.net.0.b2.se.fc2.weight": "model-
|
323 |
-
"mm_projector.net.0.b3.conv1.bn.bias": "model-
|
324 |
-
"mm_projector.net.0.b3.conv1.bn.weight": "model-
|
325 |
-
"mm_projector.net.0.b3.conv1.conv.weight": "model-
|
326 |
-
"mm_projector.net.0.b3.conv2.bn.bias": "model-
|
327 |
-
"mm_projector.net.0.b3.conv2.bn.weight": "model-
|
328 |
-
"mm_projector.net.0.b3.conv2.conv.weight": "model-
|
329 |
-
"mm_projector.net.0.b3.conv3.bn.bias": "model-
|
330 |
-
"mm_projector.net.0.b3.conv3.bn.weight": "model-
|
331 |
-
"mm_projector.net.0.b3.conv3.conv.weight": "model-
|
332 |
-
"mm_projector.net.0.b3.se.fc1.bias": "model-
|
333 |
-
"mm_projector.net.0.b3.se.fc1.weight": "model-
|
334 |
-
"mm_projector.net.0.b3.se.fc2.bias": "model-
|
335 |
-
"mm_projector.net.0.b3.se.fc2.weight": "model-
|
336 |
-
"mm_projector.net.2.b1.conv1.bn.bias": "model-
|
337 |
-
"mm_projector.net.2.b1.conv1.bn.weight": "model-
|
338 |
-
"mm_projector.net.2.b1.conv1.conv.weight": "model-
|
339 |
-
"mm_projector.net.2.b1.conv2.bn.bias": "model-
|
340 |
-
"mm_projector.net.2.b1.conv2.bn.weight": "model-
|
341 |
-
"mm_projector.net.2.b1.conv2.conv.weight": "model-
|
342 |
-
"mm_projector.net.2.b1.conv3.bn.bias": "model-
|
343 |
-
"mm_projector.net.2.b1.conv3.bn.weight": "model-
|
344 |
-
"mm_projector.net.2.b1.conv3.conv.weight": "model-
|
345 |
-
"mm_projector.net.2.b1.se.fc1.bias": "model-
|
346 |
-
"mm_projector.net.2.b1.se.fc1.weight": "model-
|
347 |
-
"mm_projector.net.2.b1.se.fc2.bias": "model-
|
348 |
-
"mm_projector.net.2.b1.se.fc2.weight": "model-
|
349 |
-
"mm_projector.net.2.b2.conv1.bn.bias": "model-
|
350 |
-
"mm_projector.net.2.b2.conv1.bn.weight": "model-
|
351 |
-
"mm_projector.net.2.b2.conv1.conv.weight": "model-
|
352 |
-
"mm_projector.net.2.b2.conv2.bn.bias": "model-
|
353 |
-
"mm_projector.net.2.b2.conv2.bn.weight": "model-
|
354 |
-
"mm_projector.net.2.b2.conv2.conv.weight": "model-
|
355 |
-
"mm_projector.net.2.b2.conv3.bn.bias": "model-
|
356 |
-
"mm_projector.net.2.b2.conv3.bn.weight": "model-
|
357 |
-
"mm_projector.net.2.b2.conv3.conv.weight": "model-
|
358 |
-
"mm_projector.net.2.b2.se.fc1.bias": "model-
|
359 |
-
"mm_projector.net.2.b2.se.fc1.weight": "model-
|
360 |
-
"mm_projector.net.2.b2.se.fc2.bias": "model-
|
361 |
-
"mm_projector.net.2.b2.se.fc2.weight": "model-
|
362 |
-
"mm_projector.net.2.b3.conv1.bn.bias": "model-
|
363 |
-
"mm_projector.net.2.b3.conv1.bn.weight": "model-
|
364 |
-
"mm_projector.net.2.b3.conv1.conv.weight": "model-
|
365 |
-
"mm_projector.net.2.b3.conv2.bn.bias": "model-
|
366 |
-
"mm_projector.net.2.b3.conv2.bn.weight": "model-
|
367 |
-
"mm_projector.net.2.b3.conv2.conv.weight": "model-
|
368 |
-
"mm_projector.net.2.b3.conv3.bn.bias": "model-
|
369 |
-
"mm_projector.net.2.b3.conv3.bn.weight": "model-
|
370 |
-
"mm_projector.net.2.b3.conv3.conv.weight": "model-
|
371 |
-
"mm_projector.net.2.b3.se.fc1.bias": "model-
|
372 |
-
"mm_projector.net.2.b3.se.fc1.weight": "model-
|
373 |
-
"mm_projector.net.2.b3.se.fc2.bias": "model-
|
374 |
-
"mm_projector.net.2.b3.se.fc2.weight": "model-
|
375 |
-
"mm_projector.pos_emb": "model-
|
376 |
-
"mm_projector.readout.0.bias": "model-
|
377 |
-
"mm_projector.readout.0.weight": "model-
|
378 |
-
"mm_projector.readout.2.bias": "model-
|
379 |
-
"mm_projector.readout.2.weight": "model-
|
380 |
"vision_model.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
|
381 |
"vision_model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
|
382 |
"vision_model.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
|
|
|
98 |
"language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
99 |
"language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
100 |
"language_model.model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
101 |
+
"language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
102 |
"language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
103 |
"language_model.model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
104 |
"language_model.model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
|
|
294 |
"language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
295 |
"language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
296 |
"language_model.model.norm.weight": "model-00002-of-00002.safetensors",
|
297 |
+
"mm_projector.net.0.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
298 |
+
"mm_projector.net.0.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
299 |
+
"mm_projector.net.0.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
300 |
+
"mm_projector.net.0.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
301 |
+
"mm_projector.net.0.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
302 |
+
"mm_projector.net.0.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
303 |
+
"mm_projector.net.0.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
304 |
+
"mm_projector.net.0.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
305 |
+
"mm_projector.net.0.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
306 |
+
"mm_projector.net.0.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
|
307 |
+
"mm_projector.net.0.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
|
308 |
+
"mm_projector.net.0.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
|
309 |
+
"mm_projector.net.0.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
|
310 |
+
"mm_projector.net.0.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
311 |
+
"mm_projector.net.0.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
312 |
+
"mm_projector.net.0.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
313 |
+
"mm_projector.net.0.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
314 |
+
"mm_projector.net.0.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
315 |
+
"mm_projector.net.0.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
316 |
+
"mm_projector.net.0.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
317 |
+
"mm_projector.net.0.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
318 |
+
"mm_projector.net.0.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
319 |
+
"mm_projector.net.0.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
|
320 |
+
"mm_projector.net.0.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
|
321 |
+
"mm_projector.net.0.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
|
322 |
+
"mm_projector.net.0.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
|
323 |
+
"mm_projector.net.0.b3.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
324 |
+
"mm_projector.net.0.b3.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
325 |
+
"mm_projector.net.0.b3.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
326 |
+
"mm_projector.net.0.b3.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
327 |
+
"mm_projector.net.0.b3.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
328 |
+
"mm_projector.net.0.b3.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
329 |
+
"mm_projector.net.0.b3.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
330 |
+
"mm_projector.net.0.b3.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
331 |
+
"mm_projector.net.0.b3.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
332 |
+
"mm_projector.net.0.b3.se.fc1.bias": "model-00001-of-00002.safetensors",
|
333 |
+
"mm_projector.net.0.b3.se.fc1.weight": "model-00001-of-00002.safetensors",
|
334 |
+
"mm_projector.net.0.b3.se.fc2.bias": "model-00001-of-00002.safetensors",
|
335 |
+
"mm_projector.net.0.b3.se.fc2.weight": "model-00001-of-00002.safetensors",
|
336 |
+
"mm_projector.net.2.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
337 |
+
"mm_projector.net.2.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
338 |
+
"mm_projector.net.2.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
339 |
+
"mm_projector.net.2.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
340 |
+
"mm_projector.net.2.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
341 |
+
"mm_projector.net.2.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
342 |
+
"mm_projector.net.2.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
343 |
+
"mm_projector.net.2.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
344 |
+
"mm_projector.net.2.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
345 |
+
"mm_projector.net.2.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
|
346 |
+
"mm_projector.net.2.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
|
347 |
+
"mm_projector.net.2.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
|
348 |
+
"mm_projector.net.2.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
|
349 |
+
"mm_projector.net.2.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
350 |
+
"mm_projector.net.2.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
351 |
+
"mm_projector.net.2.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
352 |
+
"mm_projector.net.2.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
353 |
+
"mm_projector.net.2.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
354 |
+
"mm_projector.net.2.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
355 |
+
"mm_projector.net.2.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
356 |
+
"mm_projector.net.2.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
357 |
+
"mm_projector.net.2.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
358 |
+
"mm_projector.net.2.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
|
359 |
+
"mm_projector.net.2.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
|
360 |
+
"mm_projector.net.2.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
|
361 |
+
"mm_projector.net.2.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
|
362 |
+
"mm_projector.net.2.b3.conv1.bn.bias": "model-00001-of-00002.safetensors",
|
363 |
+
"mm_projector.net.2.b3.conv1.bn.weight": "model-00001-of-00002.safetensors",
|
364 |
+
"mm_projector.net.2.b3.conv1.conv.weight": "model-00001-of-00002.safetensors",
|
365 |
+
"mm_projector.net.2.b3.conv2.bn.bias": "model-00001-of-00002.safetensors",
|
366 |
+
"mm_projector.net.2.b3.conv2.bn.weight": "model-00001-of-00002.safetensors",
|
367 |
+
"mm_projector.net.2.b3.conv2.conv.weight": "model-00001-of-00002.safetensors",
|
368 |
+
"mm_projector.net.2.b3.conv3.bn.bias": "model-00001-of-00002.safetensors",
|
369 |
+
"mm_projector.net.2.b3.conv3.bn.weight": "model-00001-of-00002.safetensors",
|
370 |
+
"mm_projector.net.2.b3.conv3.conv.weight": "model-00001-of-00002.safetensors",
|
371 |
+
"mm_projector.net.2.b3.se.fc1.bias": "model-00001-of-00002.safetensors",
|
372 |
+
"mm_projector.net.2.b3.se.fc1.weight": "model-00001-of-00002.safetensors",
|
373 |
+
"mm_projector.net.2.b3.se.fc2.bias": "model-00001-of-00002.safetensors",
|
374 |
+
"mm_projector.net.2.b3.se.fc2.weight": "model-00001-of-00002.safetensors",
|
375 |
+
"mm_projector.pos_emb": "model-00001-of-00002.safetensors",
|
376 |
+
"mm_projector.readout.0.bias": "model-00001-of-00002.safetensors",
|
377 |
+
"mm_projector.readout.0.weight": "model-00001-of-00002.safetensors",
|
378 |
+
"mm_projector.readout.2.bias": "model-00001-of-00002.safetensors",
|
379 |
+
"mm_projector.readout.2.weight": "model-00001-of-00002.safetensors",
|
380 |
"vision_model.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
|
381 |
"vision_model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
|
382 |
"vision_model.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
|