tcm03 committed
Commit · 475fbaa
1 Parent(s): cb14d91
Track log files with git lfs
Files changed:
- runtime_logs/run_2025-02-14_04-23-49.log +3 -382
- runtime_logs/run_2025-02-14_04-25-33.log +3 -378
- runtime_logs/run_2025-02-14_04-26-13.log +0 -0
- runtime_logs/run_2025-02-14_05-01-33.log +3 -366
- runtime_logs/run_2025-02-14_05-02-58.log +0 -0
- runtime_logs/run_2025-02-14_17-32-33.log +3 -378
- runtime_logs/run_2025-02-14_17-36-05.log +0 -0
- runtime_logs/run_2025-02-15_02-18-13.log +0 -0
- runtime_logs/run_2025-02-15_02-47-54.log +0 -0
- runtime_logs/run_2025-02-15_02-56-12.log +3 -378
- runtime_logs/run_2025-02-15_03-00-20.log +0 -0
- runtime_logs/run_2025-02-15_03-02-43.log +3 -383
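
For context, a minimal sketch of the usual workflow that produces pointer-file diffs like the ones below, assuming git and git-lfs are installed and the commands are run from the repository root; the track pattern is illustrative and not taken from this commit:

    import subprocess

    def run(*cmd):
        # Run a git command, raising if it fails.
        subprocess.run(cmd, check=True)

    run("git", "lfs", "install")                      # one-time: enable LFS hooks in this repo
    run("git", "lfs", "track", "runtime_logs/*.log")  # records the pattern in .gitattributes
    run("git", "add", ".gitattributes", "runtime_logs")
    run("git", "commit", "-m", "Track log files with git lfs")

After this, each tracked .log file is committed as a small pointer (version, oid, size) while its actual contents live in LFS storage.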
runtime_logs/run_2025-02-14_04-23-49.log
CHANGED
@@ -1,382 +1,3 @@
- [382 lines removed: the previous plain-text contents of this runtime log, i.e. the 2025-02-14 04:23:50 to 04:24:26 finetuning startup trace with the CambrianConfig, GenerationConfig, Dinov2Config, SiglipVisionConfig and image-processor config dumps, the CambrianLlamaForCausalLM / SiglipVisionModel / Dinov2Model checkpoint-loading messages, the parameter counts (total 3,264,865,280; trainable 12,589,056; LM head 394,002,432) and the trainer startup info (554 examples, 2 epochs, batch size 1, 1,108 optimization steps).]
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c214f7ce3a6683296aa31dd99ef51f5692f03033e916f7973d03b3cb7b3484d
+ size 37176
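
The three added lines above are the standard Git LFS pointer format: spec version, SHA-256 of the stored file, and its size in bytes. A minimal sketch for checking a locally materialized log against its pointer, using the path and values shown above:

    import hashlib

    def sha256_and_size(path):
        # Stream the file so large logs need not fit in memory.
        h = hashlib.sha256()
        size = 0
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
                size += len(chunk)
        return h.hexdigest(), size

    digest, size = sha256_and_size("runtime_logs/run_2025-02-14_04-23-49.log")
    assert digest == "6c214f7ce3a6683296aa31dd99ef51f5692f03033e916f7973d03b3cb7b3484d"
    assert size == 37176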
runtime_logs/run_2025-02-14_04-25-33.log
CHANGED
@@ -1,378 +1,3 @@
- [378 lines removed: the previous plain-text contents of this runtime log, i.e. the 2025-02-14 04:25:34 onward startup trace for the same finetuning job, with the same CambrianConfig, GenerationConfig, Dinov2Config and SiglipVisionConfig dumps and model checkpoint-loading messages.]
'text_model.encoder.layers.23.self_attn.out_proj.weight', 'text_model.encoder.layers.23.self_attn.q_proj.bias', 'text_model.encoder.layers.23.self_attn.q_proj.weight', 'text_model.encoder.layers.23.self_attn.v_proj.bias', 'text_model.encoder.layers.23.self_attn.v_proj.weight', 'text_model.encoder.layers.24.layer_norm1.bias', 'text_model.encoder.layers.24.layer_norm1.weight', 'text_model.encoder.layers.24.layer_norm2.bias', 'text_model.encoder.layers.24.layer_norm2.weight', 'text_model.encoder.layers.24.mlp.fc1.bias', 'text_model.encoder.layers.24.mlp.fc1.weight', 'text_model.encoder.layers.24.mlp.fc2.bias', 'text_model.encoder.layers.24.mlp.fc2.weight', 'text_model.encoder.layers.24.self_attn.k_proj.bias', 'text_model.encoder.layers.24.self_attn.k_proj.weight', 'text_model.encoder.layers.24.self_attn.out_proj.bias', 'text_model.encoder.layers.24.self_attn.out_proj.weight', 'text_model.encoder.layers.24.self_attn.q_proj.bias', 'text_model.encoder.layers.24.self_attn.q_proj.weight', 'text_model.encoder.layers.24.self_attn.v_proj.bias', 'text_model.encoder.layers.24.self_attn.v_proj.weight', 'text_model.encoder.layers.25.layer_norm1.bias', 'text_model.encoder.layers.25.layer_norm1.weight', 'text_model.encoder.layers.25.layer_norm2.bias', 'text_model.encoder.layers.25.layer_norm2.weight', 'text_model.encoder.layers.25.mlp.fc1.bias', 'text_model.encoder.layers.25.mlp.fc1.weight', 'text_model.encoder.layers.25.mlp.fc2.bias', 'text_model.encoder.layers.25.mlp.fc2.weight', 'text_model.encoder.layers.25.self_attn.k_proj.bias', 'text_model.encoder.layers.25.self_attn.k_proj.weight', 'text_model.encoder.layers.25.self_attn.out_proj.bias', 'text_model.encoder.layers.25.self_attn.out_proj.weight', 'text_model.encoder.layers.25.self_attn.q_proj.bias', 'text_model.encoder.layers.25.self_attn.q_proj.weight', 'text_model.encoder.layers.25.self_attn.v_proj.bias', 'text_model.encoder.layers.25.self_attn.v_proj.weight', 'text_model.encoder.layers.26.layer_norm1.bias', 'text_model.encoder.layers.26.layer_norm1.weight', 'text_model.encoder.layers.26.layer_norm2.bias', 'text_model.encoder.layers.26.layer_norm2.weight', 'text_model.encoder.layers.26.mlp.fc1.bias', 'text_model.encoder.layers.26.mlp.fc1.weight', 'text_model.encoder.layers.26.mlp.fc2.bias', 'text_model.encoder.layers.26.mlp.fc2.weight', 'text_model.encoder.layers.26.self_attn.k_proj.bias', 'text_model.encoder.layers.26.self_attn.k_proj.weight', 'text_model.encoder.layers.26.self_attn.out_proj.bias', 'text_model.encoder.layers.26.self_attn.out_proj.weight', 'text_model.encoder.layers.26.self_attn.q_proj.bias', 'text_model.encoder.layers.26.self_attn.q_proj.weight', 'text_model.encoder.layers.26.self_attn.v_proj.bias', 'text_model.encoder.layers.26.self_attn.v_proj.weight', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.3.layer_norm1.weight', 'text_model.encoder.layers.3.layer_norm2.bias', 'text_model.encoder.layers.3.layer_norm2.weight', 'text_model.encoder.layers.3.mlp.fc1.bias', 'text_model.encoder.layers.3.mlp.fc1.weight', 'text_model.encoder.layers.3.mlp.fc2.bias', 'text_model.encoder.layers.3.mlp.fc2.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.3.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.out_proj.bias', 'text_model.encoder.layers.3.self_attn.out_proj.weight', 'text_model.encoder.layers.3.self_attn.q_proj.bias', 'text_model.encoder.layers.3.self_attn.q_proj.weight', 'text_model.encoder.layers.3.self_attn.v_proj.bias', 
'text_model.encoder.layers.3.self_attn.v_proj.weight', 'text_model.encoder.layers.4.layer_norm1.bias', 'text_model.encoder.layers.4.layer_norm1.weight', 'text_model.encoder.layers.4.layer_norm2.bias', 'text_model.encoder.layers.4.layer_norm2.weight', 'text_model.encoder.layers.4.mlp.fc1.bias', 'text_model.encoder.layers.4.mlp.fc1.weight', 'text_model.encoder.layers.4.mlp.fc2.bias', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.4.self_attn.k_proj.bias', 'text_model.encoder.layers.4.self_attn.k_proj.weight', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.4.self_attn.out_proj.weight', 'text_model.encoder.layers.4.self_attn.q_proj.bias', 'text_model.encoder.layers.4.self_attn.q_proj.weight', 'text_model.encoder.layers.4.self_attn.v_proj.bias', 'text_model.encoder.layers.4.self_attn.v_proj.weight', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.5.layer_norm1.weight', 'text_model.encoder.layers.5.layer_norm2.bias', 'text_model.encoder.layers.5.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc1.bias', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.5.mlp.fc2.weight', 'text_model.encoder.layers.5.self_attn.k_proj.bias', 'text_model.encoder.layers.5.self_attn.k_proj.weight', 'text_model.encoder.layers.5.self_attn.out_proj.bias', 'text_model.encoder.layers.5.self_attn.out_proj.weight', 'text_model.encoder.layers.5.self_attn.q_proj.bias', 'text_model.encoder.layers.5.self_attn.q_proj.weight', 'text_model.encoder.layers.5.self_attn.v_proj.bias', 'text_model.encoder.layers.5.self_attn.v_proj.weight', 'text_model.encoder.layers.6.layer_norm1.bias', 'text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.layer_norm2.bias', 'text_model.encoder.layers.6.layer_norm2.weight', 'text_model.encoder.layers.6.mlp.fc1.bias', 'text_model.encoder.layers.6.mlp.fc1.weight', 'text_model.encoder.layers.6.mlp.fc2.bias', 'text_model.encoder.layers.6.mlp.fc2.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.6.self_attn.k_proj.weight', 'text_model.encoder.layers.6.self_attn.out_proj.bias', 'text_model.encoder.layers.6.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.bias', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.6.self_attn.v_proj.bias', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.layer_norm1.bias', 'text_model.encoder.layers.7.layer_norm1.weight', 'text_model.encoder.layers.7.layer_norm2.bias', 'text_model.encoder.layers.7.layer_norm2.weight', 'text_model.encoder.layers.7.mlp.fc1.bias', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.7.mlp.fc2.bias', 'text_model.encoder.layers.7.mlp.fc2.weight', 'text_model.encoder.layers.7.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.out_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.7.self_attn.q_proj.bias', 'text_model.encoder.layers.7.self_attn.q_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.8.layer_norm1.bias', 'text_model.encoder.layers.8.layer_norm1.weight', 'text_model.encoder.layers.8.layer_norm2.bias', 'text_model.encoder.layers.8.layer_norm2.weight', 'text_model.encoder.layers.8.mlp.fc1.bias', 
'text_model.encoder.layers.8.mlp.fc1.weight', 'text_model.encoder.layers.8.mlp.fc2.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.8.self_attn.out_proj.bias', 'text_model.encoder.layers.8.self_attn.out_proj.weight', 'text_model.encoder.layers.8.self_attn.q_proj.bias', 'text_model.encoder.layers.8.self_attn.q_proj.weight', 'text_model.encoder.layers.8.self_attn.v_proj.bias', 'text_model.encoder.layers.8.self_attn.v_proj.weight', 'text_model.encoder.layers.9.layer_norm1.bias', 'text_model.encoder.layers.9.layer_norm1.weight', 'text_model.encoder.layers.9.layer_norm2.bias', 'text_model.encoder.layers.9.layer_norm2.weight', 'text_model.encoder.layers.9.mlp.fc1.bias', 'text_model.encoder.layers.9.mlp.fc1.weight', 'text_model.encoder.layers.9.mlp.fc2.bias', 'text_model.encoder.layers.9.mlp.fc2.weight', 'text_model.encoder.layers.9.self_attn.k_proj.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.9.self_attn.out_proj.bias', 'text_model.encoder.layers.9.self_attn.out_proj.weight', 'text_model.encoder.layers.9.self_attn.q_proj.bias', 'text_model.encoder.layers.9.self_attn.q_proj.weight', 'text_model.encoder.layers.9.self_attn.v_proj.bias', 'text_model.encoder.layers.9.self_attn.v_proj.weight', 'text_model.final_layer_norm.bias', 'text_model.final_layer_norm.weight', 'text_model.head.bias', 'text_model.head.weight']
-- This IS expected if you are initializing SiglipVisionModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing SiglipVisionModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-2025-02-14 04:25:38,022 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of SiglipVisionModel were initialized from the model checkpoint at google/siglip-so400m-patch14-384.
-If your task is similar to the task the model of the checkpoint was trained on, you can already use SiglipVisionModel for predictions without further training.
-2025-02-14 04:25:38,212 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/preprocessor_config.json
-2025-02-14 04:25:38,212 - image_processing_base.py:429 - from_dict - INFO - Image processor SiglipImageProcessor {
-"do_convert_rgb": null,
-"do_normalize": true,
-"do_rescale": true,
-"do_resize": true,
-"image_mean": [
-0.5,
-0.5,
-0.5
-],
-"image_processor_type": "SiglipImageProcessor",
-"image_std": [
-0.5,
-0.5,
-0.5
-],
-"processor_class": "SiglipProcessor",
-"resample": 3,
-"rescale_factor": 0.00392156862745098,
-"size": {
-"height": 384,
-"width": 384
-}
-}
-
-2025-02-14 04:25:38,584 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
-2025-02-14 04:25:38,587 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
-"apply_layernorm": true,
-"architectures": [
-"Dinov2Model"
-],
-"attention_probs_dropout_prob": 0.0,
-"drop_path_rate": 0.0,
-"hidden_act": "gelu",
-"hidden_dropout_prob": 0.0,
-"hidden_size": 1536,
-"image_size": 518,
-"initializer_range": 0.02,
-"layer_norm_eps": 1e-06,
-"layerscale_value": 1.0,
-"mlp_ratio": 4,
-"model_type": "dinov2",
-"num_attention_heads": 24,
-"num_channels": 3,
-"num_hidden_layers": 40,
-"out_features": [
-"stage40"
-],
-"out_indices": [
-40
-],
-"patch_size": 14,
-"qkv_bias": true,
-"reshape_hidden_states": true,
-"stage_names": [
-"stem",
-"stage1",
-"stage2",
-"stage3",
-"stage4",
-"stage5",
-"stage6",
-"stage7",
-"stage8",
-"stage9",
-"stage10",
-"stage11",
-"stage12",
-"stage13",
-"stage14",
-"stage15",
-"stage16",
-"stage17",
-"stage18",
-"stage19",
-"stage20",
-"stage21",
-"stage22",
-"stage23",
-"stage24",
-"stage25",
-"stage26",
-"stage27",
-"stage28",
-"stage29",
-"stage30",
-"stage31",
-"stage32",
-"stage33",
-"stage34",
-"stage35",
-"stage36",
-"stage37",
-"stage38",
-"stage39",
-"stage40"
-],
-"torch_dtype": "float32",
-"transformers_version": "4.43.1",
-"use_swiglu_ffn": true
-}
-
-2025-02-14 04:25:38,588 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/model.safetensors
-2025-02-14 04:25:39,038 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing Dinov2Model.
-
-2025-02-14 04:25:39,038 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of Dinov2Model were initialized from the model checkpoint at facebook/dinov2-giant.
-If your task is similar to the task the model of the checkpoint was trained on, you can already use Dinov2Model for predictions without further training.
-2025-02-14 04:25:39,536 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/preprocessor_config.json
-2025-02-14 04:25:39,540 - image_processing_base.py:429 - from_dict - INFO - Image processor BitImageProcessor {
-"crop_size": {
-"height": 378,
-"width": 378
-},
-"do_center_crop": true,
-"do_convert_rgb": true,
-"do_normalize": true,
-"do_rescale": true,
-"do_resize": true,
-"image_mean": [
-0.485,
-0.456,
-0.406
-],
-"image_processor_type": "BitImageProcessor",
-"image_std": [
-0.229,
-0.224,
-0.225
-],
-"resample": 3,
-"rescale_factor": 0.00392156862745098,
-"size": {
-"shortest_edge": 378
-}
-}
-
-2025-02-14 04:25:40,430 - finetune_llama.py:1239 - train - INFO - Total params: 3264865280
-2025-02-14 04:25:40,430 - finetune_llama.py:1240 - train - INFO - Trainable params: 12589056
-2025-02-14 04:25:40,430 - finetune_llama.py:1241 - train - INFO - LM head params: 394002432
-2025-02-14 04:25:42,550 - trainer_callback.py:423 - add_callback - WARNING - You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is
-:DefaultFlowCallback
-TensorBoardCallback
-2025-02-14 04:25:42,550 - trainer.py:648 - __init__ - INFO - Using auto half precision backend
-2025-02-14 04:25:42,857 - trainer.py:2134 - _inner_training_loop - INFO - ***** Running training *****
-2025-02-14 04:25:42,857 - trainer.py:2135 - _inner_training_loop - INFO - Num examples = 554
-2025-02-14 04:25:42,857 - trainer.py:2136 - _inner_training_loop - INFO - Num Epochs = 2
-2025-02-14 04:25:42,857 - trainer.py:2137 - _inner_training_loop - INFO - Instantaneous batch size per device = 1
-2025-02-14 04:25:42,857 - trainer.py:2140 - _inner_training_loop - INFO - Total train batch size (w. parallel, distributed & accumulation) = 1
-2025-02-14 04:25:42,857 - trainer.py:2141 - _inner_training_loop - INFO - Gradient Accumulation steps = 1
-2025-02-14 04:25:42,857 - trainer.py:2142 - _inner_training_loop - INFO - Total optimization steps = 1,108
-2025-02-14 04:25:42,859 - trainer.py:2143 - _inner_training_loop - INFO - Number of trainable parameters = 406,591,488
+version https://git-lfs.github.com/spec/v1
+oid sha256:b74b5d170fd29d8ff01362cf2bd56abc278847a760cc589f3e9f2986ccc2a236
+size 36433
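
Each replaced log is now stored as a Git LFS pointer: the three added lines above (version, oid, size) stand in for the raw log, whose bytes live in LFS storage. As a minimal illustration only (a hypothetical helper, not part of this repository), such a pointer file can be parsed back into its fields like this:

    def read_lfs_pointer(path):
        # Parse a Git LFS pointer file (one "key value" pair per line) into a dict.
        fields = {}
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    key, _, value = line.partition(" ")
                    fields[key] = value
        return fields

    # e.g. read_lfs_pointer("runtime_logs/run_2025-02-14_04-23-49.log")
    # -> {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:b74b...', 'size': '36433'}
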
runtime_logs/run_2025-02-14_04-26-13.log
CHANGED
The diff for this file is too large to render. See raw diff.

runtime_logs/run_2025-02-14_05-01-33.log
CHANGED
@@ -1,366 +1,3 @@
-
-
-
-"_name_or_path": "/tmp/iopath_cache/manifold_cache/tree/users/shenx/finetune/09281004-cambrian_llama3_2_t576_ov",
-"architectures": [
-"CambrianLlamaForCausalLM"
-],
-"attention_bias": false,
-"attention_dropout": 0.0,
-"bos_token_id": 128000,
-"connect_layer": 2,
-"connector_depth": 3,
-"connector_only": true,
-"dino_threshold": 0.83,
-"drop_threshold": 0.8,
-"eos_token_id": [
-128001,
-128008,
-128009
-],
-"frame_pos": false,
-"freeze_mm_mlp_adapter": false,
-"hidden_act": "silu",
-"hidden_size": 3072,
-"highres": true,
-"highres_connect": false,
-"image_aspect_ratio": "pad",
-"image_position": 91,
-"image_token_len": 144,
-"initializer_range": 0.02,
-"intermediate_size": 8192,
-"is_image_newline": true,
-"is_st_sampler": false,
-"lowres_token": 8,
-"max_position_embeddings": 131072,
-"mlp_bias": false,
-"mm_patch_merge_type": "flat",
-"mm_projector_lr": null,
-"mm_projector_type": "sva",
-"mm_use_im_patch_token": false,
-"mm_use_im_start_end": false,
-"mm_vision_sampler_lr": null,
-"mm_vision_select_feature": "patch",
-"mm_vision_select_layer": -2,
-"mm_vision_tower_aux_list": [
-"siglip/CLIP-ViT-SO400M-14-384",
-"facebook/dinov2-giant-res378"
-],
-"mm_vision_tower_aux_token_len_list": [
-576,
-576
-],
-"mm_vision_tower_lr": null,
-"model_type": "cambrian_llama",
-"num_attention_heads": 24,
-"num_hidden_layers": 28,
-"num_key_value_heads": 8,
-"num_of_vision_sampler_layers": 10,
-"num_query_group": 1,
-"pretraining_tp": 1,
-"query_num_list": [
-144
-],
-"rms_norm_eps": 1e-05,
-"rope_scaling": {
-"factor": 32.0,
-"high_freq_factor": 4.0,
-"low_freq_factor": 1.0,
-"original_max_position_embeddings": 8192,
-"rope_type": "llama3"
-},
-"rope_theta": 500000.0,
-"spmd_debug": null,
-"spmd_fsdp_sharding": null,
-"spmd_mesh": null,
-"start_of_vision_sampler_layers": 0,
-"stride_of_vision_sampler_layers": 3,
-"tie_word_embeddings": false,
-"tokenizer_model_max_length": 8192,
-"tokenizer_padding_side": "right",
-"torch_dtype": "float32",
-"transformers_version": "4.43.1",
-"tune_mm_mlp_adapter": false,
-"unfreeze_mm_vision_tower": false,
-"use_cache": false,
-"use_mm_proj": true,
-"vision_hidden_size": 1024,
-"vision_tower_aux_token_len_list": [
-576,
-576
-],
-"vocab_size": 128256
-}
-
-2025-02-14 05:01:34,508 - modeling_utils.py:3618 - from_pretrained - INFO - loading weights file ./checkpoints/longvu_llama3_2/pytorch_model.bin
-2025-02-14 05:01:34,547 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
-"bos_token_id": 128000,
-"eos_token_id": [
-128001,
-128008,
-128009
-],
-"use_cache": false
-}
-
-2025-02-14 05:01:34,753 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
-2025-02-14 05:01:34,757 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
-"apply_layernorm": true,
-"architectures": [
-"Dinov2Model"
-],
-"attention_probs_dropout_prob": 0.0,
-"drop_path_rate": 0.0,
-"hidden_act": "gelu",
-"hidden_dropout_prob": 0.0,
-"hidden_size": 1536,
-"image_size": 518,
-"initializer_range": 0.02,
-"layer_norm_eps": 1e-06,
-"layerscale_value": 1.0,
-"mlp_ratio": 4,
-"model_type": "dinov2",
-"num_attention_heads": 24,
-"num_channels": 3,
-"num_hidden_layers": 40,
-"out_features": [
-"stage40"
-],
-"out_indices": [
-40
-],
-"patch_size": 14,
-"qkv_bias": true,
-"reshape_hidden_states": true,
-"stage_names": [
-"stem",
-"stage1",
-"stage2",
-"stage3",
-"stage4",
-"stage5",
-"stage6",
-"stage7",
-"stage8",
-"stage9",
-"stage10",
-"stage11",
-"stage12",
-"stage13",
-"stage14",
-"stage15",
-"stage16",
-"stage17",
-"stage18",
-"stage19",
-"stage20",
-"stage21",
-"stage22",
-"stage23",
-"stage24",
-"stage25",
-"stage26",
-"stage27",
-"stage28",
-"stage29",
-"stage30",
-"stage31",
-"stage32",
-"stage33",
-"stage34",
-"stage35",
-"stage36",
-"stage37",
-"stage38",
-"stage39",
-"stage40"
-],
-"torch_dtype": "float32",
-"transformers_version": "4.43.1",
-"use_swiglu_ffn": true
-}
-
-2025-02-14 05:01:36,113 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing CambrianLlamaForCausalLM.
-
-2025-02-14 05:01:36,113 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of CambrianLlamaForCausalLM were initialized from the model checkpoint at ./checkpoints/longvu_llama3_2.
-If your task is similar to the task the model of the checkpoint was trained on, you can already use CambrianLlamaForCausalLM for predictions without further training.
-2025-02-14 05:01:36,119 - configuration_utils.py:991 - from_pretrained - INFO - loading configuration file ./checkpoints/longvu_llama3_2/generation_config.json
-2025-02-14 05:01:36,119 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
-"bos_token_id": 128000,
-"do_sample": true,
-"eos_token_id": [
-128001,
-128008,
-128009
-],
-"temperature": 0.6,
-"top_p": 0.9
-}
-
-2025-02-14 05:01:36,370 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer.json
-2025-02-14 05:01:36,370 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file added_tokens.json
-2025-02-14 05:01:36,370 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file special_tokens_map.json
-2025-02-14 05:01:36,370 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer_config.json
-2025-02-14 05:01:36,806 - tokenization_utils_base.py:2533 - _from_pretrained - INFO - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
-2025-02-14 05:01:37,166 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/config.json
-2025-02-14 05:01:37,167 - configuration_utils.py:800 - from_dict - INFO - Model config SiglipVisionConfig {
-"attention_dropout": 0.0,
-"hidden_act": "gelu_pytorch_tanh",
-"hidden_size": 1152,
-"image_size": 384,
-"intermediate_size": 4304,
-"layer_norm_eps": 1e-06,
-"model_type": "siglip_vision_model",
-"num_attention_heads": 16,
-"num_channels": 3,
-"num_hidden_layers": 27,
-"patch_size": 14,
-"transformers_version": "4.43.1"
-}
-
-2025-02-14 05:01:37,168 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/model.safetensors
2025-02-14 05:01:37,440 - modeling_utils.py:4440 - _load_pretrained_model - INFO - Some weights of the model checkpoint at google/siglip-so400m-patch14-384 were not used when initializing SiglipVisionModel: ['logit_bias', 'logit_scale', 'text_model.embeddings.position_embedding.weight', 'text_model.embeddings.token_embedding.weight', 'text_model.encoder.layers.0.layer_norm1.bias', 'text_model.encoder.layers.0.layer_norm1.weight', 'text_model.encoder.layers.0.layer_norm2.bias', 'text_model.encoder.layers.0.layer_norm2.weight', 'text_model.encoder.layers.0.mlp.fc1.bias', 'text_model.encoder.layers.0.mlp.fc1.weight', 'text_model.encoder.layers.0.mlp.fc2.bias', 'text_model.encoder.layers.0.mlp.fc2.weight', 'text_model.encoder.layers.0.self_attn.k_proj.bias', 'text_model.encoder.layers.0.self_attn.k_proj.weight', 'text_model.encoder.layers.0.self_attn.out_proj.bias', 'text_model.encoder.layers.0.self_attn.out_proj.weight', 'text_model.encoder.layers.0.self_attn.q_proj.bias', 'text_model.encoder.layers.0.self_attn.q_proj.weight', 'text_model.encoder.layers.0.self_attn.v_proj.bias', 'text_model.encoder.layers.0.self_attn.v_proj.weight', 'text_model.encoder.layers.1.layer_norm1.bias', 'text_model.encoder.layers.1.layer_norm1.weight', 'text_model.encoder.layers.1.layer_norm2.bias', 'text_model.encoder.layers.1.layer_norm2.weight', 'text_model.encoder.layers.1.mlp.fc1.bias', 'text_model.encoder.layers.1.mlp.fc1.weight', 'text_model.encoder.layers.1.mlp.fc2.bias', 'text_model.encoder.layers.1.mlp.fc2.weight', 'text_model.encoder.layers.1.self_attn.k_proj.bias', 'text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.1.self_attn.out_proj.bias', 'text_model.encoder.layers.1.self_attn.out_proj.weight', 'text_model.encoder.layers.1.self_attn.q_proj.bias', 'text_model.encoder.layers.1.self_attn.q_proj.weight', 'text_model.encoder.layers.1.self_attn.v_proj.bias', 'text_model.encoder.layers.1.self_attn.v_proj.weight', 'text_model.encoder.layers.10.layer_norm1.bias', 'text_model.encoder.layers.10.layer_norm1.weight', 'text_model.encoder.layers.10.layer_norm2.bias', 'text_model.encoder.layers.10.layer_norm2.weight', 'text_model.encoder.layers.10.mlp.fc1.bias', 'text_model.encoder.layers.10.mlp.fc1.weight', 'text_model.encoder.layers.10.mlp.fc2.bias', 'text_model.encoder.layers.10.mlp.fc2.weight', 'text_model.encoder.layers.10.self_attn.k_proj.bias', 'text_model.encoder.layers.10.self_attn.k_proj.weight', 'text_model.encoder.layers.10.self_attn.out_proj.bias', 'text_model.encoder.layers.10.self_attn.out_proj.weight', 'text_model.encoder.layers.10.self_attn.q_proj.bias', 'text_model.encoder.layers.10.self_attn.q_proj.weight', 'text_model.encoder.layers.10.self_attn.v_proj.bias', 'text_model.encoder.layers.10.self_attn.v_proj.weight', 'text_model.encoder.layers.11.layer_norm1.bias', 'text_model.encoder.layers.11.layer_norm1.weight', 'text_model.encoder.layers.11.layer_norm2.bias', 'text_model.encoder.layers.11.layer_norm2.weight', 'text_model.encoder.layers.11.mlp.fc1.bias', 'text_model.encoder.layers.11.mlp.fc1.weight', 'text_model.encoder.layers.11.mlp.fc2.bias', 'text_model.encoder.layers.11.mlp.fc2.weight', 'text_model.encoder.layers.11.self_attn.k_proj.bias', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.11.self_attn.out_proj.bias', 'text_model.encoder.layers.11.self_attn.out_proj.weight', 'text_model.encoder.layers.11.self_attn.q_proj.bias', 'text_model.encoder.layers.11.self_attn.q_proj.weight', 
'text_model.encoder.layers.11.self_attn.v_proj.bias', 'text_model.encoder.layers.11.self_attn.v_proj.weight', 'text_model.encoder.layers.12.layer_norm1.bias', 'text_model.encoder.layers.12.layer_norm1.weight', 'text_model.encoder.layers.12.layer_norm2.bias', 'text_model.encoder.layers.12.layer_norm2.weight', 'text_model.encoder.layers.12.mlp.fc1.bias', 'text_model.encoder.layers.12.mlp.fc1.weight', 'text_model.encoder.layers.12.mlp.fc2.bias', 'text_model.encoder.layers.12.mlp.fc2.weight', 'text_model.encoder.layers.12.self_attn.k_proj.bias', 'text_model.encoder.layers.12.self_attn.k_proj.weight', 'text_model.encoder.layers.12.self_attn.out_proj.bias', 'text_model.encoder.layers.12.self_attn.out_proj.weight', 'text_model.encoder.layers.12.self_attn.q_proj.bias', 'text_model.encoder.layers.12.self_attn.q_proj.weight', 'text_model.encoder.layers.12.self_attn.v_proj.bias', 'text_model.encoder.layers.12.self_attn.v_proj.weight', 'text_model.encoder.layers.13.layer_norm1.bias', 'text_model.encoder.layers.13.layer_norm1.weight', 'text_model.encoder.layers.13.layer_norm2.bias', 'text_model.encoder.layers.13.layer_norm2.weight', 'text_model.encoder.layers.13.mlp.fc1.bias', 'text_model.encoder.layers.13.mlp.fc1.weight', 'text_model.encoder.layers.13.mlp.fc2.bias', 'text_model.encoder.layers.13.mlp.fc2.weight', 'text_model.encoder.layers.13.self_attn.k_proj.bias', 'text_model.encoder.layers.13.self_attn.k_proj.weight', 'text_model.encoder.layers.13.self_attn.out_proj.bias', 'text_model.encoder.layers.13.self_attn.out_proj.weight', 'text_model.encoder.layers.13.self_attn.q_proj.bias', 'text_model.encoder.layers.13.self_attn.q_proj.weight', 'text_model.encoder.layers.13.self_attn.v_proj.bias', 'text_model.encoder.layers.13.self_attn.v_proj.weight', 'text_model.encoder.layers.14.layer_norm1.bias', 'text_model.encoder.layers.14.layer_norm1.weight', 'text_model.encoder.layers.14.layer_norm2.bias', 'text_model.encoder.layers.14.layer_norm2.weight', 'text_model.encoder.layers.14.mlp.fc1.bias', 'text_model.encoder.layers.14.mlp.fc1.weight', 'text_model.encoder.layers.14.mlp.fc2.bias', 'text_model.encoder.layers.14.mlp.fc2.weight', 'text_model.encoder.layers.14.self_attn.k_proj.bias', 'text_model.encoder.layers.14.self_attn.k_proj.weight', 'text_model.encoder.layers.14.self_attn.out_proj.bias', 'text_model.encoder.layers.14.self_attn.out_proj.weight', 'text_model.encoder.layers.14.self_attn.q_proj.bias', 'text_model.encoder.layers.14.self_attn.q_proj.weight', 'text_model.encoder.layers.14.self_attn.v_proj.bias', 'text_model.encoder.layers.14.self_attn.v_proj.weight', 'text_model.encoder.layers.15.layer_norm1.bias', 'text_model.encoder.layers.15.layer_norm1.weight', 'text_model.encoder.layers.15.layer_norm2.bias', 'text_model.encoder.layers.15.layer_norm2.weight', 'text_model.encoder.layers.15.mlp.fc1.bias', 'text_model.encoder.layers.15.mlp.fc1.weight', 'text_model.encoder.layers.15.mlp.fc2.bias', 'text_model.encoder.layers.15.mlp.fc2.weight', 'text_model.encoder.layers.15.self_attn.k_proj.bias', 'text_model.encoder.layers.15.self_attn.k_proj.weight', 'text_model.encoder.layers.15.self_attn.out_proj.bias', 'text_model.encoder.layers.15.self_attn.out_proj.weight', 'text_model.encoder.layers.15.self_attn.q_proj.bias', 'text_model.encoder.layers.15.self_attn.q_proj.weight', 'text_model.encoder.layers.15.self_attn.v_proj.bias', 'text_model.encoder.layers.15.self_attn.v_proj.weight', 'text_model.encoder.layers.16.layer_norm1.bias', 'text_model.encoder.layers.16.layer_norm1.weight', 
'text_model.encoder.layers.16.layer_norm2.bias', 'text_model.encoder.layers.16.layer_norm2.weight', 'text_model.encoder.layers.16.mlp.fc1.bias', 'text_model.encoder.layers.16.mlp.fc1.weight', 'text_model.encoder.layers.16.mlp.fc2.bias', 'text_model.encoder.layers.16.mlp.fc2.weight', 'text_model.encoder.layers.16.self_attn.k_proj.bias', 'text_model.encoder.layers.16.self_attn.k_proj.weight', 'text_model.encoder.layers.16.self_attn.out_proj.bias', 'text_model.encoder.layers.16.self_attn.out_proj.weight', 'text_model.encoder.layers.16.self_attn.q_proj.bias', 'text_model.encoder.layers.16.self_attn.q_proj.weight', 'text_model.encoder.layers.16.self_attn.v_proj.bias', 'text_model.encoder.layers.16.self_attn.v_proj.weight', 'text_model.encoder.layers.17.layer_norm1.bias', 'text_model.encoder.layers.17.layer_norm1.weight', 'text_model.encoder.layers.17.layer_norm2.bias', 'text_model.encoder.layers.17.layer_norm2.weight', 'text_model.encoder.layers.17.mlp.fc1.bias', 'text_model.encoder.layers.17.mlp.fc1.weight', 'text_model.encoder.layers.17.mlp.fc2.bias', 'text_model.encoder.layers.17.mlp.fc2.weight', 'text_model.encoder.layers.17.self_attn.k_proj.bias', 'text_model.encoder.layers.17.self_attn.k_proj.weight', 'text_model.encoder.layers.17.self_attn.out_proj.bias', 'text_model.encoder.layers.17.self_attn.out_proj.weight', 'text_model.encoder.layers.17.self_attn.q_proj.bias', 'text_model.encoder.layers.17.self_attn.q_proj.weight', 'text_model.encoder.layers.17.self_attn.v_proj.bias', 'text_model.encoder.layers.17.self_attn.v_proj.weight', 'text_model.encoder.layers.18.layer_norm1.bias', 'text_model.encoder.layers.18.layer_norm1.weight', 'text_model.encoder.layers.18.layer_norm2.bias', 'text_model.encoder.layers.18.layer_norm2.weight', 'text_model.encoder.layers.18.mlp.fc1.bias', 'text_model.encoder.layers.18.mlp.fc1.weight', 'text_model.encoder.layers.18.mlp.fc2.bias', 'text_model.encoder.layers.18.mlp.fc2.weight', 'text_model.encoder.layers.18.self_attn.k_proj.bias', 'text_model.encoder.layers.18.self_attn.k_proj.weight', 'text_model.encoder.layers.18.self_attn.out_proj.bias', 'text_model.encoder.layers.18.self_attn.out_proj.weight', 'text_model.encoder.layers.18.self_attn.q_proj.bias', 'text_model.encoder.layers.18.self_attn.q_proj.weight', 'text_model.encoder.layers.18.self_attn.v_proj.bias', 'text_model.encoder.layers.18.self_attn.v_proj.weight', 'text_model.encoder.layers.19.layer_norm1.bias', 'text_model.encoder.layers.19.layer_norm1.weight', 'text_model.encoder.layers.19.layer_norm2.bias', 'text_model.encoder.layers.19.layer_norm2.weight', 'text_model.encoder.layers.19.mlp.fc1.bias', 'text_model.encoder.layers.19.mlp.fc1.weight', 'text_model.encoder.layers.19.mlp.fc2.bias', 'text_model.encoder.layers.19.mlp.fc2.weight', 'text_model.encoder.layers.19.self_attn.k_proj.bias', 'text_model.encoder.layers.19.self_attn.k_proj.weight', 'text_model.encoder.layers.19.self_attn.out_proj.bias', 'text_model.encoder.layers.19.self_attn.out_proj.weight', 'text_model.encoder.layers.19.self_attn.q_proj.bias', 'text_model.encoder.layers.19.self_attn.q_proj.weight', 'text_model.encoder.layers.19.self_attn.v_proj.bias', 'text_model.encoder.layers.19.self_attn.v_proj.weight', 'text_model.encoder.layers.2.layer_norm1.bias', 'text_model.encoder.layers.2.layer_norm1.weight', 'text_model.encoder.layers.2.layer_norm2.bias', 'text_model.encoder.layers.2.layer_norm2.weight', 'text_model.encoder.layers.2.mlp.fc1.bias', 'text_model.encoder.layers.2.mlp.fc1.weight', 'text_model.encoder.layers.2.mlp.fc2.bias', 
'text_model.encoder.layers.2.mlp.fc2.weight', 'text_model.encoder.layers.2.self_attn.k_proj.bias', 'text_model.encoder.layers.2.self_attn.k_proj.weight', 'text_model.encoder.layers.2.self_attn.out_proj.bias', 'text_model.encoder.layers.2.self_attn.out_proj.weight', 'text_model.encoder.layers.2.self_attn.q_proj.bias', 'text_model.encoder.layers.2.self_attn.q_proj.weight', 'text_model.encoder.layers.2.self_attn.v_proj.bias', 'text_model.encoder.layers.2.self_attn.v_proj.weight', 'text_model.encoder.layers.20.layer_norm1.bias', 'text_model.encoder.layers.20.layer_norm1.weight', 'text_model.encoder.layers.20.layer_norm2.bias', 'text_model.encoder.layers.20.layer_norm2.weight', 'text_model.encoder.layers.20.mlp.fc1.bias', 'text_model.encoder.layers.20.mlp.fc1.weight', 'text_model.encoder.layers.20.mlp.fc2.bias', 'text_model.encoder.layers.20.mlp.fc2.weight', 'text_model.encoder.layers.20.self_attn.k_proj.bias', 'text_model.encoder.layers.20.self_attn.k_proj.weight', 'text_model.encoder.layers.20.self_attn.out_proj.bias', 'text_model.encoder.layers.20.self_attn.out_proj.weight', 'text_model.encoder.layers.20.self_attn.q_proj.bias', 'text_model.encoder.layers.20.self_attn.q_proj.weight', 'text_model.encoder.layers.20.self_attn.v_proj.bias', 'text_model.encoder.layers.20.self_attn.v_proj.weight', 'text_model.encoder.layers.21.layer_norm1.bias', 'text_model.encoder.layers.21.layer_norm1.weight', 'text_model.encoder.layers.21.layer_norm2.bias', 'text_model.encoder.layers.21.layer_norm2.weight', 'text_model.encoder.layers.21.mlp.fc1.bias', 'text_model.encoder.layers.21.mlp.fc1.weight', 'text_model.encoder.layers.21.mlp.fc2.bias', 'text_model.encoder.layers.21.mlp.fc2.weight', 'text_model.encoder.layers.21.self_attn.k_proj.bias', 'text_model.encoder.layers.21.self_attn.k_proj.weight', 'text_model.encoder.layers.21.self_attn.out_proj.bias', 'text_model.encoder.layers.21.self_attn.out_proj.weight', 'text_model.encoder.layers.21.self_attn.q_proj.bias', 'text_model.encoder.layers.21.self_attn.q_proj.weight', 'text_model.encoder.layers.21.self_attn.v_proj.bias', 'text_model.encoder.layers.21.self_attn.v_proj.weight', 'text_model.encoder.layers.22.layer_norm1.bias', 'text_model.encoder.layers.22.layer_norm1.weight', 'text_model.encoder.layers.22.layer_norm2.bias', 'text_model.encoder.layers.22.layer_norm2.weight', 'text_model.encoder.layers.22.mlp.fc1.bias', 'text_model.encoder.layers.22.mlp.fc1.weight', 'text_model.encoder.layers.22.mlp.fc2.bias', 'text_model.encoder.layers.22.mlp.fc2.weight', 'text_model.encoder.layers.22.self_attn.k_proj.bias', 'text_model.encoder.layers.22.self_attn.k_proj.weight', 'text_model.encoder.layers.22.self_attn.out_proj.bias', 'text_model.encoder.layers.22.self_attn.out_proj.weight', 'text_model.encoder.layers.22.self_attn.q_proj.bias', 'text_model.encoder.layers.22.self_attn.q_proj.weight', 'text_model.encoder.layers.22.self_attn.v_proj.bias', 'text_model.encoder.layers.22.self_attn.v_proj.weight', 'text_model.encoder.layers.23.layer_norm1.bias', 'text_model.encoder.layers.23.layer_norm1.weight', 'text_model.encoder.layers.23.layer_norm2.bias', 'text_model.encoder.layers.23.layer_norm2.weight', 'text_model.encoder.layers.23.mlp.fc1.bias', 'text_model.encoder.layers.23.mlp.fc1.weight', 'text_model.encoder.layers.23.mlp.fc2.bias', 'text_model.encoder.layers.23.mlp.fc2.weight', 'text_model.encoder.layers.23.self_attn.k_proj.bias', 'text_model.encoder.layers.23.self_attn.k_proj.weight', 'text_model.encoder.layers.23.self_attn.out_proj.bias', 
'text_model.encoder.layers.23.self_attn.out_proj.weight', 'text_model.encoder.layers.23.self_attn.q_proj.bias', 'text_model.encoder.layers.23.self_attn.q_proj.weight', 'text_model.encoder.layers.23.self_attn.v_proj.bias', 'text_model.encoder.layers.23.self_attn.v_proj.weight', 'text_model.encoder.layers.24.layer_norm1.bias', 'text_model.encoder.layers.24.layer_norm1.weight', 'text_model.encoder.layers.24.layer_norm2.bias', 'text_model.encoder.layers.24.layer_norm2.weight', 'text_model.encoder.layers.24.mlp.fc1.bias', 'text_model.encoder.layers.24.mlp.fc1.weight', 'text_model.encoder.layers.24.mlp.fc2.bias', 'text_model.encoder.layers.24.mlp.fc2.weight', 'text_model.encoder.layers.24.self_attn.k_proj.bias', 'text_model.encoder.layers.24.self_attn.k_proj.weight', 'text_model.encoder.layers.24.self_attn.out_proj.bias', 'text_model.encoder.layers.24.self_attn.out_proj.weight', 'text_model.encoder.layers.24.self_attn.q_proj.bias', 'text_model.encoder.layers.24.self_attn.q_proj.weight', 'text_model.encoder.layers.24.self_attn.v_proj.bias', 'text_model.encoder.layers.24.self_attn.v_proj.weight', 'text_model.encoder.layers.25.layer_norm1.bias', 'text_model.encoder.layers.25.layer_norm1.weight', 'text_model.encoder.layers.25.layer_norm2.bias', 'text_model.encoder.layers.25.layer_norm2.weight', 'text_model.encoder.layers.25.mlp.fc1.bias', 'text_model.encoder.layers.25.mlp.fc1.weight', 'text_model.encoder.layers.25.mlp.fc2.bias', 'text_model.encoder.layers.25.mlp.fc2.weight', 'text_model.encoder.layers.25.self_attn.k_proj.bias', 'text_model.encoder.layers.25.self_attn.k_proj.weight', 'text_model.encoder.layers.25.self_attn.out_proj.bias', 'text_model.encoder.layers.25.self_attn.out_proj.weight', 'text_model.encoder.layers.25.self_attn.q_proj.bias', 'text_model.encoder.layers.25.self_attn.q_proj.weight', 'text_model.encoder.layers.25.self_attn.v_proj.bias', 'text_model.encoder.layers.25.self_attn.v_proj.weight', 'text_model.encoder.layers.26.layer_norm1.bias', 'text_model.encoder.layers.26.layer_norm1.weight', 'text_model.encoder.layers.26.layer_norm2.bias', 'text_model.encoder.layers.26.layer_norm2.weight', 'text_model.encoder.layers.26.mlp.fc1.bias', 'text_model.encoder.layers.26.mlp.fc1.weight', 'text_model.encoder.layers.26.mlp.fc2.bias', 'text_model.encoder.layers.26.mlp.fc2.weight', 'text_model.encoder.layers.26.self_attn.k_proj.bias', 'text_model.encoder.layers.26.self_attn.k_proj.weight', 'text_model.encoder.layers.26.self_attn.out_proj.bias', 'text_model.encoder.layers.26.self_attn.out_proj.weight', 'text_model.encoder.layers.26.self_attn.q_proj.bias', 'text_model.encoder.layers.26.self_attn.q_proj.weight', 'text_model.encoder.layers.26.self_attn.v_proj.bias', 'text_model.encoder.layers.26.self_attn.v_proj.weight', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.3.layer_norm1.weight', 'text_model.encoder.layers.3.layer_norm2.bias', 'text_model.encoder.layers.3.layer_norm2.weight', 'text_model.encoder.layers.3.mlp.fc1.bias', 'text_model.encoder.layers.3.mlp.fc1.weight', 'text_model.encoder.layers.3.mlp.fc2.bias', 'text_model.encoder.layers.3.mlp.fc2.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.3.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.out_proj.bias', 'text_model.encoder.layers.3.self_attn.out_proj.weight', 'text_model.encoder.layers.3.self_attn.q_proj.bias', 'text_model.encoder.layers.3.self_attn.q_proj.weight', 'text_model.encoder.layers.3.self_attn.v_proj.bias', 
'text_model.encoder.layers.3.self_attn.v_proj.weight', 'text_model.encoder.layers.4.layer_norm1.bias', 'text_model.encoder.layers.4.layer_norm1.weight', 'text_model.encoder.layers.4.layer_norm2.bias', 'text_model.encoder.layers.4.layer_norm2.weight', 'text_model.encoder.layers.4.mlp.fc1.bias', 'text_model.encoder.layers.4.mlp.fc1.weight', 'text_model.encoder.layers.4.mlp.fc2.bias', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.4.self_attn.k_proj.bias', 'text_model.encoder.layers.4.self_attn.k_proj.weight', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.4.self_attn.out_proj.weight', 'text_model.encoder.layers.4.self_attn.q_proj.bias', 'text_model.encoder.layers.4.self_attn.q_proj.weight', 'text_model.encoder.layers.4.self_attn.v_proj.bias', 'text_model.encoder.layers.4.self_attn.v_proj.weight', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.5.layer_norm1.weight', 'text_model.encoder.layers.5.layer_norm2.bias', 'text_model.encoder.layers.5.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc1.bias', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.5.mlp.fc2.weight', 'text_model.encoder.layers.5.self_attn.k_proj.bias', 'text_model.encoder.layers.5.self_attn.k_proj.weight', 'text_model.encoder.layers.5.self_attn.out_proj.bias', 'text_model.encoder.layers.5.self_attn.out_proj.weight', 'text_model.encoder.layers.5.self_attn.q_proj.bias', 'text_model.encoder.layers.5.self_attn.q_proj.weight', 'text_model.encoder.layers.5.self_attn.v_proj.bias', 'text_model.encoder.layers.5.self_attn.v_proj.weight', 'text_model.encoder.layers.6.layer_norm1.bias', 'text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.layer_norm2.bias', 'text_model.encoder.layers.6.layer_norm2.weight', 'text_model.encoder.layers.6.mlp.fc1.bias', 'text_model.encoder.layers.6.mlp.fc1.weight', 'text_model.encoder.layers.6.mlp.fc2.bias', 'text_model.encoder.layers.6.mlp.fc2.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.6.self_attn.k_proj.weight', 'text_model.encoder.layers.6.self_attn.out_proj.bias', 'text_model.encoder.layers.6.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.bias', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.6.self_attn.v_proj.bias', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.layer_norm1.bias', 'text_model.encoder.layers.7.layer_norm1.weight', 'text_model.encoder.layers.7.layer_norm2.bias', 'text_model.encoder.layers.7.layer_norm2.weight', 'text_model.encoder.layers.7.mlp.fc1.bias', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.7.mlp.fc2.bias', 'text_model.encoder.layers.7.mlp.fc2.weight', 'text_model.encoder.layers.7.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.out_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.7.self_attn.q_proj.bias', 'text_model.encoder.layers.7.self_attn.q_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.8.layer_norm1.bias', 'text_model.encoder.layers.8.layer_norm1.weight', 'text_model.encoder.layers.8.layer_norm2.bias', 'text_model.encoder.layers.8.layer_norm2.weight', 'text_model.encoder.layers.8.mlp.fc1.bias', 
'text_model.encoder.layers.8.mlp.fc1.weight', 'text_model.encoder.layers.8.mlp.fc2.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.8.self_attn.out_proj.bias', 'text_model.encoder.layers.8.self_attn.out_proj.weight', 'text_model.encoder.layers.8.self_attn.q_proj.bias', 'text_model.encoder.layers.8.self_attn.q_proj.weight', 'text_model.encoder.layers.8.self_attn.v_proj.bias', 'text_model.encoder.layers.8.self_attn.v_proj.weight', 'text_model.encoder.layers.9.layer_norm1.bias', 'text_model.encoder.layers.9.layer_norm1.weight', 'text_model.encoder.layers.9.layer_norm2.bias', 'text_model.encoder.layers.9.layer_norm2.weight', 'text_model.encoder.layers.9.mlp.fc1.bias', 'text_model.encoder.layers.9.mlp.fc1.weight', 'text_model.encoder.layers.9.mlp.fc2.bias', 'text_model.encoder.layers.9.mlp.fc2.weight', 'text_model.encoder.layers.9.self_attn.k_proj.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.9.self_attn.out_proj.bias', 'text_model.encoder.layers.9.self_attn.out_proj.weight', 'text_model.encoder.layers.9.self_attn.q_proj.bias', 'text_model.encoder.layers.9.self_attn.q_proj.weight', 'text_model.encoder.layers.9.self_attn.v_proj.bias', 'text_model.encoder.layers.9.self_attn.v_proj.weight', 'text_model.final_layer_norm.bias', 'text_model.final_layer_norm.weight', 'text_model.head.bias', 'text_model.head.weight']
- This IS expected if you are initializing SiglipVisionModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SiglipVisionModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2025-02-14 05:01:37,442 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of SiglipVisionModel were initialized from the model checkpoint at google/siglip-so400m-patch14-384.
If your task is similar to the task the model of the checkpoint was trained on, you can already use SiglipVisionModel for predictions without further training.
2025-02-14 05:01:37,626 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/preprocessor_config.json
2025-02-14 05:01:37,627 - image_processing_base.py:429 - from_dict - INFO - Image processor SiglipImageProcessor {
  "do_convert_rgb": null, "do_normalize": true, "do_rescale": true, "do_resize": true,
  "image_mean": [0.5, 0.5, 0.5], "image_processor_type": "SiglipImageProcessor", "image_std": [0.5, 0.5, 0.5],
  "processor_class": "SiglipProcessor", "resample": 3, "rescale_factor": 0.00392156862745098,
  "size": { "height": 384, "width": 384 }
}
2025-02-14 05:01:37,991 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
2025-02-14 05:01:37,995 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
  "apply_layernorm": true, "architectures": ["Dinov2Model"], "attention_probs_dropout_prob": 0.0,
  "drop_path_rate": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 1536,
  "image_size": 518, "initializer_range": 0.02, "layer_norm_eps": 1e-06, "layerscale_value": 1.0,
  "mlp_ratio": 4, "model_type": "dinov2", "num_attention_heads": 24, "num_channels": 3,
  "num_hidden_layers": 40, "out_features": ["stage40"], "out_indices": [40], "patch_size": 14,
  "qkv_bias": true, "reshape_hidden_states": true,
  "stage_names": ["stem", "stage1", "stage2", "stage3", "stage4", "stage5", "stage6", "stage7", "stage8", "stage9", "stage10", "stage11", "stage12", "stage13", "stage14", "stage15", "stage16", "stage17", "stage18", "stage19", "stage20", "stage21", "stage22", "stage23", "stage24", "stage25", "stage26", "stage27", "stage28", "stage29", "stage30", "stage31", "stage32", "stage33", "stage34", "stage35", "stage36", "stage37", "stage38", "stage39", "stage40"],
  "torch_dtype": "float32", "transformers_version": "4.43.1", "use_swiglu_ffn": true
}
2025-02-14 05:01:37,995 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/model.safetensors
2025-02-14 05:01:38,584 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing Dinov2Model.
2025-02-14 05:01:38,584 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of Dinov2Model were initialized from the model checkpoint at facebook/dinov2-giant.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Dinov2Model for predictions without further training.
2025-02-14 05:01:38,771 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/preprocessor_config.json
2025-02-14 05:01:38,774 - image_processing_base.py:429 - from_dict - INFO - Image processor BitImageProcessor {
  "crop_size": { "height": 378, "width": 378 }, "do_center_crop": true, "do_convert_rgb": true,
  "do_normalize": true, "do_rescale": true, "do_resize": true,
  "image_mean": [0.485, 0.456, 0.406], "image_processor_type": "BitImageProcessor", "image_std": [0.229, 0.224, 0.225],
  "resample": 3, "rescale_factor": 0.00392156862745098, "size": { "shortest_edge": 378 }
}
2025-02-14 05:01:39,569 - finetune_llama.py:1239 - train - INFO - Total params: 3264865280
2025-02-14 05:01:39,569 - finetune_llama.py:1240 - train - INFO - Trainable params: 12589056
2025-02-14 05:01:39,569 - finetune_llama.py:1241 - train - INFO - LM head params: 394002432
version https://git-lfs.github.com/spec/v1
oid sha256:fae628e76df568dd5e80f7f8759f86215724ca5777ddddb01d6531886e78a9ce
size 35149
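The three lines just above are the Git LFS pointer that now stands in for the raw log inside the repository: the pointer spec version, the SHA-256 object id of the real file, and its size in bytes. A minimal sketch (not part of this repo), assuming the standard LFS v1 pointer text format and hypothetical local paths, showing how such a pointer can be parsed and checked against a downloaded log:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    # A Git LFS v1 pointer is a short "key value" text file (version, oid, size).
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def matches_pointer(pointer_path: str, blob_path: str) -> bool:
    # Compare a local file against the oid/size recorded in the pointer.
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Hypothetical usage: the pointer as stored in the repo vs. a log fetched with `git lfs pull`.
print(matches_pointer("runtime_logs/run_2025-02-14_05-01-33.log", "/tmp/run_2025-02-14_05-01-33.log"))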
runtime_logs/run_2025-02-14_05-02-58.log
CHANGED
The diff for this file is too large to render.
See raw diff
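For entries like the one above, whose diff is too large to render inline, the raw LFS-backed log can be downloaded directly instead of being read through the diff viewer. A minimal sketch assuming the huggingface_hub client; the repo_id is a placeholder (the repository name is not shown on this page) and repo_type is an assumption:

from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="<user>/<repo>",  # placeholder for the repository this commit belongs to
    filename="runtime_logs/run_2025-02-14_05-02-58.log",
    revision="main",  # or pin to a specific commit sha
    repo_type="model",  # assumption: change if this is a dataset or space repo
)
print(local_path)  # hf_hub_download resolves the LFS pointer and returns the cached file path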
runtime_logs/run_2025-02-14_17-32-33.log
CHANGED
@@ -1,378 +1,3 @@
[log lines 1-93: the CambrianConfig dump that opens every run in this set, ending with "vocab_size": 128256]
2025-02-14 17:32:33,813 - modeling_utils.py:3618 - from_pretrained - INFO - loading weights file ./checkpoints/longvu_llama3_2/pytorch_model.bin
2025-02-14 17:32:33,874 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
  "bos_token_id": 128000, "eos_token_id": [128001, 128008, 128009], "use_cache": false
}
2025-02-14 17:32:34,467 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
2025-02-14 17:32:34,470 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
  [same Dinov2Config fields as in the previous log]
}
2025-02-14 17:32:35,842 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing CambrianLlamaForCausalLM.
2025-02-14 17:32:35,843 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of CambrianLlamaForCausalLM were initialized from the model checkpoint at ./checkpoints/longvu_llama3_2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use CambrianLlamaForCausalLM for predictions without further training.
2025-02-14 17:32:35,848 - configuration_utils.py:991 - from_pretrained - INFO - loading configuration file ./checkpoints/longvu_llama3_2/generation_config.json
2025-02-14 17:32:35,848 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
  "bos_token_id": 128000, "do_sample": true, "eos_token_id": [128001, 128008, 128009], "temperature": 0.6, "top_p": 0.9
}
2025-02-14 17:32:36,138 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer.json
2025-02-14 17:32:36,138 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file added_tokens.json
2025-02-14 17:32:36,139 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file special_tokens_map.json
2025-02-14 17:32:36,139 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer_config.json
2025-02-14 17:32:36,540 - tokenization_utils_base.py:2533 - _from_pretrained - INFO - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2025-02-14 17:32:36,920 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/config.json
2025-02-14 17:32:36,923 - configuration_utils.py:800 - from_dict - INFO - Model config SiglipVisionConfig {
  "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 384,
  "intermediate_size": 4304, "layer_norm_eps": 1e-06, "model_type": "siglip_vision_model",
  "num_attention_heads": 16, "num_channels": 3, "num_hidden_layers": 27, "patch_size": 14,
  "transformers_version": "4.43.1"
}
2025-02-14 17:32:36,924 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/model.safetensors
2025-02-14 17:32:37,192 - modeling_utils.py:4440 - _load_pretrained_model - INFO - Some weights of the model checkpoint at google/siglip-so400m-patch14-384 were not used when initializing SiglipVisionModel: ['logit_bias', 'logit_scale', 'text_model.embeddings.position_embedding.weight', 'text_model.embeddings.token_embedding.weight', 'text_model.encoder.layers.0.layer_norm1.bias', 'text_model.encoder.layers.0.layer_norm1.weight', 'text_model.encoder.layers.0.layer_norm2.bias', 'text_model.encoder.layers.0.layer_norm2.weight', 'text_model.encoder.layers.0.mlp.fc1.bias', 'text_model.encoder.layers.0.mlp.fc1.weight', 'text_model.encoder.layers.0.mlp.fc2.bias', 'text_model.encoder.layers.0.mlp.fc2.weight', 'text_model.encoder.layers.0.self_attn.k_proj.bias', 'text_model.encoder.layers.0.self_attn.k_proj.weight', 'text_model.encoder.layers.0.self_attn.out_proj.bias', 'text_model.encoder.layers.0.self_attn.out_proj.weight', 'text_model.encoder.layers.0.self_attn.q_proj.bias', 'text_model.encoder.layers.0.self_attn.q_proj.weight', 'text_model.encoder.layers.0.self_attn.v_proj.bias', 'text_model.encoder.layers.0.self_attn.v_proj.weight', 'text_model.encoder.layers.1.layer_norm1.bias', 'text_model.encoder.layers.1.layer_norm1.weight', 'text_model.encoder.layers.1.layer_norm2.bias', 'text_model.encoder.layers.1.layer_norm2.weight', 'text_model.encoder.layers.1.mlp.fc1.bias', 'text_model.encoder.layers.1.mlp.fc1.weight', 'text_model.encoder.layers.1.mlp.fc2.bias', 'text_model.encoder.layers.1.mlp.fc2.weight', 'text_model.encoder.layers.1.self_attn.k_proj.bias', 'text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.1.self_attn.out_proj.bias', 'text_model.encoder.layers.1.self_attn.out_proj.weight', 'text_model.encoder.layers.1.self_attn.q_proj.bias', 'text_model.encoder.layers.1.self_attn.q_proj.weight', 'text_model.encoder.layers.1.self_attn.v_proj.bias', 'text_model.encoder.layers.1.self_attn.v_proj.weight', 'text_model.encoder.layers.10.layer_norm1.bias', 'text_model.encoder.layers.10.layer_norm1.weight', 'text_model.encoder.layers.10.layer_norm2.bias', 'text_model.encoder.layers.10.layer_norm2.weight', 'text_model.encoder.layers.10.mlp.fc1.bias', 'text_model.encoder.layers.10.mlp.fc1.weight', 'text_model.encoder.layers.10.mlp.fc2.bias', 'text_model.encoder.layers.10.mlp.fc2.weight', 'text_model.encoder.layers.10.self_attn.k_proj.bias', 'text_model.encoder.layers.10.self_attn.k_proj.weight', 'text_model.encoder.layers.10.self_attn.out_proj.bias', 'text_model.encoder.layers.10.self_attn.out_proj.weight', 'text_model.encoder.layers.10.self_attn.q_proj.bias', 'text_model.encoder.layers.10.self_attn.q_proj.weight', 'text_model.encoder.layers.10.self_attn.v_proj.bias', 'text_model.encoder.layers.10.self_attn.v_proj.weight', 'text_model.encoder.layers.11.layer_norm1.bias', 'text_model.encoder.layers.11.layer_norm1.weight', 'text_model.encoder.layers.11.layer_norm2.bias', 'text_model.encoder.layers.11.layer_norm2.weight', 'text_model.encoder.layers.11.mlp.fc1.bias', 'text_model.encoder.layers.11.mlp.fc1.weight', 'text_model.encoder.layers.11.mlp.fc2.bias', 'text_model.encoder.layers.11.mlp.fc2.weight', 'text_model.encoder.layers.11.self_attn.k_proj.bias', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.11.self_attn.out_proj.bias', 'text_model.encoder.layers.11.self_attn.out_proj.weight', 'text_model.encoder.layers.11.self_attn.q_proj.bias', 'text_model.encoder.layers.11.self_attn.q_proj.weight', 
'text_model.encoder.layers.11.self_attn.v_proj.bias', 'text_model.encoder.layers.11.self_attn.v_proj.weight', 'text_model.encoder.layers.12.layer_norm1.bias', 'text_model.encoder.layers.12.layer_norm1.weight', 'text_model.encoder.layers.12.layer_norm2.bias', 'text_model.encoder.layers.12.layer_norm2.weight', 'text_model.encoder.layers.12.mlp.fc1.bias', 'text_model.encoder.layers.12.mlp.fc1.weight', 'text_model.encoder.layers.12.mlp.fc2.bias', 'text_model.encoder.layers.12.mlp.fc2.weight', 'text_model.encoder.layers.12.self_attn.k_proj.bias', 'text_model.encoder.layers.12.self_attn.k_proj.weight', 'text_model.encoder.layers.12.self_attn.out_proj.bias', 'text_model.encoder.layers.12.self_attn.out_proj.weight', 'text_model.encoder.layers.12.self_attn.q_proj.bias', 'text_model.encoder.layers.12.self_attn.q_proj.weight', 'text_model.encoder.layers.12.self_attn.v_proj.bias', 'text_model.encoder.layers.12.self_attn.v_proj.weight', 'text_model.encoder.layers.13.layer_norm1.bias', 'text_model.encoder.layers.13.layer_norm1.weight', 'text_model.encoder.layers.13.layer_norm2.bias', 'text_model.encoder.layers.13.layer_norm2.weight', 'text_model.encoder.layers.13.mlp.fc1.bias', 'text_model.encoder.layers.13.mlp.fc1.weight', 'text_model.encoder.layers.13.mlp.fc2.bias', 'text_model.encoder.layers.13.mlp.fc2.weight', 'text_model.encoder.layers.13.self_attn.k_proj.bias', 'text_model.encoder.layers.13.self_attn.k_proj.weight', 'text_model.encoder.layers.13.self_attn.out_proj.bias', 'text_model.encoder.layers.13.self_attn.out_proj.weight', 'text_model.encoder.layers.13.self_attn.q_proj.bias', 'text_model.encoder.layers.13.self_attn.q_proj.weight', 'text_model.encoder.layers.13.self_attn.v_proj.bias', 'text_model.encoder.layers.13.self_attn.v_proj.weight', 'text_model.encoder.layers.14.layer_norm1.bias', 'text_model.encoder.layers.14.layer_norm1.weight', 'text_model.encoder.layers.14.layer_norm2.bias', 'text_model.encoder.layers.14.layer_norm2.weight', 'text_model.encoder.layers.14.mlp.fc1.bias', 'text_model.encoder.layers.14.mlp.fc1.weight', 'text_model.encoder.layers.14.mlp.fc2.bias', 'text_model.encoder.layers.14.mlp.fc2.weight', 'text_model.encoder.layers.14.self_attn.k_proj.bias', 'text_model.encoder.layers.14.self_attn.k_proj.weight', 'text_model.encoder.layers.14.self_attn.out_proj.bias', 'text_model.encoder.layers.14.self_attn.out_proj.weight', 'text_model.encoder.layers.14.self_attn.q_proj.bias', 'text_model.encoder.layers.14.self_attn.q_proj.weight', 'text_model.encoder.layers.14.self_attn.v_proj.bias', 'text_model.encoder.layers.14.self_attn.v_proj.weight', 'text_model.encoder.layers.15.layer_norm1.bias', 'text_model.encoder.layers.15.layer_norm1.weight', 'text_model.encoder.layers.15.layer_norm2.bias', 'text_model.encoder.layers.15.layer_norm2.weight', 'text_model.encoder.layers.15.mlp.fc1.bias', 'text_model.encoder.layers.15.mlp.fc1.weight', 'text_model.encoder.layers.15.mlp.fc2.bias', 'text_model.encoder.layers.15.mlp.fc2.weight', 'text_model.encoder.layers.15.self_attn.k_proj.bias', 'text_model.encoder.layers.15.self_attn.k_proj.weight', 'text_model.encoder.layers.15.self_attn.out_proj.bias', 'text_model.encoder.layers.15.self_attn.out_proj.weight', 'text_model.encoder.layers.15.self_attn.q_proj.bias', 'text_model.encoder.layers.15.self_attn.q_proj.weight', 'text_model.encoder.layers.15.self_attn.v_proj.bias', 'text_model.encoder.layers.15.self_attn.v_proj.weight', 'text_model.encoder.layers.16.layer_norm1.bias', 'text_model.encoder.layers.16.layer_norm1.weight', 
'text_model.encoder.layers.16.layer_norm2.bias', 'text_model.encoder.layers.16.layer_norm2.weight', 'text_model.encoder.layers.16.mlp.fc1.bias', 'text_model.encoder.layers.16.mlp.fc1.weight', 'text_model.encoder.layers.16.mlp.fc2.bias', 'text_model.encoder.layers.16.mlp.fc2.weight', 'text_model.encoder.layers.16.self_attn.k_proj.bias', 'text_model.encoder.layers.16.self_attn.k_proj.weight', 'text_model.encoder.layers.16.self_attn.out_proj.bias', 'text_model.encoder.layers.16.self_attn.out_proj.weight', 'text_model.encoder.layers.16.self_attn.q_proj.bias', 'text_model.encoder.layers.16.self_attn.q_proj.weight', 'text_model.encoder.layers.16.self_attn.v_proj.bias', 'text_model.encoder.layers.16.self_attn.v_proj.weight', 'text_model.encoder.layers.17.layer_norm1.bias', 'text_model.encoder.layers.17.layer_norm1.weight', 'text_model.encoder.layers.17.layer_norm2.bias', 'text_model.encoder.layers.17.layer_norm2.weight', 'text_model.encoder.layers.17.mlp.fc1.bias', 'text_model.encoder.layers.17.mlp.fc1.weight', 'text_model.encoder.layers.17.mlp.fc2.bias', 'text_model.encoder.layers.17.mlp.fc2.weight', 'text_model.encoder.layers.17.self_attn.k_proj.bias', 'text_model.encoder.layers.17.self_attn.k_proj.weight', 'text_model.encoder.layers.17.self_attn.out_proj.bias', 'text_model.encoder.layers.17.self_attn.out_proj.weight', 'text_model.encoder.layers.17.self_attn.q_proj.bias', 'text_model.encoder.layers.17.self_attn.q_proj.weight', 'text_model.encoder.layers.17.self_attn.v_proj.bias', 'text_model.encoder.layers.17.self_attn.v_proj.weight', 'text_model.encoder.layers.18.layer_norm1.bias', 'text_model.encoder.layers.18.layer_norm1.weight', 'text_model.encoder.layers.18.layer_norm2.bias', 'text_model.encoder.layers.18.layer_norm2.weight', 'text_model.encoder.layers.18.mlp.fc1.bias', 'text_model.encoder.layers.18.mlp.fc1.weight', 'text_model.encoder.layers.18.mlp.fc2.bias', 'text_model.encoder.layers.18.mlp.fc2.weight', 'text_model.encoder.layers.18.self_attn.k_proj.bias', 'text_model.encoder.layers.18.self_attn.k_proj.weight', 'text_model.encoder.layers.18.self_attn.out_proj.bias', 'text_model.encoder.layers.18.self_attn.out_proj.weight', 'text_model.encoder.layers.18.self_attn.q_proj.bias', 'text_model.encoder.layers.18.self_attn.q_proj.weight', 'text_model.encoder.layers.18.self_attn.v_proj.bias', 'text_model.encoder.layers.18.self_attn.v_proj.weight', 'text_model.encoder.layers.19.layer_norm1.bias', 'text_model.encoder.layers.19.layer_norm1.weight', 'text_model.encoder.layers.19.layer_norm2.bias', 'text_model.encoder.layers.19.layer_norm2.weight', 'text_model.encoder.layers.19.mlp.fc1.bias', 'text_model.encoder.layers.19.mlp.fc1.weight', 'text_model.encoder.layers.19.mlp.fc2.bias', 'text_model.encoder.layers.19.mlp.fc2.weight', 'text_model.encoder.layers.19.self_attn.k_proj.bias', 'text_model.encoder.layers.19.self_attn.k_proj.weight', 'text_model.encoder.layers.19.self_attn.out_proj.bias', 'text_model.encoder.layers.19.self_attn.out_proj.weight', 'text_model.encoder.layers.19.self_attn.q_proj.bias', 'text_model.encoder.layers.19.self_attn.q_proj.weight', 'text_model.encoder.layers.19.self_attn.v_proj.bias', 'text_model.encoder.layers.19.self_attn.v_proj.weight', 'text_model.encoder.layers.2.layer_norm1.bias', 'text_model.encoder.layers.2.layer_norm1.weight', 'text_model.encoder.layers.2.layer_norm2.bias', 'text_model.encoder.layers.2.layer_norm2.weight', 'text_model.encoder.layers.2.mlp.fc1.bias', 'text_model.encoder.layers.2.mlp.fc1.weight', 'text_model.encoder.layers.2.mlp.fc2.bias', 
'text_model.encoder.layers.2.mlp.fc2.weight', 'text_model.encoder.layers.2.self_attn.k_proj.bias', 'text_model.encoder.layers.2.self_attn.k_proj.weight', 'text_model.encoder.layers.2.self_attn.out_proj.bias', 'text_model.encoder.layers.2.self_attn.out_proj.weight', 'text_model.encoder.layers.2.self_attn.q_proj.bias', 'text_model.encoder.layers.2.self_attn.q_proj.weight', 'text_model.encoder.layers.2.self_attn.v_proj.bias', 'text_model.encoder.layers.2.self_attn.v_proj.weight', 'text_model.encoder.layers.20.layer_norm1.bias', 'text_model.encoder.layers.20.layer_norm1.weight', 'text_model.encoder.layers.20.layer_norm2.bias', 'text_model.encoder.layers.20.layer_norm2.weight', 'text_model.encoder.layers.20.mlp.fc1.bias', 'text_model.encoder.layers.20.mlp.fc1.weight', 'text_model.encoder.layers.20.mlp.fc2.bias', 'text_model.encoder.layers.20.mlp.fc2.weight', 'text_model.encoder.layers.20.self_attn.k_proj.bias', 'text_model.encoder.layers.20.self_attn.k_proj.weight', 'text_model.encoder.layers.20.self_attn.out_proj.bias', 'text_model.encoder.layers.20.self_attn.out_proj.weight', 'text_model.encoder.layers.20.self_attn.q_proj.bias', 'text_model.encoder.layers.20.self_attn.q_proj.weight', 'text_model.encoder.layers.20.self_attn.v_proj.bias', 'text_model.encoder.layers.20.self_attn.v_proj.weight', 'text_model.encoder.layers.21.layer_norm1.bias', 'text_model.encoder.layers.21.layer_norm1.weight', 'text_model.encoder.layers.21.layer_norm2.bias', 'text_model.encoder.layers.21.layer_norm2.weight', 'text_model.encoder.layers.21.mlp.fc1.bias', 'text_model.encoder.layers.21.mlp.fc1.weight', 'text_model.encoder.layers.21.mlp.fc2.bias', 'text_model.encoder.layers.21.mlp.fc2.weight', 'text_model.encoder.layers.21.self_attn.k_proj.bias', 'text_model.encoder.layers.21.self_attn.k_proj.weight', 'text_model.encoder.layers.21.self_attn.out_proj.bias', 'text_model.encoder.layers.21.self_attn.out_proj.weight', 'text_model.encoder.layers.21.self_attn.q_proj.bias', 'text_model.encoder.layers.21.self_attn.q_proj.weight', 'text_model.encoder.layers.21.self_attn.v_proj.bias', 'text_model.encoder.layers.21.self_attn.v_proj.weight', 'text_model.encoder.layers.22.layer_norm1.bias', 'text_model.encoder.layers.22.layer_norm1.weight', 'text_model.encoder.layers.22.layer_norm2.bias', 'text_model.encoder.layers.22.layer_norm2.weight', 'text_model.encoder.layers.22.mlp.fc1.bias', 'text_model.encoder.layers.22.mlp.fc1.weight', 'text_model.encoder.layers.22.mlp.fc2.bias', 'text_model.encoder.layers.22.mlp.fc2.weight', 'text_model.encoder.layers.22.self_attn.k_proj.bias', 'text_model.encoder.layers.22.self_attn.k_proj.weight', 'text_model.encoder.layers.22.self_attn.out_proj.bias', 'text_model.encoder.layers.22.self_attn.out_proj.weight', 'text_model.encoder.layers.22.self_attn.q_proj.bias', 'text_model.encoder.layers.22.self_attn.q_proj.weight', 'text_model.encoder.layers.22.self_attn.v_proj.bias', 'text_model.encoder.layers.22.self_attn.v_proj.weight', 'text_model.encoder.layers.23.layer_norm1.bias', 'text_model.encoder.layers.23.layer_norm1.weight', 'text_model.encoder.layers.23.layer_norm2.bias', 'text_model.encoder.layers.23.layer_norm2.weight', 'text_model.encoder.layers.23.mlp.fc1.bias', 'text_model.encoder.layers.23.mlp.fc1.weight', 'text_model.encoder.layers.23.mlp.fc2.bias', 'text_model.encoder.layers.23.mlp.fc2.weight', 'text_model.encoder.layers.23.self_attn.k_proj.bias', 'text_model.encoder.layers.23.self_attn.k_proj.weight', 'text_model.encoder.layers.23.self_attn.out_proj.bias', 
'text_model.encoder.layers.23.self_attn.out_proj.weight', 'text_model.encoder.layers.23.self_attn.q_proj.bias', 'text_model.encoder.layers.23.self_attn.q_proj.weight', 'text_model.encoder.layers.23.self_attn.v_proj.bias', 'text_model.encoder.layers.23.self_attn.v_proj.weight', 'text_model.encoder.layers.24.layer_norm1.bias', 'text_model.encoder.layers.24.layer_norm1.weight', 'text_model.encoder.layers.24.layer_norm2.bias', 'text_model.encoder.layers.24.layer_norm2.weight', 'text_model.encoder.layers.24.mlp.fc1.bias', 'text_model.encoder.layers.24.mlp.fc1.weight', 'text_model.encoder.layers.24.mlp.fc2.bias', 'text_model.encoder.layers.24.mlp.fc2.weight', 'text_model.encoder.layers.24.self_attn.k_proj.bias', 'text_model.encoder.layers.24.self_attn.k_proj.weight', 'text_model.encoder.layers.24.self_attn.out_proj.bias', 'text_model.encoder.layers.24.self_attn.out_proj.weight', 'text_model.encoder.layers.24.self_attn.q_proj.bias', 'text_model.encoder.layers.24.self_attn.q_proj.weight', 'text_model.encoder.layers.24.self_attn.v_proj.bias', 'text_model.encoder.layers.24.self_attn.v_proj.weight', 'text_model.encoder.layers.25.layer_norm1.bias', 'text_model.encoder.layers.25.layer_norm1.weight', 'text_model.encoder.layers.25.layer_norm2.bias', 'text_model.encoder.layers.25.layer_norm2.weight', 'text_model.encoder.layers.25.mlp.fc1.bias', 'text_model.encoder.layers.25.mlp.fc1.weight', 'text_model.encoder.layers.25.mlp.fc2.bias', 'text_model.encoder.layers.25.mlp.fc2.weight', 'text_model.encoder.layers.25.self_attn.k_proj.bias', 'text_model.encoder.layers.25.self_attn.k_proj.weight', 'text_model.encoder.layers.25.self_attn.out_proj.bias', 'text_model.encoder.layers.25.self_attn.out_proj.weight', 'text_model.encoder.layers.25.self_attn.q_proj.bias', 'text_model.encoder.layers.25.self_attn.q_proj.weight', 'text_model.encoder.layers.25.self_attn.v_proj.bias', 'text_model.encoder.layers.25.self_attn.v_proj.weight', 'text_model.encoder.layers.26.layer_norm1.bias', 'text_model.encoder.layers.26.layer_norm1.weight', 'text_model.encoder.layers.26.layer_norm2.bias', 'text_model.encoder.layers.26.layer_norm2.weight', 'text_model.encoder.layers.26.mlp.fc1.bias', 'text_model.encoder.layers.26.mlp.fc1.weight', 'text_model.encoder.layers.26.mlp.fc2.bias', 'text_model.encoder.layers.26.mlp.fc2.weight', 'text_model.encoder.layers.26.self_attn.k_proj.bias', 'text_model.encoder.layers.26.self_attn.k_proj.weight', 'text_model.encoder.layers.26.self_attn.out_proj.bias', 'text_model.encoder.layers.26.self_attn.out_proj.weight', 'text_model.encoder.layers.26.self_attn.q_proj.bias', 'text_model.encoder.layers.26.self_attn.q_proj.weight', 'text_model.encoder.layers.26.self_attn.v_proj.bias', 'text_model.encoder.layers.26.self_attn.v_proj.weight', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.3.layer_norm1.weight', 'text_model.encoder.layers.3.layer_norm2.bias', 'text_model.encoder.layers.3.layer_norm2.weight', 'text_model.encoder.layers.3.mlp.fc1.bias', 'text_model.encoder.layers.3.mlp.fc1.weight', 'text_model.encoder.layers.3.mlp.fc2.bias', 'text_model.encoder.layers.3.mlp.fc2.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.3.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.out_proj.bias', 'text_model.encoder.layers.3.self_attn.out_proj.weight', 'text_model.encoder.layers.3.self_attn.q_proj.bias', 'text_model.encoder.layers.3.self_attn.q_proj.weight', 'text_model.encoder.layers.3.self_attn.v_proj.bias', 
'text_model.encoder.layers.3.self_attn.v_proj.weight', 'text_model.encoder.layers.4.layer_norm1.bias', 'text_model.encoder.layers.4.layer_norm1.weight', 'text_model.encoder.layers.4.layer_norm2.bias', 'text_model.encoder.layers.4.layer_norm2.weight', 'text_model.encoder.layers.4.mlp.fc1.bias', 'text_model.encoder.layers.4.mlp.fc1.weight', 'text_model.encoder.layers.4.mlp.fc2.bias', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.4.self_attn.k_proj.bias', 'text_model.encoder.layers.4.self_attn.k_proj.weight', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.4.self_attn.out_proj.weight', 'text_model.encoder.layers.4.self_attn.q_proj.bias', 'text_model.encoder.layers.4.self_attn.q_proj.weight', 'text_model.encoder.layers.4.self_attn.v_proj.bias', 'text_model.encoder.layers.4.self_attn.v_proj.weight', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.5.layer_norm1.weight', 'text_model.encoder.layers.5.layer_norm2.bias', 'text_model.encoder.layers.5.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc1.bias', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.5.mlp.fc2.weight', 'text_model.encoder.layers.5.self_attn.k_proj.bias', 'text_model.encoder.layers.5.self_attn.k_proj.weight', 'text_model.encoder.layers.5.self_attn.out_proj.bias', 'text_model.encoder.layers.5.self_attn.out_proj.weight', 'text_model.encoder.layers.5.self_attn.q_proj.bias', 'text_model.encoder.layers.5.self_attn.q_proj.weight', 'text_model.encoder.layers.5.self_attn.v_proj.bias', 'text_model.encoder.layers.5.self_attn.v_proj.weight', 'text_model.encoder.layers.6.layer_norm1.bias', 'text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.layer_norm2.bias', 'text_model.encoder.layers.6.layer_norm2.weight', 'text_model.encoder.layers.6.mlp.fc1.bias', 'text_model.encoder.layers.6.mlp.fc1.weight', 'text_model.encoder.layers.6.mlp.fc2.bias', 'text_model.encoder.layers.6.mlp.fc2.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.6.self_attn.k_proj.weight', 'text_model.encoder.layers.6.self_attn.out_proj.bias', 'text_model.encoder.layers.6.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.bias', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.6.self_attn.v_proj.bias', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.layer_norm1.bias', 'text_model.encoder.layers.7.layer_norm1.weight', 'text_model.encoder.layers.7.layer_norm2.bias', 'text_model.encoder.layers.7.layer_norm2.weight', 'text_model.encoder.layers.7.mlp.fc1.bias', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.7.mlp.fc2.bias', 'text_model.encoder.layers.7.mlp.fc2.weight', 'text_model.encoder.layers.7.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.out_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.7.self_attn.q_proj.bias', 'text_model.encoder.layers.7.self_attn.q_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.8.layer_norm1.bias', 'text_model.encoder.layers.8.layer_norm1.weight', 'text_model.encoder.layers.8.layer_norm2.bias', 'text_model.encoder.layers.8.layer_norm2.weight', 'text_model.encoder.layers.8.mlp.fc1.bias', 
'text_model.encoder.layers.8.mlp.fc1.weight', 'text_model.encoder.layers.8.mlp.fc2.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.8.self_attn.out_proj.bias', 'text_model.encoder.layers.8.self_attn.out_proj.weight', 'text_model.encoder.layers.8.self_attn.q_proj.bias', 'text_model.encoder.layers.8.self_attn.q_proj.weight', 'text_model.encoder.layers.8.self_attn.v_proj.bias', 'text_model.encoder.layers.8.self_attn.v_proj.weight', 'text_model.encoder.layers.9.layer_norm1.bias', 'text_model.encoder.layers.9.layer_norm1.weight', 'text_model.encoder.layers.9.layer_norm2.bias', 'text_model.encoder.layers.9.layer_norm2.weight', 'text_model.encoder.layers.9.mlp.fc1.bias', 'text_model.encoder.layers.9.mlp.fc1.weight', 'text_model.encoder.layers.9.mlp.fc2.bias', 'text_model.encoder.layers.9.mlp.fc2.weight', 'text_model.encoder.layers.9.self_attn.k_proj.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.9.self_attn.out_proj.bias', 'text_model.encoder.layers.9.self_attn.out_proj.weight', 'text_model.encoder.layers.9.self_attn.q_proj.bias', 'text_model.encoder.layers.9.self_attn.q_proj.weight', 'text_model.encoder.layers.9.self_attn.v_proj.bias', 'text_model.encoder.layers.9.self_attn.v_proj.weight', 'text_model.final_layer_norm.bias', 'text_model.final_layer_norm.weight', 'text_model.head.bias', 'text_model.head.weight']
[log lines 223-366: the same SiglipVisionModel initialization notice, SiglipImageProcessor and BitImageProcessor configs, Dinov2Model loading messages, and parameter summary as in the previous log, with timestamps 2025-02-14 17:32:37 to 17:32:39]
2025-02-14 17:32:42,649 - trainer_callback.py:423 - add_callback - WARNING - You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The current list of callbacks is
:DefaultFlowCallback
TensorBoardCallback
2025-02-14 17:32:42,649 - trainer.py:648 - __init__ - INFO - Using auto half precision backend
2025-02-14 17:32:43,183 - trainer.py:2134 - _inner_training_loop - INFO - ***** Running training *****
2025-02-14 17:32:43,184 - trainer.py:2135 - _inner_training_loop - INFO - Num examples = 550
2025-02-14 17:32:43,184 - trainer.py:2136 - _inner_training_loop - INFO - Num Epochs = 2
2025-02-14 17:32:43,184 - trainer.py:2137 - _inner_training_loop - INFO - Instantaneous batch size per device = 1
2025-02-14 17:32:43,184 - trainer.py:2140 - _inner_training_loop - INFO - Total train batch size (w. parallel, distributed & accumulation) = 1
2025-02-14 17:32:43,184 - trainer.py:2141 - _inner_training_loop - INFO - Gradient Accumulation steps = 1
2025-02-14 17:32:43,184 - trainer.py:2142 - _inner_training_loop - INFO - Total optimization steps = 1,100
2025-02-14 17:32:43,186 - trainer.py:2143 - _inner_training_loop - INFO - Number of trainable parameters = 406,591,488
version https://git-lfs.github.com/spec/v1
oid sha256:056216c6b30ea50e9aeb44579620fe3d934c8fcb178331c5e9d64e261c2eada6
size 36433
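The removed log above closes with the parameter summary and trainer setup for this run: 3,264,865,280 total parameters, 12,589,056 trainable parameters, and an LM head of 394,002,432 parameters, which suggests the Trainer's 406,591,488 trainable parameters is the script's trainable count plus the LM head (12,589,056 + 394,002,432 = 406,591,488); likewise 550 examples for 2 epochs at an effective batch size of 1 gives the 1,100 optimization steps. A rough sketch of how such counts are usually derived in PyTorch (this is not the finetune_llama.py implementation; function and argument names are illustrative):

import torch.nn as nn

def param_counts(model: nn.Module) -> dict:
    # Totals in the style of the "Total params" / "Trainable params" lines above.
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return {"total": total, "trainable": trainable}

def total_optimization_steps(num_examples: int, epochs: int, per_device_bs: int,
                             grad_accum: int, world_size: int = 1) -> int:
    # steps_per_epoch = ceil(examples / effective batch size); 550 * 2 / 1 = 1,100 as logged above.
    effective_bs = per_device_bs * grad_accum * world_size
    steps_per_epoch = -(-num_examples // effective_bs)
    return steps_per_epoch * epochs

print(total_optimization_steps(550, 2, 1, 1))  # 1100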
runtime_logs/run_2025-02-14_17-36-05.log
CHANGED
The diff for this file is too large to render.
See raw diff
runtime_logs/run_2025-02-15_02-18-13.log
CHANGED
The diff for this file is too large to render.
See raw diff
runtime_logs/run_2025-02-15_02-47-54.log
CHANGED
The diff for this file is too large to render.
See raw diff
runtime_logs/run_2025-02-15_02-56-12.log
CHANGED
@@ -1,378 +1,3 @@
[log lines 1-221: the same startup sequence as run_2025-02-14_17-32-33.log above - CambrianConfig dump, loading ./checkpoints/longvu_llama3_2/pytorch_model.bin, GenerationConfig, Dinov2Config, CambrianLlamaForCausalLM initialization, tokenizer files, SiglipVisionConfig and SigLIP weight loading - with timestamps 2025-02-15 02:56:12 to 02:56:15]
2025-02-15 02:56:15,490 - modeling_utils.py:4440 - _load_pretrained_model - INFO - Some weights of the model checkpoint at google/siglip-so400m-patch14-384 were not used when initializing SiglipVisionModel: ['logit_bias', 'logit_scale', 'text_model.embeddings.position_embedding.weight', 'text_model.embeddings.token_embedding.weight', 'text_model.encoder.layers.0.layer_norm1.bias', 'text_model.encoder.layers.0.layer_norm1.weight', 'text_model.encoder.layers.0.layer_norm2.bias', 'text_model.encoder.layers.0.layer_norm2.weight', 'text_model.encoder.layers.0.mlp.fc1.bias', 'text_model.encoder.layers.0.mlp.fc1.weight', 'text_model.encoder.layers.0.mlp.fc2.bias', 'text_model.encoder.layers.0.mlp.fc2.weight', 'text_model.encoder.layers.0.self_attn.k_proj.bias', 'text_model.encoder.layers.0.self_attn.k_proj.weight', 'text_model.encoder.layers.0.self_attn.out_proj.bias', 'text_model.encoder.layers.0.self_attn.out_proj.weight', 'text_model.encoder.layers.0.self_attn.q_proj.bias', 'text_model.encoder.layers.0.self_attn.q_proj.weight', 'text_model.encoder.layers.0.self_attn.v_proj.bias', 'text_model.encoder.layers.0.self_attn.v_proj.weight', 'text_model.encoder.layers.1.layer_norm1.bias', 'text_model.encoder.layers.1.layer_norm1.weight', 'text_model.encoder.layers.1.layer_norm2.bias', 'text_model.encoder.layers.1.layer_norm2.weight', 'text_model.encoder.layers.1.mlp.fc1.bias', 'text_model.encoder.layers.1.mlp.fc1.weight', 'text_model.encoder.layers.1.mlp.fc2.bias', 'text_model.encoder.layers.1.mlp.fc2.weight', 'text_model.encoder.layers.1.self_attn.k_proj.bias', 'text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.1.self_attn.out_proj.bias', 'text_model.encoder.layers.1.self_attn.out_proj.weight', 'text_model.encoder.layers.1.self_attn.q_proj.bias', 'text_model.encoder.layers.1.self_attn.q_proj.weight', 'text_model.encoder.layers.1.self_attn.v_proj.bias', 'text_model.encoder.layers.1.self_attn.v_proj.weight', 'text_model.encoder.layers.10.layer_norm1.bias', 'text_model.encoder.layers.10.layer_norm1.weight', 'text_model.encoder.layers.10.layer_norm2.bias', 'text_model.encoder.layers.10.layer_norm2.weight', 'text_model.encoder.layers.10.mlp.fc1.bias', 'text_model.encoder.layers.10.mlp.fc1.weight', 'text_model.encoder.layers.10.mlp.fc2.bias', 'text_model.encoder.layers.10.mlp.fc2.weight', 'text_model.encoder.layers.10.self_attn.k_proj.bias', 'text_model.encoder.layers.10.self_attn.k_proj.weight', 'text_model.encoder.layers.10.self_attn.out_proj.bias', 'text_model.encoder.layers.10.self_attn.out_proj.weight', 'text_model.encoder.layers.10.self_attn.q_proj.bias', 'text_model.encoder.layers.10.self_attn.q_proj.weight', 'text_model.encoder.layers.10.self_attn.v_proj.bias', 'text_model.encoder.layers.10.self_attn.v_proj.weight', 'text_model.encoder.layers.11.layer_norm1.bias', 'text_model.encoder.layers.11.layer_norm1.weight', 'text_model.encoder.layers.11.layer_norm2.bias', 'text_model.encoder.layers.11.layer_norm2.weight', 'text_model.encoder.layers.11.mlp.fc1.bias', 'text_model.encoder.layers.11.mlp.fc1.weight', 'text_model.encoder.layers.11.mlp.fc2.bias', 'text_model.encoder.layers.11.mlp.fc2.weight', 'text_model.encoder.layers.11.self_attn.k_proj.bias', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.11.self_attn.out_proj.bias', 'text_model.encoder.layers.11.self_attn.out_proj.weight', 'text_model.encoder.layers.11.self_attn.q_proj.bias', 'text_model.encoder.layers.11.self_attn.q_proj.weight', 
'text_model.encoder.layers.11.self_attn.v_proj.bias', 'text_model.encoder.layers.11.self_attn.v_proj.weight', 'text_model.encoder.layers.12.layer_norm1.bias', 'text_model.encoder.layers.12.layer_norm1.weight', 'text_model.encoder.layers.12.layer_norm2.bias', 'text_model.encoder.layers.12.layer_norm2.weight', 'text_model.encoder.layers.12.mlp.fc1.bias', 'text_model.encoder.layers.12.mlp.fc1.weight', 'text_model.encoder.layers.12.mlp.fc2.bias', 'text_model.encoder.layers.12.mlp.fc2.weight', 'text_model.encoder.layers.12.self_attn.k_proj.bias', 'text_model.encoder.layers.12.self_attn.k_proj.weight', 'text_model.encoder.layers.12.self_attn.out_proj.bias', 'text_model.encoder.layers.12.self_attn.out_proj.weight', 'text_model.encoder.layers.12.self_attn.q_proj.bias', 'text_model.encoder.layers.12.self_attn.q_proj.weight', 'text_model.encoder.layers.12.self_attn.v_proj.bias', 'text_model.encoder.layers.12.self_attn.v_proj.weight', 'text_model.encoder.layers.13.layer_norm1.bias', 'text_model.encoder.layers.13.layer_norm1.weight', 'text_model.encoder.layers.13.layer_norm2.bias', 'text_model.encoder.layers.13.layer_norm2.weight', 'text_model.encoder.layers.13.mlp.fc1.bias', 'text_model.encoder.layers.13.mlp.fc1.weight', 'text_model.encoder.layers.13.mlp.fc2.bias', 'text_model.encoder.layers.13.mlp.fc2.weight', 'text_model.encoder.layers.13.self_attn.k_proj.bias', 'text_model.encoder.layers.13.self_attn.k_proj.weight', 'text_model.encoder.layers.13.self_attn.out_proj.bias', 'text_model.encoder.layers.13.self_attn.out_proj.weight', 'text_model.encoder.layers.13.self_attn.q_proj.bias', 'text_model.encoder.layers.13.self_attn.q_proj.weight', 'text_model.encoder.layers.13.self_attn.v_proj.bias', 'text_model.encoder.layers.13.self_attn.v_proj.weight', 'text_model.encoder.layers.14.layer_norm1.bias', 'text_model.encoder.layers.14.layer_norm1.weight', 'text_model.encoder.layers.14.layer_norm2.bias', 'text_model.encoder.layers.14.layer_norm2.weight', 'text_model.encoder.layers.14.mlp.fc1.bias', 'text_model.encoder.layers.14.mlp.fc1.weight', 'text_model.encoder.layers.14.mlp.fc2.bias', 'text_model.encoder.layers.14.mlp.fc2.weight', 'text_model.encoder.layers.14.self_attn.k_proj.bias', 'text_model.encoder.layers.14.self_attn.k_proj.weight', 'text_model.encoder.layers.14.self_attn.out_proj.bias', 'text_model.encoder.layers.14.self_attn.out_proj.weight', 'text_model.encoder.layers.14.self_attn.q_proj.bias', 'text_model.encoder.layers.14.self_attn.q_proj.weight', 'text_model.encoder.layers.14.self_attn.v_proj.bias', 'text_model.encoder.layers.14.self_attn.v_proj.weight', 'text_model.encoder.layers.15.layer_norm1.bias', 'text_model.encoder.layers.15.layer_norm1.weight', 'text_model.encoder.layers.15.layer_norm2.bias', 'text_model.encoder.layers.15.layer_norm2.weight', 'text_model.encoder.layers.15.mlp.fc1.bias', 'text_model.encoder.layers.15.mlp.fc1.weight', 'text_model.encoder.layers.15.mlp.fc2.bias', 'text_model.encoder.layers.15.mlp.fc2.weight', 'text_model.encoder.layers.15.self_attn.k_proj.bias', 'text_model.encoder.layers.15.self_attn.k_proj.weight', 'text_model.encoder.layers.15.self_attn.out_proj.bias', 'text_model.encoder.layers.15.self_attn.out_proj.weight', 'text_model.encoder.layers.15.self_attn.q_proj.bias', 'text_model.encoder.layers.15.self_attn.q_proj.weight', 'text_model.encoder.layers.15.self_attn.v_proj.bias', 'text_model.encoder.layers.15.self_attn.v_proj.weight', 'text_model.encoder.layers.16.layer_norm1.bias', 'text_model.encoder.layers.16.layer_norm1.weight', 
'text_model.encoder.layers.16.layer_norm2.bias', 'text_model.encoder.layers.16.layer_norm2.weight', 'text_model.encoder.layers.16.mlp.fc1.bias', 'text_model.encoder.layers.16.mlp.fc1.weight', 'text_model.encoder.layers.16.mlp.fc2.bias', 'text_model.encoder.layers.16.mlp.fc2.weight', 'text_model.encoder.layers.16.self_attn.k_proj.bias', 'text_model.encoder.layers.16.self_attn.k_proj.weight', 'text_model.encoder.layers.16.self_attn.out_proj.bias', 'text_model.encoder.layers.16.self_attn.out_proj.weight', 'text_model.encoder.layers.16.self_attn.q_proj.bias', 'text_model.encoder.layers.16.self_attn.q_proj.weight', 'text_model.encoder.layers.16.self_attn.v_proj.bias', 'text_model.encoder.layers.16.self_attn.v_proj.weight', 'text_model.encoder.layers.17.layer_norm1.bias', 'text_model.encoder.layers.17.layer_norm1.weight', 'text_model.encoder.layers.17.layer_norm2.bias', 'text_model.encoder.layers.17.layer_norm2.weight', 'text_model.encoder.layers.17.mlp.fc1.bias', 'text_model.encoder.layers.17.mlp.fc1.weight', 'text_model.encoder.layers.17.mlp.fc2.bias', 'text_model.encoder.layers.17.mlp.fc2.weight', 'text_model.encoder.layers.17.self_attn.k_proj.bias', 'text_model.encoder.layers.17.self_attn.k_proj.weight', 'text_model.encoder.layers.17.self_attn.out_proj.bias', 'text_model.encoder.layers.17.self_attn.out_proj.weight', 'text_model.encoder.layers.17.self_attn.q_proj.bias', 'text_model.encoder.layers.17.self_attn.q_proj.weight', 'text_model.encoder.layers.17.self_attn.v_proj.bias', 'text_model.encoder.layers.17.self_attn.v_proj.weight', 'text_model.encoder.layers.18.layer_norm1.bias', 'text_model.encoder.layers.18.layer_norm1.weight', 'text_model.encoder.layers.18.layer_norm2.bias', 'text_model.encoder.layers.18.layer_norm2.weight', 'text_model.encoder.layers.18.mlp.fc1.bias', 'text_model.encoder.layers.18.mlp.fc1.weight', 'text_model.encoder.layers.18.mlp.fc2.bias', 'text_model.encoder.layers.18.mlp.fc2.weight', 'text_model.encoder.layers.18.self_attn.k_proj.bias', 'text_model.encoder.layers.18.self_attn.k_proj.weight', 'text_model.encoder.layers.18.self_attn.out_proj.bias', 'text_model.encoder.layers.18.self_attn.out_proj.weight', 'text_model.encoder.layers.18.self_attn.q_proj.bias', 'text_model.encoder.layers.18.self_attn.q_proj.weight', 'text_model.encoder.layers.18.self_attn.v_proj.bias', 'text_model.encoder.layers.18.self_attn.v_proj.weight', 'text_model.encoder.layers.19.layer_norm1.bias', 'text_model.encoder.layers.19.layer_norm1.weight', 'text_model.encoder.layers.19.layer_norm2.bias', 'text_model.encoder.layers.19.layer_norm2.weight', 'text_model.encoder.layers.19.mlp.fc1.bias', 'text_model.encoder.layers.19.mlp.fc1.weight', 'text_model.encoder.layers.19.mlp.fc2.bias', 'text_model.encoder.layers.19.mlp.fc2.weight', 'text_model.encoder.layers.19.self_attn.k_proj.bias', 'text_model.encoder.layers.19.self_attn.k_proj.weight', 'text_model.encoder.layers.19.self_attn.out_proj.bias', 'text_model.encoder.layers.19.self_attn.out_proj.weight', 'text_model.encoder.layers.19.self_attn.q_proj.bias', 'text_model.encoder.layers.19.self_attn.q_proj.weight', 'text_model.encoder.layers.19.self_attn.v_proj.bias', 'text_model.encoder.layers.19.self_attn.v_proj.weight', 'text_model.encoder.layers.2.layer_norm1.bias', 'text_model.encoder.layers.2.layer_norm1.weight', 'text_model.encoder.layers.2.layer_norm2.bias', 'text_model.encoder.layers.2.layer_norm2.weight', 'text_model.encoder.layers.2.mlp.fc1.bias', 'text_model.encoder.layers.2.mlp.fc1.weight', 'text_model.encoder.layers.2.mlp.fc2.bias', 
'text_model.encoder.layers.2.mlp.fc2.weight', 'text_model.encoder.layers.2.self_attn.k_proj.bias', 'text_model.encoder.layers.2.self_attn.k_proj.weight', 'text_model.encoder.layers.2.self_attn.out_proj.bias', 'text_model.encoder.layers.2.self_attn.out_proj.weight', 'text_model.encoder.layers.2.self_attn.q_proj.bias', 'text_model.encoder.layers.2.self_attn.q_proj.weight', 'text_model.encoder.layers.2.self_attn.v_proj.bias', 'text_model.encoder.layers.2.self_attn.v_proj.weight', 'text_model.encoder.layers.20.layer_norm1.bias', 'text_model.encoder.layers.20.layer_norm1.weight', 'text_model.encoder.layers.20.layer_norm2.bias', 'text_model.encoder.layers.20.layer_norm2.weight', 'text_model.encoder.layers.20.mlp.fc1.bias', 'text_model.encoder.layers.20.mlp.fc1.weight', 'text_model.encoder.layers.20.mlp.fc2.bias', 'text_model.encoder.layers.20.mlp.fc2.weight', 'text_model.encoder.layers.20.self_attn.k_proj.bias', 'text_model.encoder.layers.20.self_attn.k_proj.weight', 'text_model.encoder.layers.20.self_attn.out_proj.bias', 'text_model.encoder.layers.20.self_attn.out_proj.weight', 'text_model.encoder.layers.20.self_attn.q_proj.bias', 'text_model.encoder.layers.20.self_attn.q_proj.weight', 'text_model.encoder.layers.20.self_attn.v_proj.bias', 'text_model.encoder.layers.20.self_attn.v_proj.weight', 'text_model.encoder.layers.21.layer_norm1.bias', 'text_model.encoder.layers.21.layer_norm1.weight', 'text_model.encoder.layers.21.layer_norm2.bias', 'text_model.encoder.layers.21.layer_norm2.weight', 'text_model.encoder.layers.21.mlp.fc1.bias', 'text_model.encoder.layers.21.mlp.fc1.weight', 'text_model.encoder.layers.21.mlp.fc2.bias', 'text_model.encoder.layers.21.mlp.fc2.weight', 'text_model.encoder.layers.21.self_attn.k_proj.bias', 'text_model.encoder.layers.21.self_attn.k_proj.weight', 'text_model.encoder.layers.21.self_attn.out_proj.bias', 'text_model.encoder.layers.21.self_attn.out_proj.weight', 'text_model.encoder.layers.21.self_attn.q_proj.bias', 'text_model.encoder.layers.21.self_attn.q_proj.weight', 'text_model.encoder.layers.21.self_attn.v_proj.bias', 'text_model.encoder.layers.21.self_attn.v_proj.weight', 'text_model.encoder.layers.22.layer_norm1.bias', 'text_model.encoder.layers.22.layer_norm1.weight', 'text_model.encoder.layers.22.layer_norm2.bias', 'text_model.encoder.layers.22.layer_norm2.weight', 'text_model.encoder.layers.22.mlp.fc1.bias', 'text_model.encoder.layers.22.mlp.fc1.weight', 'text_model.encoder.layers.22.mlp.fc2.bias', 'text_model.encoder.layers.22.mlp.fc2.weight', 'text_model.encoder.layers.22.self_attn.k_proj.bias', 'text_model.encoder.layers.22.self_attn.k_proj.weight', 'text_model.encoder.layers.22.self_attn.out_proj.bias', 'text_model.encoder.layers.22.self_attn.out_proj.weight', 'text_model.encoder.layers.22.self_attn.q_proj.bias', 'text_model.encoder.layers.22.self_attn.q_proj.weight', 'text_model.encoder.layers.22.self_attn.v_proj.bias', 'text_model.encoder.layers.22.self_attn.v_proj.weight', 'text_model.encoder.layers.23.layer_norm1.bias', 'text_model.encoder.layers.23.layer_norm1.weight', 'text_model.encoder.layers.23.layer_norm2.bias', 'text_model.encoder.layers.23.layer_norm2.weight', 'text_model.encoder.layers.23.mlp.fc1.bias', 'text_model.encoder.layers.23.mlp.fc1.weight', 'text_model.encoder.layers.23.mlp.fc2.bias', 'text_model.encoder.layers.23.mlp.fc2.weight', 'text_model.encoder.layers.23.self_attn.k_proj.bias', 'text_model.encoder.layers.23.self_attn.k_proj.weight', 'text_model.encoder.layers.23.self_attn.out_proj.bias', 
'text_model.encoder.layers.23.self_attn.out_proj.weight', 'text_model.encoder.layers.23.self_attn.q_proj.bias', 'text_model.encoder.layers.23.self_attn.q_proj.weight', 'text_model.encoder.layers.23.self_attn.v_proj.bias', 'text_model.encoder.layers.23.self_attn.v_proj.weight', 'text_model.encoder.layers.24.layer_norm1.bias', 'text_model.encoder.layers.24.layer_norm1.weight', 'text_model.encoder.layers.24.layer_norm2.bias', 'text_model.encoder.layers.24.layer_norm2.weight', 'text_model.encoder.layers.24.mlp.fc1.bias', 'text_model.encoder.layers.24.mlp.fc1.weight', 'text_model.encoder.layers.24.mlp.fc2.bias', 'text_model.encoder.layers.24.mlp.fc2.weight', 'text_model.encoder.layers.24.self_attn.k_proj.bias', 'text_model.encoder.layers.24.self_attn.k_proj.weight', 'text_model.encoder.layers.24.self_attn.out_proj.bias', 'text_model.encoder.layers.24.self_attn.out_proj.weight', 'text_model.encoder.layers.24.self_attn.q_proj.bias', 'text_model.encoder.layers.24.self_attn.q_proj.weight', 'text_model.encoder.layers.24.self_attn.v_proj.bias', 'text_model.encoder.layers.24.self_attn.v_proj.weight', 'text_model.encoder.layers.25.layer_norm1.bias', 'text_model.encoder.layers.25.layer_norm1.weight', 'text_model.encoder.layers.25.layer_norm2.bias', 'text_model.encoder.layers.25.layer_norm2.weight', 'text_model.encoder.layers.25.mlp.fc1.bias', 'text_model.encoder.layers.25.mlp.fc1.weight', 'text_model.encoder.layers.25.mlp.fc2.bias', 'text_model.encoder.layers.25.mlp.fc2.weight', 'text_model.encoder.layers.25.self_attn.k_proj.bias', 'text_model.encoder.layers.25.self_attn.k_proj.weight', 'text_model.encoder.layers.25.self_attn.out_proj.bias', 'text_model.encoder.layers.25.self_attn.out_proj.weight', 'text_model.encoder.layers.25.self_attn.q_proj.bias', 'text_model.encoder.layers.25.self_attn.q_proj.weight', 'text_model.encoder.layers.25.self_attn.v_proj.bias', 'text_model.encoder.layers.25.self_attn.v_proj.weight', 'text_model.encoder.layers.26.layer_norm1.bias', 'text_model.encoder.layers.26.layer_norm1.weight', 'text_model.encoder.layers.26.layer_norm2.bias', 'text_model.encoder.layers.26.layer_norm2.weight', 'text_model.encoder.layers.26.mlp.fc1.bias', 'text_model.encoder.layers.26.mlp.fc1.weight', 'text_model.encoder.layers.26.mlp.fc2.bias', 'text_model.encoder.layers.26.mlp.fc2.weight', 'text_model.encoder.layers.26.self_attn.k_proj.bias', 'text_model.encoder.layers.26.self_attn.k_proj.weight', 'text_model.encoder.layers.26.self_attn.out_proj.bias', 'text_model.encoder.layers.26.self_attn.out_proj.weight', 'text_model.encoder.layers.26.self_attn.q_proj.bias', 'text_model.encoder.layers.26.self_attn.q_proj.weight', 'text_model.encoder.layers.26.self_attn.v_proj.bias', 'text_model.encoder.layers.26.self_attn.v_proj.weight', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.3.layer_norm1.weight', 'text_model.encoder.layers.3.layer_norm2.bias', 'text_model.encoder.layers.3.layer_norm2.weight', 'text_model.encoder.layers.3.mlp.fc1.bias', 'text_model.encoder.layers.3.mlp.fc1.weight', 'text_model.encoder.layers.3.mlp.fc2.bias', 'text_model.encoder.layers.3.mlp.fc2.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.3.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.out_proj.bias', 'text_model.encoder.layers.3.self_attn.out_proj.weight', 'text_model.encoder.layers.3.self_attn.q_proj.bias', 'text_model.encoder.layers.3.self_attn.q_proj.weight', 'text_model.encoder.layers.3.self_attn.v_proj.bias', 
'text_model.encoder.layers.3.self_attn.v_proj.weight', 'text_model.encoder.layers.4.layer_norm1.bias', 'text_model.encoder.layers.4.layer_norm1.weight', 'text_model.encoder.layers.4.layer_norm2.bias', 'text_model.encoder.layers.4.layer_norm2.weight', 'text_model.encoder.layers.4.mlp.fc1.bias', 'text_model.encoder.layers.4.mlp.fc1.weight', 'text_model.encoder.layers.4.mlp.fc2.bias', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.4.self_attn.k_proj.bias', 'text_model.encoder.layers.4.self_attn.k_proj.weight', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.4.self_attn.out_proj.weight', 'text_model.encoder.layers.4.self_attn.q_proj.bias', 'text_model.encoder.layers.4.self_attn.q_proj.weight', 'text_model.encoder.layers.4.self_attn.v_proj.bias', 'text_model.encoder.layers.4.self_attn.v_proj.weight', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.5.layer_norm1.weight', 'text_model.encoder.layers.5.layer_norm2.bias', 'text_model.encoder.layers.5.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc1.bias', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.5.mlp.fc2.weight', 'text_model.encoder.layers.5.self_attn.k_proj.bias', 'text_model.encoder.layers.5.self_attn.k_proj.weight', 'text_model.encoder.layers.5.self_attn.out_proj.bias', 'text_model.encoder.layers.5.self_attn.out_proj.weight', 'text_model.encoder.layers.5.self_attn.q_proj.bias', 'text_model.encoder.layers.5.self_attn.q_proj.weight', 'text_model.encoder.layers.5.self_attn.v_proj.bias', 'text_model.encoder.layers.5.self_attn.v_proj.weight', 'text_model.encoder.layers.6.layer_norm1.bias', 'text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.layer_norm2.bias', 'text_model.encoder.layers.6.layer_norm2.weight', 'text_model.encoder.layers.6.mlp.fc1.bias', 'text_model.encoder.layers.6.mlp.fc1.weight', 'text_model.encoder.layers.6.mlp.fc2.bias', 'text_model.encoder.layers.6.mlp.fc2.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.6.self_attn.k_proj.weight', 'text_model.encoder.layers.6.self_attn.out_proj.bias', 'text_model.encoder.layers.6.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.bias', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.6.self_attn.v_proj.bias', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.layer_norm1.bias', 'text_model.encoder.layers.7.layer_norm1.weight', 'text_model.encoder.layers.7.layer_norm2.bias', 'text_model.encoder.layers.7.layer_norm2.weight', 'text_model.encoder.layers.7.mlp.fc1.bias', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.7.mlp.fc2.bias', 'text_model.encoder.layers.7.mlp.fc2.weight', 'text_model.encoder.layers.7.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.out_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.7.self_attn.q_proj.bias', 'text_model.encoder.layers.7.self_attn.q_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.8.layer_norm1.bias', 'text_model.encoder.layers.8.layer_norm1.weight', 'text_model.encoder.layers.8.layer_norm2.bias', 'text_model.encoder.layers.8.layer_norm2.weight', 'text_model.encoder.layers.8.mlp.fc1.bias', 
'text_model.encoder.layers.8.mlp.fc1.weight', 'text_model.encoder.layers.8.mlp.fc2.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.8.self_attn.out_proj.bias', 'text_model.encoder.layers.8.self_attn.out_proj.weight', 'text_model.encoder.layers.8.self_attn.q_proj.bias', 'text_model.encoder.layers.8.self_attn.q_proj.weight', 'text_model.encoder.layers.8.self_attn.v_proj.bias', 'text_model.encoder.layers.8.self_attn.v_proj.weight', 'text_model.encoder.layers.9.layer_norm1.bias', 'text_model.encoder.layers.9.layer_norm1.weight', 'text_model.encoder.layers.9.layer_norm2.bias', 'text_model.encoder.layers.9.layer_norm2.weight', 'text_model.encoder.layers.9.mlp.fc1.bias', 'text_model.encoder.layers.9.mlp.fc1.weight', 'text_model.encoder.layers.9.mlp.fc2.bias', 'text_model.encoder.layers.9.mlp.fc2.weight', 'text_model.encoder.layers.9.self_attn.k_proj.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.9.self_attn.out_proj.bias', 'text_model.encoder.layers.9.self_attn.out_proj.weight', 'text_model.encoder.layers.9.self_attn.q_proj.bias', 'text_model.encoder.layers.9.self_attn.q_proj.weight', 'text_model.encoder.layers.9.self_attn.v_proj.bias', 'text_model.encoder.layers.9.self_attn.v_proj.weight', 'text_model.final_layer_norm.bias', 'text_model.final_layer_norm.weight', 'text_model.head.bias', 'text_model.head.weight']
- This IS expected if you are initializing SiglipVisionModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SiglipVisionModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2025-02-15 02:56:15,491 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of SiglipVisionModel were initialized from the model checkpoint at google/siglip-so400m-patch14-384.
If your task is similar to the task the model of the checkpoint was trained on, you can already use SiglipVisionModel for predictions without further training.
2025-02-15 02:56:15,674 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/preprocessor_config.json
2025-02-15 02:56:15,674 - image_processing_base.py:429 - from_dict - INFO - Image processor SiglipImageProcessor {
  "do_convert_rgb": null,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "SiglipImageProcessor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "processor_class": "SiglipProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 384,
    "width": 384
  }
}
2025-02-15 02:56:16,050 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
2025-02-15 02:56:16,052 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
  "apply_layernorm": true,
  "architectures": [
    "Dinov2Model"
  ],
  "attention_probs_dropout_prob": 0.0,
  "drop_path_rate": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1536,
  "image_size": 518,
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-06,
  "layerscale_value": 1.0,
  "mlp_ratio": 4,
  "model_type": "dinov2",
  "num_attention_heads": 24,
  "num_channels": 3,
  "num_hidden_layers": 40,
  "out_features": [
    "stage40"
  ],
  "out_indices": [
    40
  ],
  "patch_size": 14,
  "qkv_bias": true,
  "reshape_hidden_states": true,
  "stage_names": [
    "stem",
    "stage1",
    "stage2",
    "stage3",
    "stage4",
    "stage5",
    "stage6",
    "stage7",
    "stage8",
    "stage9",
    "stage10",
    "stage11",
    "stage12",
    "stage13",
    "stage14",
    "stage15",
    "stage16",
    "stage17",
    "stage18",
    "stage19",
    "stage20",
    "stage21",
    "stage22",
    "stage23",
    "stage24",
    "stage25",
    "stage26",
    "stage27",
    "stage28",
    "stage29",
    "stage30",
    "stage31",
    "stage32",
    "stage33",
    "stage34",
    "stage35",
    "stage36",
    "stage37",
    "stage38",
    "stage39",
    "stage40"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.43.1",
  "use_swiglu_ffn": true
}
2025-02-15 02:56:16,053 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/model.safetensors
2025-02-15 02:56:16,377 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing Dinov2Model.
2025-02-15 02:56:16,377 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of Dinov2Model were initialized from the model checkpoint at facebook/dinov2-giant.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Dinov2Model for predictions without further training.
2025-02-15 02:56:16,563 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/preprocessor_config.json
2025-02-15 02:56:16,566 - image_processing_base.py:429 - from_dict - INFO - Image processor BitImageProcessor {
  "crop_size": {
    "height": 378,
    "width": 378
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "BitImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 378
  }
}
2025-02-15 02:56:17,230 - finetune_llama.py:1239 - train - INFO - Total params: 3264865280
2025-02-15 02:56:17,230 - finetune_llama.py:1240 - train - INFO - Trainable params: 12589056
2025-02-15 02:56:17,230 - finetune_llama.py:1241 - train - INFO - LM head params: 394002432
2025-02-15 02:56:19,287 - trainer_callback.py:423 - add_callback - WARNING - You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The current list of callbacks is
:DefaultFlowCallback
TensorBoardCallback
2025-02-15 02:56:19,287 - trainer.py:648 - __init__ - INFO - Using auto half precision backend
2025-02-15 02:56:19,591 - trainer.py:2134 - _inner_training_loop - INFO - ***** Running training *****
2025-02-15 02:56:19,591 - trainer.py:2135 - _inner_training_loop - INFO - Num examples = 2
2025-02-15 02:56:19,591 - trainer.py:2136 - _inner_training_loop - INFO - Num Epochs = 2
2025-02-15 02:56:19,591 - trainer.py:2137 - _inner_training_loop - INFO - Instantaneous batch size per device = 1
2025-02-15 02:56:19,591 - trainer.py:2140 - _inner_training_loop - INFO - Total train batch size (w. parallel, distributed & accumulation) = 1
2025-02-15 02:56:19,591 - trainer.py:2141 - _inner_training_loop - INFO - Gradient Accumulation steps = 1
2025-02-15 02:56:19,591 - trainer.py:2142 - _inner_training_loop - INFO - Total optimization steps = 4
2025-02-15 02:56:19,593 - trainer.py:2143 - _inner_training_loop - INFO - Number of trainable parameters = 406,591,488
version https://git-lfs.github.com/spec/v1
oid sha256:eb53b35784cd4c3deffdb4f7fbb4b55778d0370f0a55cccf53af92339759f3a2
size 36427
runtime_logs/run_2025-02-15_03-00-20.log
CHANGED
The diff for this file is too large to render.
runtime_logs/run_2025-02-15_03-02-43.log
CHANGED
@@ -1,383 +1,3 @@
  "_name_or_path": "/tmp/iopath_cache/manifold_cache/tree/users/shenx/finetune/09281004-cambrian_llama3_2_t576_ov",
  "architectures": [
    "CambrianLlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "connect_layer": 2,
  "connector_depth": 3,
  "connector_only": true,
  "dino_threshold": 0.83,
  "drop_threshold": 0.8,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "frame_pos": false,
  "freeze_mm_mlp_adapter": false,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "highres": true,
  "highres_connect": false,
  "image_aspect_ratio": "pad",
  "image_position": 91,
  "image_token_len": 144,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "is_image_newline": true,
  "is_st_sampler": false,
  "lowres_token": 8,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "mm_patch_merge_type": "flat",
  "mm_projector_lr": null,
  "mm_projector_type": "sva",
  "mm_use_im_patch_token": false,
  "mm_use_im_start_end": false,
  "mm_vision_sampler_lr": null,
  "mm_vision_select_feature": "patch",
  "mm_vision_select_layer": -2,
  "mm_vision_tower_aux_list": [
    "siglip/CLIP-ViT-SO400M-14-384",
    "facebook/dinov2-giant-res378"
  ],
  "mm_vision_tower_aux_token_len_list": [
    576,
    576
  ],
  "mm_vision_tower_lr": null,
  "model_type": "cambrian_llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "num_of_vision_sampler_layers": 10,
  "num_query_group": 1,
  "pretraining_tp": 1,
  "query_num_list": [
    144
  ],
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "spmd_debug": null,
  "spmd_fsdp_sharding": null,
  "spmd_mesh": null,
  "start_of_vision_sampler_layers": 0,
  "stride_of_vision_sampler_layers": 3,
  "tie_word_embeddings": false,
  "tokenizer_model_max_length": 8192,
  "tokenizer_padding_side": "right",
  "torch_dtype": "float32",
  "transformers_version": "4.43.1",
  "tune_mm_mlp_adapter": false,
  "unfreeze_mm_vision_tower": false,
  "use_cache": false,
  "use_mm_proj": true,
  "vision_hidden_size": 1024,
  "vision_tower_aux_token_len_list": [
    576,
    576
  ],
  "vocab_size": 128256
}
2025-02-15 03:02:44,240 - modeling_utils.py:3618 - from_pretrained - INFO - loading weights file ./checkpoints/longvu_llama3_2/pytorch_model.bin
2025-02-15 03:02:44,301 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "use_cache": false
}
2025-02-15 03:02:44,543 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
2025-02-15 03:02:44,547 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
  "apply_layernorm": true,
  "architectures": [
    "Dinov2Model"
  ],
  "attention_probs_dropout_prob": 0.0,
  "drop_path_rate": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1536,
  "image_size": 518,
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-06,
  "layerscale_value": 1.0,
  "mlp_ratio": 4,
  "model_type": "dinov2",
  "num_attention_heads": 24,
  "num_channels": 3,
  "num_hidden_layers": 40,
  "out_features": [
    "stage40"
  ],
  "out_indices": [
    40
  ],
  "patch_size": 14,
  "qkv_bias": true,
  "reshape_hidden_states": true,
  "stage_names": [
    "stem",
    "stage1",
    "stage2",
    "stage3",
    "stage4",
    "stage5",
    "stage6",
    "stage7",
    "stage8",
    "stage9",
    "stage10",
    "stage11",
    "stage12",
    "stage13",
    "stage14",
    "stage15",
    "stage16",
    "stage17",
    "stage18",
    "stage19",
    "stage20",
    "stage21",
    "stage22",
    "stage23",
    "stage24",
    "stage25",
    "stage26",
    "stage27",
    "stage28",
    "stage29",
    "stage30",
    "stage31",
    "stage32",
    "stage33",
    "stage34",
    "stage35",
    "stage36",
    "stage37",
    "stage38",
    "stage39",
    "stage40"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.43.1",
  "use_swiglu_ffn": true
}
2025-02-15 03:02:46,020 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing CambrianLlamaForCausalLM.
2025-02-15 03:02:46,020 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of CambrianLlamaForCausalLM were initialized from the model checkpoint at ./checkpoints/longvu_llama3_2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use CambrianLlamaForCausalLM for predictions without further training.
2025-02-15 03:02:46,026 - configuration_utils.py:991 - from_pretrained - INFO - loading configuration file ./checkpoints/longvu_llama3_2/generation_config.json
2025-02-15 03:02:46,026 - configuration_utils.py:1038 - from_dict - INFO - Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "temperature": 0.6,
  "top_p": 0.9
}
2025-02-15 03:02:46,298 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer.json
2025-02-15 03:02:46,298 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file added_tokens.json
2025-02-15 03:02:46,299 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file special_tokens_map.json
2025-02-15 03:02:46,299 - tokenization_utils_base.py:2287 - from_pretrained - INFO - loading file tokenizer_config.json
2025-02-15 03:02:46,709 - tokenization_utils_base.py:2533 - _from_pretrained - INFO - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2025-02-15 03:02:47,394 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/config.json
2025-02-15 03:02:47,396 - configuration_utils.py:800 - from_dict - INFO - Model config SiglipVisionConfig {
  "attention_dropout": 0.0,
  "hidden_act": "gelu_pytorch_tanh",
  "hidden_size": 1152,
  "image_size": 384,
  "intermediate_size": 4304,
  "layer_norm_eps": 1e-06,
  "model_type": "siglip_vision_model",
  "num_attention_heads": 16,
  "num_channels": 3,
  "num_hidden_layers": 27,
  "patch_size": 14,
  "transformers_version": "4.43.1"
}
2025-02-15 03:02:47,397 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/model.safetensors
2025-02-15 03:02:47,666 - modeling_utils.py:4440 - _load_pretrained_model - INFO - Some weights of the model checkpoint at google/siglip-so400m-patch14-384 were not used when initializing SiglipVisionModel: ['logit_bias', 'logit_scale', 'text_model.embeddings.position_embedding.weight', 'text_model.embeddings.token_embedding.weight', 'text_model.encoder.layers.0.layer_norm1.bias', 'text_model.encoder.layers.0.layer_norm1.weight', 'text_model.encoder.layers.0.layer_norm2.bias', 'text_model.encoder.layers.0.layer_norm2.weight', 'text_model.encoder.layers.0.mlp.fc1.bias', 'text_model.encoder.layers.0.mlp.fc1.weight', 'text_model.encoder.layers.0.mlp.fc2.bias', 'text_model.encoder.layers.0.mlp.fc2.weight', 'text_model.encoder.layers.0.self_attn.k_proj.bias', 'text_model.encoder.layers.0.self_attn.k_proj.weight', 'text_model.encoder.layers.0.self_attn.out_proj.bias', 'text_model.encoder.layers.0.self_attn.out_proj.weight', 'text_model.encoder.layers.0.self_attn.q_proj.bias', 'text_model.encoder.layers.0.self_attn.q_proj.weight', 'text_model.encoder.layers.0.self_attn.v_proj.bias', 'text_model.encoder.layers.0.self_attn.v_proj.weight', 'text_model.encoder.layers.1.layer_norm1.bias', 'text_model.encoder.layers.1.layer_norm1.weight', 'text_model.encoder.layers.1.layer_norm2.bias', 'text_model.encoder.layers.1.layer_norm2.weight', 'text_model.encoder.layers.1.mlp.fc1.bias', 'text_model.encoder.layers.1.mlp.fc1.weight', 'text_model.encoder.layers.1.mlp.fc2.bias', 'text_model.encoder.layers.1.mlp.fc2.weight', 'text_model.encoder.layers.1.self_attn.k_proj.bias', 'text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.1.self_attn.out_proj.bias', 'text_model.encoder.layers.1.self_attn.out_proj.weight', 'text_model.encoder.layers.1.self_attn.q_proj.bias', 'text_model.encoder.layers.1.self_attn.q_proj.weight', 'text_model.encoder.layers.1.self_attn.v_proj.bias', 'text_model.encoder.layers.1.self_attn.v_proj.weight', 'text_model.encoder.layers.10.layer_norm1.bias', 'text_model.encoder.layers.10.layer_norm1.weight', 'text_model.encoder.layers.10.layer_norm2.bias', 'text_model.encoder.layers.10.layer_norm2.weight', 'text_model.encoder.layers.10.mlp.fc1.bias', 'text_model.encoder.layers.10.mlp.fc1.weight', 'text_model.encoder.layers.10.mlp.fc2.bias', 'text_model.encoder.layers.10.mlp.fc2.weight', 'text_model.encoder.layers.10.self_attn.k_proj.bias', 'text_model.encoder.layers.10.self_attn.k_proj.weight', 'text_model.encoder.layers.10.self_attn.out_proj.bias', 'text_model.encoder.layers.10.self_attn.out_proj.weight', 'text_model.encoder.layers.10.self_attn.q_proj.bias', 'text_model.encoder.layers.10.self_attn.q_proj.weight', 'text_model.encoder.layers.10.self_attn.v_proj.bias', 'text_model.encoder.layers.10.self_attn.v_proj.weight', 'text_model.encoder.layers.11.layer_norm1.bias', 'text_model.encoder.layers.11.layer_norm1.weight', 'text_model.encoder.layers.11.layer_norm2.bias', 'text_model.encoder.layers.11.layer_norm2.weight', 'text_model.encoder.layers.11.mlp.fc1.bias', 'text_model.encoder.layers.11.mlp.fc1.weight', 'text_model.encoder.layers.11.mlp.fc2.bias', 'text_model.encoder.layers.11.mlp.fc2.weight', 'text_model.encoder.layers.11.self_attn.k_proj.bias', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.11.self_attn.out_proj.bias', 'text_model.encoder.layers.11.self_attn.out_proj.weight', 'text_model.encoder.layers.11.self_attn.q_proj.bias', 'text_model.encoder.layers.11.self_attn.q_proj.weight', 
'text_model.encoder.layers.11.self_attn.v_proj.bias', 'text_model.encoder.layers.11.self_attn.v_proj.weight', 'text_model.encoder.layers.12.layer_norm1.bias', 'text_model.encoder.layers.12.layer_norm1.weight', 'text_model.encoder.layers.12.layer_norm2.bias', 'text_model.encoder.layers.12.layer_norm2.weight', 'text_model.encoder.layers.12.mlp.fc1.bias', 'text_model.encoder.layers.12.mlp.fc1.weight', 'text_model.encoder.layers.12.mlp.fc2.bias', 'text_model.encoder.layers.12.mlp.fc2.weight', 'text_model.encoder.layers.12.self_attn.k_proj.bias', 'text_model.encoder.layers.12.self_attn.k_proj.weight', 'text_model.encoder.layers.12.self_attn.out_proj.bias', 'text_model.encoder.layers.12.self_attn.out_proj.weight', 'text_model.encoder.layers.12.self_attn.q_proj.bias', 'text_model.encoder.layers.12.self_attn.q_proj.weight', 'text_model.encoder.layers.12.self_attn.v_proj.bias', 'text_model.encoder.layers.12.self_attn.v_proj.weight', 'text_model.encoder.layers.13.layer_norm1.bias', 'text_model.encoder.layers.13.layer_norm1.weight', 'text_model.encoder.layers.13.layer_norm2.bias', 'text_model.encoder.layers.13.layer_norm2.weight', 'text_model.encoder.layers.13.mlp.fc1.bias', 'text_model.encoder.layers.13.mlp.fc1.weight', 'text_model.encoder.layers.13.mlp.fc2.bias', 'text_model.encoder.layers.13.mlp.fc2.weight', 'text_model.encoder.layers.13.self_attn.k_proj.bias', 'text_model.encoder.layers.13.self_attn.k_proj.weight', 'text_model.encoder.layers.13.self_attn.out_proj.bias', 'text_model.encoder.layers.13.self_attn.out_proj.weight', 'text_model.encoder.layers.13.self_attn.q_proj.bias', 'text_model.encoder.layers.13.self_attn.q_proj.weight', 'text_model.encoder.layers.13.self_attn.v_proj.bias', 'text_model.encoder.layers.13.self_attn.v_proj.weight', 'text_model.encoder.layers.14.layer_norm1.bias', 'text_model.encoder.layers.14.layer_norm1.weight', 'text_model.encoder.layers.14.layer_norm2.bias', 'text_model.encoder.layers.14.layer_norm2.weight', 'text_model.encoder.layers.14.mlp.fc1.bias', 'text_model.encoder.layers.14.mlp.fc1.weight', 'text_model.encoder.layers.14.mlp.fc2.bias', 'text_model.encoder.layers.14.mlp.fc2.weight', 'text_model.encoder.layers.14.self_attn.k_proj.bias', 'text_model.encoder.layers.14.self_attn.k_proj.weight', 'text_model.encoder.layers.14.self_attn.out_proj.bias', 'text_model.encoder.layers.14.self_attn.out_proj.weight', 'text_model.encoder.layers.14.self_attn.q_proj.bias', 'text_model.encoder.layers.14.self_attn.q_proj.weight', 'text_model.encoder.layers.14.self_attn.v_proj.bias', 'text_model.encoder.layers.14.self_attn.v_proj.weight', 'text_model.encoder.layers.15.layer_norm1.bias', 'text_model.encoder.layers.15.layer_norm1.weight', 'text_model.encoder.layers.15.layer_norm2.bias', 'text_model.encoder.layers.15.layer_norm2.weight', 'text_model.encoder.layers.15.mlp.fc1.bias', 'text_model.encoder.layers.15.mlp.fc1.weight', 'text_model.encoder.layers.15.mlp.fc2.bias', 'text_model.encoder.layers.15.mlp.fc2.weight', 'text_model.encoder.layers.15.self_attn.k_proj.bias', 'text_model.encoder.layers.15.self_attn.k_proj.weight', 'text_model.encoder.layers.15.self_attn.out_proj.bias', 'text_model.encoder.layers.15.self_attn.out_proj.weight', 'text_model.encoder.layers.15.self_attn.q_proj.bias', 'text_model.encoder.layers.15.self_attn.q_proj.weight', 'text_model.encoder.layers.15.self_attn.v_proj.bias', 'text_model.encoder.layers.15.self_attn.v_proj.weight', 'text_model.encoder.layers.16.layer_norm1.bias', 'text_model.encoder.layers.16.layer_norm1.weight', 
'text_model.encoder.layers.16.layer_norm2.bias', 'text_model.encoder.layers.16.layer_norm2.weight', 'text_model.encoder.layers.16.mlp.fc1.bias', 'text_model.encoder.layers.16.mlp.fc1.weight', 'text_model.encoder.layers.16.mlp.fc2.bias', 'text_model.encoder.layers.16.mlp.fc2.weight', 'text_model.encoder.layers.16.self_attn.k_proj.bias', 'text_model.encoder.layers.16.self_attn.k_proj.weight', 'text_model.encoder.layers.16.self_attn.out_proj.bias', 'text_model.encoder.layers.16.self_attn.out_proj.weight', 'text_model.encoder.layers.16.self_attn.q_proj.bias', 'text_model.encoder.layers.16.self_attn.q_proj.weight', 'text_model.encoder.layers.16.self_attn.v_proj.bias', 'text_model.encoder.layers.16.self_attn.v_proj.weight', 'text_model.encoder.layers.17.layer_norm1.bias', 'text_model.encoder.layers.17.layer_norm1.weight', 'text_model.encoder.layers.17.layer_norm2.bias', 'text_model.encoder.layers.17.layer_norm2.weight', 'text_model.encoder.layers.17.mlp.fc1.bias', 'text_model.encoder.layers.17.mlp.fc1.weight', 'text_model.encoder.layers.17.mlp.fc2.bias', 'text_model.encoder.layers.17.mlp.fc2.weight', 'text_model.encoder.layers.17.self_attn.k_proj.bias', 'text_model.encoder.layers.17.self_attn.k_proj.weight', 'text_model.encoder.layers.17.self_attn.out_proj.bias', 'text_model.encoder.layers.17.self_attn.out_proj.weight', 'text_model.encoder.layers.17.self_attn.q_proj.bias', 'text_model.encoder.layers.17.self_attn.q_proj.weight', 'text_model.encoder.layers.17.self_attn.v_proj.bias', 'text_model.encoder.layers.17.self_attn.v_proj.weight', 'text_model.encoder.layers.18.layer_norm1.bias', 'text_model.encoder.layers.18.layer_norm1.weight', 'text_model.encoder.layers.18.layer_norm2.bias', 'text_model.encoder.layers.18.layer_norm2.weight', 'text_model.encoder.layers.18.mlp.fc1.bias', 'text_model.encoder.layers.18.mlp.fc1.weight', 'text_model.encoder.layers.18.mlp.fc2.bias', 'text_model.encoder.layers.18.mlp.fc2.weight', 'text_model.encoder.layers.18.self_attn.k_proj.bias', 'text_model.encoder.layers.18.self_attn.k_proj.weight', 'text_model.encoder.layers.18.self_attn.out_proj.bias', 'text_model.encoder.layers.18.self_attn.out_proj.weight', 'text_model.encoder.layers.18.self_attn.q_proj.bias', 'text_model.encoder.layers.18.self_attn.q_proj.weight', 'text_model.encoder.layers.18.self_attn.v_proj.bias', 'text_model.encoder.layers.18.self_attn.v_proj.weight', 'text_model.encoder.layers.19.layer_norm1.bias', 'text_model.encoder.layers.19.layer_norm1.weight', 'text_model.encoder.layers.19.layer_norm2.bias', 'text_model.encoder.layers.19.layer_norm2.weight', 'text_model.encoder.layers.19.mlp.fc1.bias', 'text_model.encoder.layers.19.mlp.fc1.weight', 'text_model.encoder.layers.19.mlp.fc2.bias', 'text_model.encoder.layers.19.mlp.fc2.weight', 'text_model.encoder.layers.19.self_attn.k_proj.bias', 'text_model.encoder.layers.19.self_attn.k_proj.weight', 'text_model.encoder.layers.19.self_attn.out_proj.bias', 'text_model.encoder.layers.19.self_attn.out_proj.weight', 'text_model.encoder.layers.19.self_attn.q_proj.bias', 'text_model.encoder.layers.19.self_attn.q_proj.weight', 'text_model.encoder.layers.19.self_attn.v_proj.bias', 'text_model.encoder.layers.19.self_attn.v_proj.weight', 'text_model.encoder.layers.2.layer_norm1.bias', 'text_model.encoder.layers.2.layer_norm1.weight', 'text_model.encoder.layers.2.layer_norm2.bias', 'text_model.encoder.layers.2.layer_norm2.weight', 'text_model.encoder.layers.2.mlp.fc1.bias', 'text_model.encoder.layers.2.mlp.fc1.weight', 'text_model.encoder.layers.2.mlp.fc2.bias', 
'text_model.encoder.layers.2.mlp.fc2.weight', 'text_model.encoder.layers.2.self_attn.k_proj.bias', 'text_model.encoder.layers.2.self_attn.k_proj.weight', 'text_model.encoder.layers.2.self_attn.out_proj.bias', 'text_model.encoder.layers.2.self_attn.out_proj.weight', 'text_model.encoder.layers.2.self_attn.q_proj.bias', 'text_model.encoder.layers.2.self_attn.q_proj.weight', 'text_model.encoder.layers.2.self_attn.v_proj.bias', 'text_model.encoder.layers.2.self_attn.v_proj.weight', 'text_model.encoder.layers.20.layer_norm1.bias', 'text_model.encoder.layers.20.layer_norm1.weight', 'text_model.encoder.layers.20.layer_norm2.bias', 'text_model.encoder.layers.20.layer_norm2.weight', 'text_model.encoder.layers.20.mlp.fc1.bias', 'text_model.encoder.layers.20.mlp.fc1.weight', 'text_model.encoder.layers.20.mlp.fc2.bias', 'text_model.encoder.layers.20.mlp.fc2.weight', 'text_model.encoder.layers.20.self_attn.k_proj.bias', 'text_model.encoder.layers.20.self_attn.k_proj.weight', 'text_model.encoder.layers.20.self_attn.out_proj.bias', 'text_model.encoder.layers.20.self_attn.out_proj.weight', 'text_model.encoder.layers.20.self_attn.q_proj.bias', 'text_model.encoder.layers.20.self_attn.q_proj.weight', 'text_model.encoder.layers.20.self_attn.v_proj.bias', 'text_model.encoder.layers.20.self_attn.v_proj.weight', 'text_model.encoder.layers.21.layer_norm1.bias', 'text_model.encoder.layers.21.layer_norm1.weight', 'text_model.encoder.layers.21.layer_norm2.bias', 'text_model.encoder.layers.21.layer_norm2.weight', 'text_model.encoder.layers.21.mlp.fc1.bias', 'text_model.encoder.layers.21.mlp.fc1.weight', 'text_model.encoder.layers.21.mlp.fc2.bias', 'text_model.encoder.layers.21.mlp.fc2.weight', 'text_model.encoder.layers.21.self_attn.k_proj.bias', 'text_model.encoder.layers.21.self_attn.k_proj.weight', 'text_model.encoder.layers.21.self_attn.out_proj.bias', 'text_model.encoder.layers.21.self_attn.out_proj.weight', 'text_model.encoder.layers.21.self_attn.q_proj.bias', 'text_model.encoder.layers.21.self_attn.q_proj.weight', 'text_model.encoder.layers.21.self_attn.v_proj.bias', 'text_model.encoder.layers.21.self_attn.v_proj.weight', 'text_model.encoder.layers.22.layer_norm1.bias', 'text_model.encoder.layers.22.layer_norm1.weight', 'text_model.encoder.layers.22.layer_norm2.bias', 'text_model.encoder.layers.22.layer_norm2.weight', 'text_model.encoder.layers.22.mlp.fc1.bias', 'text_model.encoder.layers.22.mlp.fc1.weight', 'text_model.encoder.layers.22.mlp.fc2.bias', 'text_model.encoder.layers.22.mlp.fc2.weight', 'text_model.encoder.layers.22.self_attn.k_proj.bias', 'text_model.encoder.layers.22.self_attn.k_proj.weight', 'text_model.encoder.layers.22.self_attn.out_proj.bias', 'text_model.encoder.layers.22.self_attn.out_proj.weight', 'text_model.encoder.layers.22.self_attn.q_proj.bias', 'text_model.encoder.layers.22.self_attn.q_proj.weight', 'text_model.encoder.layers.22.self_attn.v_proj.bias', 'text_model.encoder.layers.22.self_attn.v_proj.weight', 'text_model.encoder.layers.23.layer_norm1.bias', 'text_model.encoder.layers.23.layer_norm1.weight', 'text_model.encoder.layers.23.layer_norm2.bias', 'text_model.encoder.layers.23.layer_norm2.weight', 'text_model.encoder.layers.23.mlp.fc1.bias', 'text_model.encoder.layers.23.mlp.fc1.weight', 'text_model.encoder.layers.23.mlp.fc2.bias', 'text_model.encoder.layers.23.mlp.fc2.weight', 'text_model.encoder.layers.23.self_attn.k_proj.bias', 'text_model.encoder.layers.23.self_attn.k_proj.weight', 'text_model.encoder.layers.23.self_attn.out_proj.bias', 
'text_model.encoder.layers.23.self_attn.out_proj.weight', 'text_model.encoder.layers.23.self_attn.q_proj.bias', 'text_model.encoder.layers.23.self_attn.q_proj.weight', 'text_model.encoder.layers.23.self_attn.v_proj.bias', 'text_model.encoder.layers.23.self_attn.v_proj.weight', 'text_model.encoder.layers.24.layer_norm1.bias', 'text_model.encoder.layers.24.layer_norm1.weight', 'text_model.encoder.layers.24.layer_norm2.bias', 'text_model.encoder.layers.24.layer_norm2.weight', 'text_model.encoder.layers.24.mlp.fc1.bias', 'text_model.encoder.layers.24.mlp.fc1.weight', 'text_model.encoder.layers.24.mlp.fc2.bias', 'text_model.encoder.layers.24.mlp.fc2.weight', 'text_model.encoder.layers.24.self_attn.k_proj.bias', 'text_model.encoder.layers.24.self_attn.k_proj.weight', 'text_model.encoder.layers.24.self_attn.out_proj.bias', 'text_model.encoder.layers.24.self_attn.out_proj.weight', 'text_model.encoder.layers.24.self_attn.q_proj.bias', 'text_model.encoder.layers.24.self_attn.q_proj.weight', 'text_model.encoder.layers.24.self_attn.v_proj.bias', 'text_model.encoder.layers.24.self_attn.v_proj.weight', 'text_model.encoder.layers.25.layer_norm1.bias', 'text_model.encoder.layers.25.layer_norm1.weight', 'text_model.encoder.layers.25.layer_norm2.bias', 'text_model.encoder.layers.25.layer_norm2.weight', 'text_model.encoder.layers.25.mlp.fc1.bias', 'text_model.encoder.layers.25.mlp.fc1.weight', 'text_model.encoder.layers.25.mlp.fc2.bias', 'text_model.encoder.layers.25.mlp.fc2.weight', 'text_model.encoder.layers.25.self_attn.k_proj.bias', 'text_model.encoder.layers.25.self_attn.k_proj.weight', 'text_model.encoder.layers.25.self_attn.out_proj.bias', 'text_model.encoder.layers.25.self_attn.out_proj.weight', 'text_model.encoder.layers.25.self_attn.q_proj.bias', 'text_model.encoder.layers.25.self_attn.q_proj.weight', 'text_model.encoder.layers.25.self_attn.v_proj.bias', 'text_model.encoder.layers.25.self_attn.v_proj.weight', 'text_model.encoder.layers.26.layer_norm1.bias', 'text_model.encoder.layers.26.layer_norm1.weight', 'text_model.encoder.layers.26.layer_norm2.bias', 'text_model.encoder.layers.26.layer_norm2.weight', 'text_model.encoder.layers.26.mlp.fc1.bias', 'text_model.encoder.layers.26.mlp.fc1.weight', 'text_model.encoder.layers.26.mlp.fc2.bias', 'text_model.encoder.layers.26.mlp.fc2.weight', 'text_model.encoder.layers.26.self_attn.k_proj.bias', 'text_model.encoder.layers.26.self_attn.k_proj.weight', 'text_model.encoder.layers.26.self_attn.out_proj.bias', 'text_model.encoder.layers.26.self_attn.out_proj.weight', 'text_model.encoder.layers.26.self_attn.q_proj.bias', 'text_model.encoder.layers.26.self_attn.q_proj.weight', 'text_model.encoder.layers.26.self_attn.v_proj.bias', 'text_model.encoder.layers.26.self_attn.v_proj.weight', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.3.layer_norm1.weight', 'text_model.encoder.layers.3.layer_norm2.bias', 'text_model.encoder.layers.3.layer_norm2.weight', 'text_model.encoder.layers.3.mlp.fc1.bias', 'text_model.encoder.layers.3.mlp.fc1.weight', 'text_model.encoder.layers.3.mlp.fc2.bias', 'text_model.encoder.layers.3.mlp.fc2.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.3.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.out_proj.bias', 'text_model.encoder.layers.3.self_attn.out_proj.weight', 'text_model.encoder.layers.3.self_attn.q_proj.bias', 'text_model.encoder.layers.3.self_attn.q_proj.weight', 'text_model.encoder.layers.3.self_attn.v_proj.bias', 
'text_model.encoder.layers.3.self_attn.v_proj.weight', 'text_model.encoder.layers.4.layer_norm1.bias', 'text_model.encoder.layers.4.layer_norm1.weight', 'text_model.encoder.layers.4.layer_norm2.bias', 'text_model.encoder.layers.4.layer_norm2.weight', 'text_model.encoder.layers.4.mlp.fc1.bias', 'text_model.encoder.layers.4.mlp.fc1.weight', 'text_model.encoder.layers.4.mlp.fc2.bias', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.4.self_attn.k_proj.bias', 'text_model.encoder.layers.4.self_attn.k_proj.weight', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.4.self_attn.out_proj.weight', 'text_model.encoder.layers.4.self_attn.q_proj.bias', 'text_model.encoder.layers.4.self_attn.q_proj.weight', 'text_model.encoder.layers.4.self_attn.v_proj.bias', 'text_model.encoder.layers.4.self_attn.v_proj.weight', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.5.layer_norm1.weight', 'text_model.encoder.layers.5.layer_norm2.bias', 'text_model.encoder.layers.5.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc1.bias', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.5.mlp.fc2.weight', 'text_model.encoder.layers.5.self_attn.k_proj.bias', 'text_model.encoder.layers.5.self_attn.k_proj.weight', 'text_model.encoder.layers.5.self_attn.out_proj.bias', 'text_model.encoder.layers.5.self_attn.out_proj.weight', 'text_model.encoder.layers.5.self_attn.q_proj.bias', 'text_model.encoder.layers.5.self_attn.q_proj.weight', 'text_model.encoder.layers.5.self_attn.v_proj.bias', 'text_model.encoder.layers.5.self_attn.v_proj.weight', 'text_model.encoder.layers.6.layer_norm1.bias', 'text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.layer_norm2.bias', 'text_model.encoder.layers.6.layer_norm2.weight', 'text_model.encoder.layers.6.mlp.fc1.bias', 'text_model.encoder.layers.6.mlp.fc1.weight', 'text_model.encoder.layers.6.mlp.fc2.bias', 'text_model.encoder.layers.6.mlp.fc2.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.6.self_attn.k_proj.weight', 'text_model.encoder.layers.6.self_attn.out_proj.bias', 'text_model.encoder.layers.6.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.bias', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.6.self_attn.v_proj.bias', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.layer_norm1.bias', 'text_model.encoder.layers.7.layer_norm1.weight', 'text_model.encoder.layers.7.layer_norm2.bias', 'text_model.encoder.layers.7.layer_norm2.weight', 'text_model.encoder.layers.7.mlp.fc1.bias', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.7.mlp.fc2.bias', 'text_model.encoder.layers.7.mlp.fc2.weight', 'text_model.encoder.layers.7.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.out_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.7.self_attn.q_proj.bias', 'text_model.encoder.layers.7.self_attn.q_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.8.layer_norm1.bias', 'text_model.encoder.layers.8.layer_norm1.weight', 'text_model.encoder.layers.8.layer_norm2.bias', 'text_model.encoder.layers.8.layer_norm2.weight', 'text_model.encoder.layers.8.mlp.fc1.bias', 
'text_model.encoder.layers.8.mlp.fc1.weight', 'text_model.encoder.layers.8.mlp.fc2.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.8.self_attn.out_proj.bias', 'text_model.encoder.layers.8.self_attn.out_proj.weight', 'text_model.encoder.layers.8.self_attn.q_proj.bias', 'text_model.encoder.layers.8.self_attn.q_proj.weight', 'text_model.encoder.layers.8.self_attn.v_proj.bias', 'text_model.encoder.layers.8.self_attn.v_proj.weight', 'text_model.encoder.layers.9.layer_norm1.bias', 'text_model.encoder.layers.9.layer_norm1.weight', 'text_model.encoder.layers.9.layer_norm2.bias', 'text_model.encoder.layers.9.layer_norm2.weight', 'text_model.encoder.layers.9.mlp.fc1.bias', 'text_model.encoder.layers.9.mlp.fc1.weight', 'text_model.encoder.layers.9.mlp.fc2.bias', 'text_model.encoder.layers.9.mlp.fc2.weight', 'text_model.encoder.layers.9.self_attn.k_proj.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.9.self_attn.out_proj.bias', 'text_model.encoder.layers.9.self_attn.out_proj.weight', 'text_model.encoder.layers.9.self_attn.q_proj.bias', 'text_model.encoder.layers.9.self_attn.q_proj.weight', 'text_model.encoder.layers.9.self_attn.v_proj.bias', 'text_model.encoder.layers.9.self_attn.v_proj.weight', 'text_model.final_layer_norm.bias', 'text_model.final_layer_norm.weight', 'text_model.head.bias', 'text_model.head.weight']
-- This IS expected if you are initializing SiglipVisionModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing SiglipVisionModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-2025-02-15 03:02:47,668 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of SiglipVisionModel were initialized from the model checkpoint at google/siglip-so400m-patch14-384.
-If your task is similar to the task the model of the checkpoint was trained on, you can already use SiglipVisionModel for predictions without further training.
-2025-02-15 03:02:47,864 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--google--siglip-so400m-patch14-384/snapshots/9fdffc58afc957d1a03a25b10dba0329ab15c2a3/preprocessor_config.json
-2025-02-15 03:02:47,865 - image_processing_base.py:429 - from_dict - INFO - Image processor SiglipImageProcessor {
-"do_convert_rgb": null,
-"do_normalize": true,
-"do_rescale": true,
-"do_resize": true,
-"image_mean": [
-0.5,
-0.5,
-0.5
-],
-"image_processor_type": "SiglipImageProcessor",
-"image_std": [
-0.5,
-0.5,
-0.5
-],
-"processor_class": "SiglipProcessor",
-"resample": 3,
-"rescale_factor": 0.00392156862745098,
-"size": {
-"height": 384,
-"width": 384
-}
-}
-
-2025-02-15 03:02:48,307 - configuration_utils.py:733 - _get_config_dict - INFO - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/config.json
-2025-02-15 03:02:48,310 - configuration_utils.py:800 - from_dict - INFO - Model config Dinov2Config {
-"apply_layernorm": true,
-"architectures": [
-"Dinov2Model"
-],
-"attention_probs_dropout_prob": 0.0,
-"drop_path_rate": 0.0,
-"hidden_act": "gelu",
-"hidden_dropout_prob": 0.0,
-"hidden_size": 1536,
-"image_size": 518,
-"initializer_range": 0.02,
-"layer_norm_eps": 1e-06,
-"layerscale_value": 1.0,
-"mlp_ratio": 4,
-"model_type": "dinov2",
-"num_attention_heads": 24,
-"num_channels": 3,
-"num_hidden_layers": 40,
-"out_features": [
-"stage40"
-],
-"out_indices": [
-40
-],
-"patch_size": 14,
-"qkv_bias": true,
-"reshape_hidden_states": true,
-"stage_names": [
-"stem",
-"stage1",
-"stage2",
-"stage3",
-"stage4",
-"stage5",
-"stage6",
-"stage7",
-"stage8",
-"stage9",
-"stage10",
-"stage11",
-"stage12",
-"stage13",
-"stage14",
-"stage15",
-"stage16",
-"stage17",
-"stage18",
-"stage19",
-"stage20",
-"stage21",
-"stage22",
-"stage23",
-"stage24",
-"stage25",
-"stage26",
-"stage27",
-"stage28",
-"stage29",
-"stage30",
-"stage31",
-"stage32",
-"stage33",
-"stage34",
-"stage35",
-"stage36",
-"stage37",
-"stage38",
-"stage39",
-"stage40"
-],
-"torch_dtype": "float32",
-"transformers_version": "4.43.1",
-"use_swiglu_ffn": true
-}
-
-2025-02-15 03:02:48,310 - modeling_utils.py:3621 - from_pretrained - INFO - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/model.safetensors
-2025-02-15 03:02:48,895 - modeling_utils.py:4450 - _load_pretrained_model - INFO - All model checkpoint weights were used when initializing Dinov2Model.
-
-2025-02-15 03:02:48,895 - modeling_utils.py:4458 - _load_pretrained_model - INFO - All the weights of Dinov2Model were initialized from the model checkpoint at facebook/dinov2-giant.
-If your task is similar to the task the model of the checkpoint was trained on, you can already use Dinov2Model for predictions without further training.
-2025-02-15 03:02:49,091 - image_processing_base.py:375 - get_image_processor_dict - INFO - loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--facebook--dinov2-giant/snapshots/611a9d42f2335e0f921f1e313ad3c1b7178d206d/preprocessor_config.json
-2025-02-15 03:02:49,094 - image_processing_base.py:429 - from_dict - INFO - Image processor BitImageProcessor {
-"crop_size": {
-"height": 378,
-"width": 378
-},
-"do_center_crop": true,
-"do_convert_rgb": true,
-"do_normalize": true,
-"do_rescale": true,
-"do_resize": true,
-"image_mean": [
-0.485,
-0.456,
-0.406
-],
-"image_processor_type": "BitImageProcessor",
-"image_std": [
-0.229,
-0.224,
-0.225
-],
-"resample": 3,
-"rescale_factor": 0.00392156862745098,
-"size": {
-"shortest_edge": 378
-}
-}
-
-2025-02-15 03:02:50,031 - finetune_llama.py:1239 - train - INFO - Total params: 3264865280
-2025-02-15 03:02:50,031 - finetune_llama.py:1240 - train - INFO - Trainable params: 12589056
-2025-02-15 03:02:50,031 - finetune_llama.py:1241 - train - INFO - LM head params: 394002432
-2025-02-15 03:02:52,464 - trainer_callback.py:423 - add_callback - WARNING - You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is
-:DefaultFlowCallback
-TensorBoardCallback
-2025-02-15 03:02:52,464 - trainer.py:648 - __init__ - INFO - Using auto half precision backend
-2025-02-15 03:02:52,465 - trainer.py:2526 - _load_from_checkpoint - INFO - Loading model from ./checkpoints/cambrian_llama3_2/checkpoint-4.
-2025-02-15 03:02:55,358 - trainer.py:2134 - _inner_training_loop - INFO - ***** Running training *****
-2025-02-15 03:02:55,358 - trainer.py:2135 - _inner_training_loop - INFO - Num examples = 540
-2025-02-15 03:02:55,358 - trainer.py:2136 - _inner_training_loop - INFO - Num Epochs = 2
-2025-02-15 03:02:55,358 - trainer.py:2137 - _inner_training_loop - INFO - Instantaneous batch size per device = 1
-2025-02-15 03:02:55,358 - trainer.py:2140 - _inner_training_loop - INFO - Total train batch size (w. parallel, distributed & accumulation) = 1
-2025-02-15 03:02:55,358 - trainer.py:2141 - _inner_training_loop - INFO - Gradient Accumulation steps = 1
-2025-02-15 03:02:55,358 - trainer.py:2142 - _inner_training_loop - INFO - Total optimization steps = 1,080
-2025-02-15 03:02:55,359 - trainer.py:2143 - _inner_training_loop - INFO - Number of trainable parameters = 406,591,488
-2025-02-15 03:02:55,359 - trainer.py:2165 - _inner_training_loop - INFO - Continuing training from checkpoint, will skip to saved global_step
-2025-02-15 03:02:55,359 - trainer.py:2166 - _inner_training_loop - INFO - Continuing training from epoch 0
-2025-02-15 03:02:55,359 - trainer.py:2167 - _inner_training_loop - INFO - Continuing training from global step 4
-2025-02-15 03:02:55,359 - trainer.py:2169 - _inner_training_loop - INFO - Will skip the first 0 epochs then the first 4 batches in the first epoch.
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8ca82d50a27c49e4b6b579372386820290395eb6a9384bbeba1d94c653c460a
+size 37091
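The three plus-prefixed lines above are the entire new contents of the tracked log: with Git LFS, the repository keeps only a small pointer file (spec version, SHA-256 object id, and byte size) while the actual log payload is stored in LFS storage; fetching the LFS objects (for example with git lfs pull) restores the original 37091-byte log in the working tree. As a minimal illustration only, not part of this commit, a pointer in this format can be read back with the short Python sketch below; the helper name read_lfs_pointer and the file path are hypothetical placeholders.

    # Minimal sketch (illustrative): parse a Git LFS pointer file of the form
    # shown above ("version ...", "oid sha256:<hex>", "size <bytes>").
    # The path "runtime_logs/example_run.log" is a hypothetical placeholder.

    def read_lfs_pointer(path: str) -> dict:
        """Return the key/value fields of a Git LFS pointer file."""
        fields = {}
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:  # skip blank lines, if any
                    key, _, value = line.partition(" ")
                    fields[key] = value  # e.g. fields["size"] == "37091"
        return fields

    if __name__ == "__main__":
        ptr = read_lfs_pointer("runtime_logs/example_run.log")
        print(ptr.get("version"), ptr.get("oid"), ptr.get("size"))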