tjs9707 commited on
Commit
350d350
·
verified ·
1 Parent(s): dccd41e

Add files using upload-large-folder tool

Browse files
Files changed (36) hide show
  1. .gitattributes +2 -0
  2. README.md +250 -0
  3. asset/method.png +3 -0
  4. asset/teaser.png +3 -0
  5. gamecraft_models/mp_rank_00_model_states.pt +3 -0
  6. gamecraft_models/mp_rank_00_model_states_distill.pt +3 -0
  7. stdmodels/llava-llama-3-8b-v1_1-transformers/.gitattributes +35 -0
  8. stdmodels/llava-llama-3-8b-v1_1-transformers/README.md +122 -0
  9. stdmodels/llava-llama-3-8b-v1_1-transformers/config.json +44 -0
  10. stdmodels/llava-llama-3-8b-v1_1-transformers/generation_config.json +6 -0
  11. stdmodels/llava-llama-3-8b-v1_1-transformers/model-00001-of-00004.safetensors +3 -0
  12. stdmodels/llava-llama-3-8b-v1_1-transformers/model-00002-of-00004.safetensors +3 -0
  13. stdmodels/llava-llama-3-8b-v1_1-transformers/model-00003-of-00004.safetensors +3 -0
  14. stdmodels/llava-llama-3-8b-v1_1-transformers/model-00004-of-00004.safetensors +3 -0
  15. stdmodels/llava-llama-3-8b-v1_1-transformers/model.safetensors.index.json +693 -0
  16. stdmodels/llava-llama-3-8b-v1_1-transformers/preprocessor_config.json +45 -0
  17. stdmodels/llava-llama-3-8b-v1_1-transformers/special_tokens_map.json +24 -0
  18. stdmodels/llava-llama-3-8b-v1_1-transformers/tokenizer.json +0 -0
  19. stdmodels/llava-llama-3-8b-v1_1-transformers/tokenizer_config.json +2093 -0
  20. stdmodels/openai_clip-vit-large-patch14/.gitattributes +28 -0
  21. stdmodels/openai_clip-vit-large-patch14/README.md +145 -0
  22. stdmodels/openai_clip-vit-large-patch14/config.json +171 -0
  23. stdmodels/openai_clip-vit-large-patch14/flax_model.msgpack +3 -0
  24. stdmodels/openai_clip-vit-large-patch14/merges.txt +0 -0
  25. stdmodels/openai_clip-vit-large-patch14/model.safetensors +3 -0
  26. stdmodels/openai_clip-vit-large-patch14/preprocessor_config.json +19 -0
  27. stdmodels/openai_clip-vit-large-patch14/pytorch_model.bin +3 -0
  28. stdmodels/openai_clip-vit-large-patch14/special_tokens_map.json +1 -0
  29. stdmodels/openai_clip-vit-large-patch14/tf_model.h5 +3 -0
  30. stdmodels/openai_clip-vit-large-patch14/tokenizer.json +0 -0
  31. stdmodels/openai_clip-vit-large-patch14/tokenizer_config.json +34 -0
  32. stdmodels/openai_clip-vit-large-patch14/vocab.json +0 -0
  33. stdmodels/vae_3d/hyvae/checkpoint-step-270000.ckpt +3 -0
  34. stdmodels/vae_3d/hyvae/config.json +34 -0
  35. stdmodels/vae_3d/hyvae/pytorch_model.pt +3 -0
  36. stdmodels/vae_3d/hyvae/source +1 -0
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  teaser.png filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  teaser.png filter=lfs diff=lfs merge=lfs -text
37
+ asset/method.png filter=lfs diff=lfs merge=lfs -text
38
+ asset/teaser.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: image-to-video
3
+ language:
4
+ - en
5
+ extra_gated_eu_disallowed: true
6
+ ---
7
+ <!-- ## **HunyuanVideo-Avatar** -->
8
+
9
+ <!-- ## **Hunyuan-GameCraft** -->
10
+
11
+ <!-- <p align="center">
12
+ <img src="assets/material/logo.png" height=100>
13
+ </p> -->
14
+
15
+ # **Hunyuan-GameCraft** 🎮
16
+
17
+ <div align="center">
18
+ <a href="https://github.com/Tencent-Hunyuan/Hunyuan-GameCraft-1.0"><img src="https://img.shields.io/static/v1?label=Hunyuan-GameCraft-1.0%20Code&message=Github&color=blue"></a> &ensp;
19
+ <a href="https://hunyuan-gamecraft.github.io/"><img src="https://img.shields.io/static/v1?label=Project%20Page&message=Web&color=green"></a> &ensp;
20
+ <a href="https://arxiv.org/abs/2506.17201"><img src="https://img.shields.io/badge/ArXiv-2506.17201-red"></a> &ensp;
21
+ </div>
22
+
23
+ <div align="center">
24
+ <a href="https://huggingface.co/tencent/Hunyuan-GameCraft-1.0"><img src="https://img.shields.io/static/v1?label=Hunyuan-GameCraft-1.0&message=Hunyuan-GameCraft-1.0&color=yellow"></a> &ensp;
25
+ </div>
26
+
27
+ ![image](asset/teaser.png)
28
+
29
+ > [**Hunyuan-GameCraft: High-dynamic Interactive Game Video Generation with Hybrid History Condition**](https://arxiv.org/pdf/2505.20156) <be>
30
+
31
+ ## 🔥🔥🔥 News!!
32
+ * Jun 06, 2025: 🔥 Hunyuan-GameCraft supports **Single GPU** with only **10GB VRAM**, with **DeepCache** included.
33
+ * Aug 14, 2025: 👋 We release the inference code and model weights of Hunyuan-GameCraft. [Download](weights/README.md).
34
+
35
+
36
+ ## 📑 Open-source Plan
37
+
38
+ - Hunyuan-GameCraft
39
+ - [x] Inference
40
+ - [x] Checkpoints
41
+ - [ ] Gradio & Huggingface Demo
42
+
43
+ ## Contents
44
+ - [**Hunyuan-GameCraft** 🌅](#Hunyuan-GameCraft-)
45
+ - [🔥🔥🔥 News!!](#-news)
46
+ - [📑 Open-source Plan](#-open-source-plan)
47
+ - [Contents](#contents)
48
+ - [**Abstract**](#abstract)
49
+ - [**Overall Architecture**](#Hunyuan-GameCraft-overall-architecture)
50
+ - [📜 Requirements](#-requirements)
51
+ - [🛠️ Dependencies and Installation](#️-dependencies-and-installation)
52
+ - [Installation Guide for Linux](#installation-guide-for-linux)
53
+ - [🧱 Download Pretrained Models](#-download-pretrained-models)
54
+ - [🚀 Parallel Inference on Multiple GPUs](#-parallel-inference-on-multiple-gpus)
55
+ - [🔑 Single-gpu Inference](#-single-gpu-inference)
56
+ - [Run with very low VRAM](#run-with-very-low-vram)
57
+ - [Run a Gradio Server](#run-a-gradio-server)
58
+ - [🔗 BibTeX](#-bibtex)
59
+ - [Acknowledgements](#acknowledgements)
60
+ ---
61
+
62
+ ## **Abstract**
63
+
64
+ Recent advances in diffusion-based and controllable video generation have enabled high-quality and temporally coherent video synthesis, laying the groundwork for immersive interactive gaming experiences. However, current methods face limitations in **dynamics**, **physically realistic**, **long-term consistency**, and **efficiency**, which limit the ability to create various gameplay videos. To address these gaps, we introduce Hunyuan-GameCraft, a novel framework for high-dynamic interactive video generation in game environments. To achieve fine-grained action control, we unify standard keyboard and mouse inputs into a **shared camera representation space**, facilitating smooth interpolation between various camera and movement operations. Then we propose a **hybrid history-conditioned training strategy** that extends video sequences autoregressively while preserving game scene information. Additionally, to enhance inference efficiency and playability, we achieve **model distillation** to reduce computational overhead while maintaining consistency across long temporal sequences, making it suitable for real-time deployment in complex interactive environments. The model is trained on a large-scale dataset comprising over one million gameplay recordings across over 100 AAA games, ensuring broad coverage and diversity, then fine-tuned on a carefully annotated synthetic dataset to enhance precision and control. The curated game scene data significantly improves the visual fidelity, realism and action controllability. Extensive experiments demonstrate that Hunyuan-GameCraft significantly outperforms existing models, advancing the realism and playability of interactive game video generation.
65
+
66
+ ## **Overall Architecture**
67
+
68
+ ![image](asset/method.png)
69
+
70
+ Given a reference image and the corresponding prompt, the keyboard or mouse signal, we transform these options to the continuous camera space. Then we design a light-weight action encoder to encode the input camera trajectory. The action and image features are added after patchify. For long video extension, we design a variable mask indicator, where 1 and 0 indicate history frames and predicted frames, respectively.
71
+
72
+
73
+ ## 📜 Requirements
74
+
75
+ * An NVIDIA GPU with CUDA support is required.
76
+ * The model is tested on a machine with 8GPUs.
77
+ * **Minimum**: The minimum GPU memory required is 24GB but very slow.
78
+ * **Recommended**: We recommend using a GPU with 80GB of memory for better generation quality.
79
+ * Tested operating system: Linux
80
+
81
+
82
+ ## 🛠️ Dependencies and Installation
83
+
84
+ Begin by cloning the repository:
85
+ ```shell
86
+ git clone https://github.com/Tencent-Hunyuan/Hunyuan-GameCraft-1.0.git
87
+ cd Hunyuan-GameCraft-1.0
88
+ ```
89
+
90
+ ### Installation Guide for Linux
91
+
92
+ We recommend CUDA versions 12.4 for the manual installation.
93
+
94
+ Conda's installation instructions are available [here](https://docs.anaconda.com/free/miniconda/index.html).
95
+
96
+ ```shell
97
+ # 1. Create conda environment
98
+ conda create -n HYGameCraft python==3.10
99
+
100
+ # 2. Activate the environment
101
+ conda activate HYGameCraft
102
+
103
+ # 3. Install PyTorch and other dependencies using conda
104
+ conda install pytorch==2.5.1 torchvision==0.20.0 torchaudio==2.5.1 pytorch-cuda=12.4 -c pytorch -c nvidia
105
+
106
+ # 4. Install pip dependencies
107
+ python -m pip install -r requirements.txt
108
+ # 5. Install flash attention v2 for acceleration (requires CUDA 11.8 or above)
109
+ python -m pip install ninja
110
+ python -m pip install git+https://github.com/Dao-AILab/[email protected]
111
+ ```
112
+
113
+ Additionally, you can also use HunyuanVideo Docker image. Use the following command to pull and run the docker image.
114
+
115
+ ```shell
116
+ # For CUDA 12.4 (updated to avoid float point exception)
117
+ docker pull hunyuanvideo/hunyuanvideo:cuda_12
118
+ docker run -itd --gpus all --init --net=host --uts=host --ipc=host --name hunyuanvideo --security-opt=seccomp=unconfined --ulimit=stack=67108864 --ulimit=memlock=-1 --privileged hunyuanvideo/hunyuanvideo:cuda_12
119
+ pip install diffusers==0.34.0 transformers==4.54.1
120
+
121
+ ```
122
+
123
+
124
+ ## 🚀 Parallel Inference on Multiple GPUs
125
+
126
+ For example, to generate a video using 8 GPUs, you can use the following command, where `--action-list w s d a` simulate keyboard manipulation signals to help you generate a video of the corresponding content. `--action-speed-list 0.2 0.2 0.2 0.2` represents the displacement distance and can be replaced with any value between 0 and 3, the length of `action-speed-list` must be the same as `action-list`:
127
+ ```bash
128
+ #!/bin/bash
129
+ JOBS_DIR=$(dirname $(dirname "$0"))
130
+ export PYTHONPATH=${JOBS_DIR}:$PYTHONPATH
131
+ export MODEL_BASE="weights/stdmodels"
132
+ checkpoint_path="weights/gamecraft_models/mp_rank_00_model_states.pt"
133
+
134
+ current_time=$(date "+%Y.%m.%d-%H.%M.%S")
135
+ modelname='Tencent_hunyuanGameCraft_720P'
136
+
137
+ torchrun --nnodes=1 --nproc_per_node=8 --master_port 29605 hymm_sp/sample_batch.py \
138
+ --image-path "asset/village.png" \
139
+ --prompt "A charming medieval village with cobblestone streets, thatched-roof houses, and vibrant flower gardens under a bright blue sky." \
140
+ --add-pos-prompt "Realistic, High-quality." \
141
+ --add-neg-prompt "overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion, blurring, text, subtitles, static, picture, black border." \
142
+ --ckpt ${checkpoint_path} \
143
+ --video-size 704 1216 \
144
+ --cfg-scale 2.0 \
145
+ --image-start \
146
+ --action-list w s d a \
147
+ --action-speed-list 0.2 0.2 0.2 0.2 \
148
+ --seed 250160 \
149
+ --infer-steps 50 \
150
+ --use-fp8 \
151
+ --flow-shift-eval-video 5.0 \
152
+ --save-path './results/'
153
+
154
+ ```
155
+
156
+
157
+ Additionally, we support FP8 optimization and [SageAttn](https://github.com/thu-ml/SageAttention). To enable FP8, simply add the `--use-fp8` to your command.
158
+ And install SageAttention with:
159
+ ```bash
160
+ git clone https://github.com/thu-ml/SageAttention.git
161
+ cd SageAttention
162
+ python setup.py install # or pip install -e .
163
+ ```
164
+
165
+ We also provide accelerated model, you can use the following command:
166
+ ```bash
167
+ #!/bin/bash
168
+ JOBS_DIR=$(dirname $(dirname "$0"))
169
+ export PYTHONPATH=${JOBS_DIR}:$PYTHONPATH
170
+ export MODEL_BASE="weights/stdmodels"
171
+ checkpoint_path="weights/gamecraft_models/mp_rank_00_model_states_distill.pt"
172
+
173
+ current_time=$(date "+%Y.%m.%d-%H.%M.%S")
174
+ modelname='Tencent_hunyuanGameCraft_720P'
175
+
176
+ torchrun --nnodes=1 --nproc_per_node=8 --master_port 29605 hymm_sp/sample_batch.py \
177
+ --image-path "asset/village.png" \
178
+ --prompt "A charming medieval village with cobblestone streets, thatched-roof houses, and vibrant flower gardens under a bright blue sky." \
179
+ --add-neg-prompt "overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion, blurring, text, subtitles, static, picture, black border." \
180
+ --ckpt ${checkpoint_path} \
181
+ --video-size 704 1216 \
182
+ --cfg-scale 1.0 \
183
+ --image-start \
184
+ --action-list w s d a \
185
+ --action-speed-list 0.2 0.2 0.2 0.2 \
186
+ --seed 250160 \
187
+ --infer-steps 8 \
188
+ --use-fp8 \
189
+ --flow-shift-eval-video 5.0 \
190
+ --save-path './results_distill/'
191
+ ```
192
+
193
+
194
+ ## 🔑 Single-gpu with Low-VRAM Inference
195
+
196
+ For example, to generate a video with 1 GPU with Low-VRAM (over 24GB), you can use the following command:
197
+
198
+ ```bash
199
+ #!/bin/bash
200
+ JOBS_DIR=$(dirname $(dirname "$0"))
201
+ export PYTHONPATH=${JOBS_DIR}:$PYTHONPATH
202
+ export MODEL_BASE="weights/stdmodels"
203
+ checkpoint_path="weights/gamecraft_models/mp_rank_00_model_states.pt"
204
+
205
+ current_time=$(date "+%Y.%m.%d-%H.%M.%S")
206
+ modelname='Tencent_hunyuanGameCraft_720P'
207
+
208
+ # disable sp and cpu offload
209
+ export DISABLE_SP=1
210
+ export CPU_OFFLOAD=1
211
+
212
+ torchrun --nnodes=1 --nproc_per_node=1 --master_port 29605 hymm_sp/sample_batch.py \
213
+ --image-path "asset/village.png" \
214
+ --prompt "A charming medieval village with cobblestone streets, thatched-roof houses, and vibrant flower gardens under a bright blue sky." \
215
+ --add-neg-prompt "overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion, blurring, text, subtitles, static, picture, black border." \
216
+ --ckpt ${checkpoint_path} \
217
+ --video-size 704 1216 \
218
+ --cfg-scale 2.0 \
219
+ --image-start \
220
+ --action-list w a d s \
221
+ --action-speed-list 0.2 0.2 0.2 0.2 \
222
+ --seed 250160 \
223
+ --infer-steps 50 \
224
+ --flow-shift-eval-video 5.0 \
225
+ --cpu-offload \
226
+ --use-fp8 \
227
+ --save-path './results/'
228
+
229
+ ```
230
+
231
+
232
+ ## 🔗 BibTeX
233
+
234
+ If you find [Hunyuan-GameCraft](https://arxiv.org/pdf/2505.20156) useful for your research and applications, please cite using this BibTeX:
235
+
236
+ ```BibTeX
237
+ @misc{li2025hunyuangamecrafthighdynamicinteractivegame,
238
+ title={Hunyuan-GameCraft: High-dynamic Interactive Game Video Generation with Hybrid History Condition},
239
+ author={Jiaqi Li and Junshu Tang and Zhiyong Xu and Longhuang Wu and Yuan Zhou and Shuai Shao and Tianbao Yu and Zhiguo Cao and Qinglin Lu},
240
+ year={2025},
241
+ eprint={2506.17201},
242
+ archivePrefix={arXiv},
243
+ primaryClass={cs.CV},
244
+ url={https://arxiv.org/abs/2506.17201},
245
+ }
246
+ ```
247
+
248
+ ## Acknowledgements
249
+
250
+ We would like to thank the contributors to the [HunyuanVideo](https://github.com/Tencent/HunyuanVideo), [HunyuanVideo-Avatar](https://github.com/Tencent-Hunyuan/HunyuanVideo-Avatar),[SD3](https://huggingface.co/stabilityai/stable-diffusion-3-medium), [FLUX](https://github.com/black-forest-labs/flux), [Llama](https://github.com/meta-llama/llama), [LLaVA](https://github.com/haotian-liu/LLaVA), [Xtuner](https://github.com/InternLM/xtuner), [diffusers](https://github.com/huggingface/diffusers) and [HuggingFace](https://huggingface.co) repositories, for their open research and exploration.
asset/method.png ADDED

Git LFS Details

  • SHA256: e9d0546830d54f90e96392614405472ab06eb700ad184e4a6689bd84ff436890
  • Pointer size: 132 Bytes
  • Size of remote file: 2.86 MB
asset/teaser.png ADDED

Git LFS Details

  • SHA256: 5272120a5f85af9ee44c5f9714d6d0d99ba186ef2a66181b4bbd1b718c399555
  • Pointer size: 133 Bytes
  • Size of remote file: 20.9 MB
gamecraft_models/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b150745b7b9c974433e0140c8c3ed8a9ab5596560a391ed736de5c0edb4b16a
3
+ size 30153470020
gamecraft_models/mp_rank_00_model_states_distill.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2fdd0ec5de5b17a30c607ef7892767d3e562d12b3ee713e6ee55b1c4b4d20ef
3
+ size 30153470020
stdmodels/llava-llama-3-8b-v1_1-transformers/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
stdmodels/llava-llama-3-8b-v1_1-transformers/README.md ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - Lin-Chen/ShareGPT4V
4
+ pipeline_tag: image-text-to-text
5
+ library_name: xtuner
6
+ ---
7
+
8
+ <div align="center">
9
+ <img src="https://github.com/InternLM/lmdeploy/assets/36994684/0cf8d00f-e86b-40ba-9b54-dc8f1bc6c8d8" width="600"/>
10
+
11
+
12
+ [![Generic badge](https://img.shields.io/badge/GitHub-%20XTuner-black.svg)](https://github.com/InternLM/xtuner)
13
+
14
+
15
+ </div>
16
+
17
+ ## Model
18
+
19
+ llava-llama-3-8b-v1_1-hf is a LLaVA model fine-tuned from [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and [CLIP-ViT-Large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336) with [ShareGPT4V-PT](https://huggingface.co/datasets/Lin-Chen/ShareGPT4V) and [InternVL-SFT](https://github.com/OpenGVLab/InternVL/tree/main/internvl_chat#prepare-training-datasets) by [XTuner](https://github.com/InternLM/xtuner).
20
+
21
+ **Note: This model is in HuggingFace LLaVA format.**
22
+
23
+ Resources:
24
+
25
+ - GitHub: [xtuner](https://github.com/InternLM/xtuner)
26
+ - Official LLaVA format model: [xtuner/llava-llama-3-8b-v1_1-hf](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-hf)
27
+ - XTuner LLaVA format model: [xtuner/llava-llama-3-8b-v1_1](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1)
28
+ - GGUF format model: [xtuner/llava-llama-3-8b-v1_1-gguf](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf)
29
+
30
+
31
+ ## Details
32
+
33
+ | Model | Visual Encoder | Projector | Resolution | Pretraining Strategy | Fine-tuning Strategy | Pretrain Dataset | Fine-tune Dataset |
34
+ | :-------------------- | ------------------: | --------: | ---------: | ---------------------: | ------------------------: | ------------------------: | -----------------------: |
35
+ | LLaVA-v1.5-7B | CLIP-L | MLP | 336 | Frozen LLM, Frozen ViT | Full LLM, Frozen ViT | LLaVA-PT (558K) | LLaVA-Mix (665K) |
36
+ | LLaVA-Llama-3-8B | CLIP-L | MLP | 336 | Frozen LLM, Frozen ViT | Full LLM, LoRA ViT | LLaVA-PT (558K) | LLaVA-Mix (665K) |
37
+ | LLaVA-Llama-3-8B-v1.1 | CLIP-L | MLP | 336 | Frozen LLM, Frozen ViT | Full LLM, LoRA ViT | ShareGPT4V-PT (1246K) | InternVL-SFT (1268K) |
38
+
39
+ ## Results
40
+
41
+ <div align="center">
42
+ <img src="https://github.com/InternLM/xtuner/assets/36994684/a157638c-3500-44ed-bfab-d8d8249f91bb" alt="Image" width=500" />
43
+ </div>
44
+
45
+ | Model | MMBench Test (EN) | MMBench Test (CN) | CCBench Dev | MMMU Val | SEED-IMG | AI2D Test | ScienceQA Test | HallusionBench aAcc | POPE | GQA | TextVQA | MME | MMStar |
46
+ | :-------------------- | :---------------: | :---------------: | :---------: | :-------: | :------: | :-------: | :------------: | :-----------------: | :--: | :--: | :-----: | :------: | :----: |
47
+ | LLaVA-v1.5-7B | 66.5 | 59.0 | 27.5 | 35.3 | 60.5 | 54.8 | 70.4 | 44.9 | 85.9 | 62.0 | 58.2 | 1511/348 | 30.3 |
48
+ | LLaVA-Llama-3-8B | 68.9 | 61.6 | 30.4 | 36.8 | 69.8 | 60.9 | 73.3 | 47.3 | 87.2 | 63.5 | 58.0 | 1506/295 | 38.2 |
49
+ | LLaVA-Llama-3-8B-v1.1 | 72.3 | 66.4 | 31.6 | 36.8 | 70.1 | 70.0 | 72.9 | 47.7 | 86.4 | 62.6 | 59.0 | 1469/349 | 45.1 |
50
+
51
+
52
+ ## QuickStart
53
+
54
+
55
+ ### Chat by `pipeline`
56
+
57
+
58
+ ```python
59
+ from transformers import pipeline
60
+ from PIL import Image
61
+ import requests
62
+
63
+ model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
64
+ pipe = pipeline("image-to-text", model=model_id, device=0)
65
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
66
+
67
+ image = Image.open(requests.get(url, stream=True).raw)
68
+ prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
69
+ "<|start_header_id|>assistant<|end_header_id|>\n\n")
70
+ outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
71
+ print(outputs)
72
+ >>> [{'generated_text': 'user\n\n\nWhat are these?assistant\n\nThese are two cats, one brown and one gray, lying on a pink blanket. sleep. brown and gray cat sleeping on a pink blanket.'}]
73
+ ```
74
+
75
+ ### Chat by pure `transformers`
76
+
77
+ ```python
78
+ import requests
79
+ from PIL import Image
80
+
81
+ import torch
82
+ from transformers import AutoProcessor, LlavaForConditionalGeneration
83
+
84
+ model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
85
+
86
+ prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
87
+ "<|start_header_id|>assistant<|end_header_id|>\n\n")
88
+ image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
89
+
90
+ model = LlavaForConditionalGeneration.from_pretrained(
91
+ model_id,
92
+ torch_dtype=torch.float16,
93
+ low_cpu_mem_usage=True,
94
+ ).to(0)
95
+
96
+ processor = AutoProcessor.from_pretrained(model_id)
97
+
98
+
99
+ raw_image = Image.open(requests.get(image_file, stream=True).raw)
100
+ inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
101
+
102
+ output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
103
+ print(processor.decode(output[0][2:], skip_special_tokens=True))
104
+ >>> These are two cats, one brown and one gray, lying on a pink blanket. sleep. brown and gray cat sleeping on a pink blanket.
105
+ ```
106
+
107
+
108
+ ### Reproduce
109
+
110
+ Please refer to [docs](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336#readme).
111
+
112
+
113
+ ## Citation
114
+
115
+ ```bibtex
116
+ @misc{2023xtuner,
117
+ title={XTuner: A Toolkit for Efficiently Fine-tuning LLM},
118
+ author={XTuner Contributors},
119
+ howpublished = {\url{https://github.com/InternLM/xtuner}},
120
+ year={2023}
121
+ }
122
+ ```
stdmodels/llava-llama-3-8b-v1_1-transformers/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlavaForConditionalGeneration"
4
+ ],
5
+ "ignore_index": -100,
6
+ "image_token_index": 128257,
7
+ "model_type": "llava",
8
+ "pad_token_id": 128258,
9
+ "projector_hidden_act": "gelu",
10
+ "text_config": {
11
+ "architectures": [
12
+ "LlamaForCausalLM"
13
+ ],
14
+ "bos_token_id": 128000,
15
+ "eos_token_id": 128001,
16
+ "intermediate_size": 14336,
17
+ "max_position_embeddings": 8192,
18
+ "model_type": "llama",
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_theta": 500000.0,
22
+ "torch_dtype": "float16",
23
+ "vocab_size": 128320
24
+ },
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.40.1",
27
+ "vision_config": {
28
+ "architectures": [
29
+ "CLIPVisionModel"
30
+ ],
31
+ "dropout": 0.0,
32
+ "hidden_size": 1024,
33
+ "image_size": 336,
34
+ "intermediate_size": 4096,
35
+ "model_type": "clip_vision_model",
36
+ "num_attention_heads": 16,
37
+ "num_hidden_layers": 24,
38
+ "patch_size": 14,
39
+ "projection_dim": 768,
40
+ "torch_dtype": "float32"
41
+ },
42
+ "vision_feature_layer": -2,
43
+ "vision_feature_select_strategy": "default"
44
+ }
stdmodels/llava-llama-3-8b-v1_1-transformers/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": 128001,
5
+ "transformers_version": "4.40.1"
6
+ }
stdmodels/llava-llama-3-8b-v1_1-transformers/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb04e272165e426273ce0524e139e2e0a67df322b337a21dc3cad757839bbd9
3
+ size 4997088760
stdmodels/llava-llama-3-8b-v1_1-transformers/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268d2a5b0427bf992dd67e79fd1db200b6f95be61867bfe9345d370be2e6c68a
3
+ size 4915917552
stdmodels/llava-llama-3-8b-v1_1-transformers/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f8e9275cbb4ffa5607ce95502532aae71377284b251efe4c71f76883318f9ba
3
+ size 4999820824
stdmodels/llava-llama-3-8b-v1_1-transformers/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8549ed634cc7b55fe3e1855855f6dfb369f496fcd7ddc5c21a5ed994e5a0dbe9
3
+ size 1839769624
stdmodels/llava-llama-3-8b-v1_1-transformers/model.safetensors.index.json ADDED
@@ -0,0 +1,693 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 16752504832
4
+ },
5
+ "weight_map": {
6
+ "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
14
+ "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
15
+ "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
16
+ "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
17
+ "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
18
+ "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
19
+ "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
20
+ "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
21
+ "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
22
+ "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
23
+ "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
24
+ "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
25
+ "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
26
+ "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
27
+ "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
28
+ "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
29
+ "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
30
+ "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
31
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
32
+ "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
33
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
34
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
35
+ "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
36
+ "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
37
+ "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
38
+ "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
39
+ "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
40
+ "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
41
+ "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
42
+ "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
43
+ "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
+ "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
+ "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
+ "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
+ "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
+ "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
+ "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
50
+ "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
51
+ "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
52
+ "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
53
+ "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
54
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
55
+ "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
56
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
57
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
58
+ "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
59
+ "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
60
+ "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
61
+ "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
62
+ "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
63
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
64
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
65
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
66
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
67
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
68
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
69
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
70
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
71
+ "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
72
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
73
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
74
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
75
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
76
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
77
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
78
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
79
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
+ "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
+ "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
+ "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
+ "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
+ "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
86
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
87
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
88
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
89
+ "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
90
+ "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
91
+ "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
92
+ "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
93
+ "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
94
+ "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
95
+ "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
96
+ "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
97
+ "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
98
+ "language_model.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
99
+ "language_model.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
100
+ "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
101
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
102
+ "language_model.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
103
+ "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
104
+ "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
105
+ "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
106
+ "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
107
+ "language_model.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
108
+ "language_model.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
109
+ "language_model.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
110
+ "language_model.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
111
+ "language_model.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
112
+ "language_model.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
113
+ "language_model.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
114
+ "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
115
+ "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
116
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
117
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
118
+ "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
119
+ "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
120
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
121
+ "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
122
+ "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
123
+ "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
124
+ "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
125
+ "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
126
+ "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
127
+ "language_model.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
128
+ "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
129
+ "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
130
+ "language_model.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
131
+ "language_model.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
132
+ "language_model.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
133
+ "language_model.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
134
+ "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
135
+ "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
136
+ "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
137
+ "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
138
+ "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
139
+ "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
140
+ "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
141
+ "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
142
+ "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
143
+ "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
144
+ "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
145
+ "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
146
+ "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
147
+ "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
148
+ "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
149
+ "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
150
+ "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
151
+ "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
+ "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
153
+ "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
154
+ "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
155
+ "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
156
+ "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
157
+ "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
158
+ "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
159
+ "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
160
+ "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
161
+ "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
162
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
163
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
164
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
165
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
166
+ "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
167
+ "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
168
+ "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
169
+ "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
170
+ "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
171
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
172
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
173
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
174
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
175
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
176
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
177
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
178
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
179
+ "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
180
+ "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
181
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
182
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
183
+ "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
184
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
185
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
186
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
187
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
+ "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
+ "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
+ "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
+ "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
+ "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
194
+ "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
195
+ "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
196
+ "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
197
+ "language_model.model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
198
+ "language_model.model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
199
+ "language_model.model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
200
+ "language_model.model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
201
+ "language_model.model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
202
+ "language_model.model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
203
+ "language_model.model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
204
+ "language_model.model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
205
+ "language_model.model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
206
+ "language_model.model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
207
+ "language_model.model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
208
+ "language_model.model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
209
+ "language_model.model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
210
+ "language_model.model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
211
+ "language_model.model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
212
+ "language_model.model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
213
+ "language_model.model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
214
+ "language_model.model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
215
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
216
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
217
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
218
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
219
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
220
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
221
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
222
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
223
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
224
+ "language_model.model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
225
+ "language_model.model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
226
+ "language_model.model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
227
+ "language_model.model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
228
+ "language_model.model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
229
+ "language_model.model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
230
+ "language_model.model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
231
+ "language_model.model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
232
+ "language_model.model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
233
+ "language_model.model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
234
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
235
+ "language_model.model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
236
+ "language_model.model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
237
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
238
+ "language_model.model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
239
+ "language_model.model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
240
+ "language_model.model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
241
+ "language_model.model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
242
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
243
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
244
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
245
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
246
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
247
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
248
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
249
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
250
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
251
+ "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
252
+ "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
253
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
254
+ "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
255
+ "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
256
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
257
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
258
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
259
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
260
+ "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
+ "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
+ "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
+ "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
+ "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
+ "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
266
+ "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
267
+ "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
268
+ "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
269
+ "language_model.model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
270
+ "language_model.model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
271
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
272
+ "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
273
+ "language_model.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
274
+ "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
275
+ "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
276
+ "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
277
+ "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
278
+ "language_model.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
279
+ "language_model.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
280
+ "language_model.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
281
+ "language_model.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
282
+ "language_model.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
283
+ "language_model.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
284
+ "language_model.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
285
+ "language_model.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
286
+ "language_model.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
287
+ "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
288
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
289
+ "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
290
+ "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
291
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
292
+ "language_model.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
293
+ "language_model.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
294
+ "language_model.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
295
+ "language_model.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
296
+ "language_model.model.norm.weight": "model-00004-of-00004.safetensors",
297
+ "multi_modal_projector.linear_1.bias": "model-00001-of-00004.safetensors",
298
+ "multi_modal_projector.linear_1.weight": "model-00001-of-00004.safetensors",
299
+ "multi_modal_projector.linear_2.bias": "model-00001-of-00004.safetensors",
300
+ "multi_modal_projector.linear_2.weight": "model-00001-of-00004.safetensors",
301
+ "vision_tower.vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors",
302
+ "vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors",
303
+ "vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00004.safetensors",
304
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00004.safetensors",
305
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00004.safetensors",
306
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00004.safetensors",
307
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00004.safetensors",
308
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
309
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
310
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
311
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
312
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
313
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
314
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
315
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
316
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00004.safetensors",
321
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00004.safetensors",
322
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00004.safetensors",
323
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00004.safetensors",
324
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
325
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
326
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
327
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
328
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
329
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
330
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
331
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
332
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
333
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
334
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
335
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
336
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00004.safetensors",
337
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00004.safetensors",
338
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00004.safetensors",
339
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00004.safetensors",
340
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
341
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
342
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
343
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
344
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
345
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
346
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
347
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
348
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
349
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
350
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
351
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
352
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00004.safetensors",
353
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00004.safetensors",
354
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00004.safetensors",
355
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00004.safetensors",
356
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
357
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
358
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
359
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
360
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
361
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
362
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
363
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
364
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
365
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
366
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
367
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
368
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00004.safetensors",
369
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00004.safetensors",
370
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00004.safetensors",
371
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00004.safetensors",
372
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
373
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
374
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
375
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
376
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
377
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
378
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
379
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
380
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
381
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
382
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
383
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
384
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00004.safetensors",
385
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00004.safetensors",
386
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00004.safetensors",
387
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00004.safetensors",
388
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
389
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
390
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
391
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
392
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
393
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
394
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
395
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
396
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
397
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
398
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
399
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
400
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00004.safetensors",
401
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00004.safetensors",
402
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00004.safetensors",
403
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00004.safetensors",
404
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
405
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
406
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
407
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
408
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
409
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
410
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
411
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
412
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
413
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
414
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
415
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
416
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00004.safetensors",
417
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00004.safetensors",
418
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00004.safetensors",
419
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00004.safetensors",
420
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
421
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
422
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
423
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
424
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
425
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
426
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
427
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
428
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
429
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
430
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
431
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
432
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00004.safetensors",
433
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00004.safetensors",
434
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00004.safetensors",
435
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00004.safetensors",
436
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
437
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
438
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
439
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
440
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
441
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
442
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
443
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
444
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
445
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
446
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
447
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
448
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00004.safetensors",
449
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00004.safetensors",
450
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00004.safetensors",
451
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00004.safetensors",
452
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
453
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
454
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
455
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
456
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
457
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
458
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
459
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
460
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
461
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
462
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
463
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
464
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00004.safetensors",
465
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00004.safetensors",
466
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00004.safetensors",
467
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00004.safetensors",
468
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
469
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
470
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
471
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
472
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
473
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
474
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
475
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
476
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
477
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
478
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
479
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
480
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00004.safetensors",
481
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00004.safetensors",
482
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00004.safetensors",
483
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00004.safetensors",
484
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
485
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
486
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
487
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
488
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
489
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
490
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
491
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
492
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
493
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
494
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
495
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
496
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00004.safetensors",
497
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00004.safetensors",
498
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00004.safetensors",
499
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00004.safetensors",
500
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
501
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
502
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
503
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
504
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
505
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
506
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
507
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
508
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
509
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
510
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
511
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
512
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00004.safetensors",
513
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00004.safetensors",
514
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00004.safetensors",
515
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00004.safetensors",
516
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
517
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
518
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
519
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
520
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
521
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
522
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
523
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
524
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
525
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
526
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
527
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
528
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00004.safetensors",
529
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00004.safetensors",
530
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00004.safetensors",
531
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00004.safetensors",
532
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
533
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
534
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
535
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
536
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
537
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
538
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
539
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
540
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
541
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
542
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
543
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
544
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00004.safetensors",
545
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00004.safetensors",
546
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00004.safetensors",
547
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00004.safetensors",
548
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
549
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
550
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
551
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
552
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
553
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
554
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
555
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
556
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
557
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
558
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
559
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
560
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00004.safetensors",
561
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00004.safetensors",
562
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00004.safetensors",
563
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00004.safetensors",
564
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
565
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
566
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
567
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
568
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
569
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
570
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
571
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
572
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
573
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
574
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
575
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
576
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00004.safetensors",
577
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00004.safetensors",
578
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00004.safetensors",
579
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00004.safetensors",
580
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
581
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
582
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
583
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
584
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
585
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
586
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
587
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
588
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
589
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
590
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
591
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
592
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00004.safetensors",
593
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00004.safetensors",
594
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00004.safetensors",
595
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00004.safetensors",
596
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
597
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
598
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
599
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
600
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
601
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
602
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
603
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
604
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
605
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
606
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
607
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
608
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00004.safetensors",
609
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00004.safetensors",
610
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00004.safetensors",
611
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00004.safetensors",
612
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
613
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
614
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
615
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
616
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
617
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
618
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
619
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
620
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
621
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
622
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
623
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
624
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00004.safetensors",
625
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00004.safetensors",
626
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00004.safetensors",
627
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00004.safetensors",
628
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
629
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
630
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
631
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
632
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
633
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
634
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
635
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
636
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
637
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
638
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
639
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
640
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00004.safetensors",
641
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00004.safetensors",
642
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00004.safetensors",
643
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00004.safetensors",
644
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
645
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
646
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
647
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
648
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
649
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
650
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
651
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
652
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
653
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
654
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
655
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
656
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00004.safetensors",
657
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00004.safetensors",
658
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00004.safetensors",
659
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00004.safetensors",
660
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
661
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
662
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
663
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
664
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
665
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
666
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
667
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
668
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
669
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
670
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
671
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
672
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00004.safetensors",
673
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00004.safetensors",
674
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00004.safetensors",
675
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00004.safetensors",
676
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
677
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
678
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
679
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
680
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
681
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
682
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
683
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
684
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
685
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
686
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
687
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
688
+ "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00004.safetensors",
689
+ "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00004.safetensors",
690
+ "vision_tower.vision_model.pre_layrnorm.bias": "model-00001-of-00004.safetensors",
691
+ "vision_tower.vision_model.pre_layrnorm.weight": "model-00001-of-00004.safetensors"
692
+ }
693
+ }
stdmodels/llava-llama-3-8b-v1_1-transformers/preprocessor_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_center_crop",
8
+ "crop_size",
9
+ "do_rescale",
10
+ "rescale_factor",
11
+ "do_normalize",
12
+ "image_mean",
13
+ "image_std",
14
+ "do_convert_rgb",
15
+ "return_tensors",
16
+ "data_format",
17
+ "input_data_format"
18
+ ],
19
+ "crop_size": {
20
+ "height": 336,
21
+ "width": 336
22
+ },
23
+ "do_center_crop": true,
24
+ "do_convert_rgb": true,
25
+ "do_normalize": true,
26
+ "do_rescale": true,
27
+ "do_resize": true,
28
+ "image_mean": [
29
+ 0.48145466,
30
+ 0.4578275,
31
+ 0.40821073
32
+ ],
33
+ "image_processor_type": "CLIPImageProcessor",
34
+ "image_std": [
35
+ 0.26862954,
36
+ 0.26130258,
37
+ 0.27577711
38
+ ],
39
+ "processor_class": "LlavaProcessor",
40
+ "resample": 3,
41
+ "rescale_factor": 0.00392156862745098,
42
+ "size": {
43
+ "shortest_edge": 336
44
+ }
45
+ }
stdmodels/llava-llama-3-8b-v1_1-transformers/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": "<unk>"
24
+ }
stdmodels/llava-llama-3-8b-v1_1-transformers/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
stdmodels/llava-llama-3-8b-v1_1-transformers/tokenizer_config.json ADDED
@@ -0,0 +1,2093 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "128000": {
6
+ "content": "<|begin_of_text|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "128001": {
14
+ "content": "<|end_of_text|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "128002": {
22
+ "content": "<|reserved_special_token_0|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "128003": {
30
+ "content": "<|reserved_special_token_1|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "128004": {
38
+ "content": "<|reserved_special_token_2|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "128005": {
46
+ "content": "<|reserved_special_token_3|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "128006": {
54
+ "content": "<|start_header_id|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "128007": {
62
+ "content": "<|end_header_id|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "128008": {
70
+ "content": "<|reserved_special_token_4|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "128009": {
78
+ "content": "<|eot_id|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "128010": {
86
+ "content": "<|reserved_special_token_5|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "128011": {
94
+ "content": "<|reserved_special_token_6|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "128012": {
102
+ "content": "<|reserved_special_token_7|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "128013": {
110
+ "content": "<|reserved_special_token_8|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "128014": {
118
+ "content": "<|reserved_special_token_9|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "128015": {
126
+ "content": "<|reserved_special_token_10|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "128016": {
134
+ "content": "<|reserved_special_token_11|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "128017": {
142
+ "content": "<|reserved_special_token_12|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "128018": {
150
+ "content": "<|reserved_special_token_13|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "128019": {
158
+ "content": "<|reserved_special_token_14|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": true
164
+ },
165
+ "128020": {
166
+ "content": "<|reserved_special_token_15|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": true
172
+ },
173
+ "128021": {
174
+ "content": "<|reserved_special_token_16|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": true
180
+ },
181
+ "128022": {
182
+ "content": "<|reserved_special_token_17|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": true
188
+ },
189
+ "128023": {
190
+ "content": "<|reserved_special_token_18|>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": true
196
+ },
197
+ "128024": {
198
+ "content": "<|reserved_special_token_19|>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": true
204
+ },
205
+ "128025": {
206
+ "content": "<|reserved_special_token_20|>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": true
212
+ },
213
+ "128026": {
214
+ "content": "<|reserved_special_token_21|>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "128027": {
222
+ "content": "<|reserved_special_token_22|>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "128028": {
230
+ "content": "<|reserved_special_token_23|>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "128029": {
238
+ "content": "<|reserved_special_token_24|>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "128030": {
246
+ "content": "<|reserved_special_token_25|>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "128031": {
254
+ "content": "<|reserved_special_token_26|>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "128032": {
262
+ "content": "<|reserved_special_token_27|>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "128033": {
270
+ "content": "<|reserved_special_token_28|>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "128034": {
278
+ "content": "<|reserved_special_token_29|>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "128035": {
286
+ "content": "<|reserved_special_token_30|>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ },
293
+ "128036": {
294
+ "content": "<|reserved_special_token_31|>",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": true
300
+ },
301
+ "128037": {
302
+ "content": "<|reserved_special_token_32|>",
303
+ "lstrip": false,
304
+ "normalized": false,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": true
308
+ },
309
+ "128038": {
310
+ "content": "<|reserved_special_token_33|>",
311
+ "lstrip": false,
312
+ "normalized": false,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": true
316
+ },
317
+ "128039": {
318
+ "content": "<|reserved_special_token_34|>",
319
+ "lstrip": false,
320
+ "normalized": false,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": true
324
+ },
325
+ "128040": {
326
+ "content": "<|reserved_special_token_35|>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": true
332
+ },
333
+ "128041": {
334
+ "content": "<|reserved_special_token_36|>",
335
+ "lstrip": false,
336
+ "normalized": false,
337
+ "rstrip": false,
338
+ "single_word": false,
339
+ "special": true
340
+ },
341
+ "128042": {
342
+ "content": "<|reserved_special_token_37|>",
343
+ "lstrip": false,
344
+ "normalized": false,
345
+ "rstrip": false,
346
+ "single_word": false,
347
+ "special": true
348
+ },
349
+ "128043": {
350
+ "content": "<|reserved_special_token_38|>",
351
+ "lstrip": false,
352
+ "normalized": false,
353
+ "rstrip": false,
354
+ "single_word": false,
355
+ "special": true
356
+ },
357
+ "128044": {
358
+ "content": "<|reserved_special_token_39|>",
359
+ "lstrip": false,
360
+ "normalized": false,
361
+ "rstrip": false,
362
+ "single_word": false,
363
+ "special": true
364
+ },
365
+ "128045": {
366
+ "content": "<|reserved_special_token_40|>",
367
+ "lstrip": false,
368
+ "normalized": false,
369
+ "rstrip": false,
370
+ "single_word": false,
371
+ "special": true
372
+ },
373
+ "128046": {
374
+ "content": "<|reserved_special_token_41|>",
375
+ "lstrip": false,
376
+ "normalized": false,
377
+ "rstrip": false,
378
+ "single_word": false,
379
+ "special": true
380
+ },
381
+ "128047": {
382
+ "content": "<|reserved_special_token_42|>",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false,
387
+ "special": true
388
+ },
389
+ "128048": {
390
+ "content": "<|reserved_special_token_43|>",
391
+ "lstrip": false,
392
+ "normalized": false,
393
+ "rstrip": false,
394
+ "single_word": false,
395
+ "special": true
396
+ },
397
+ "128049": {
398
+ "content": "<|reserved_special_token_44|>",
399
+ "lstrip": false,
400
+ "normalized": false,
401
+ "rstrip": false,
402
+ "single_word": false,
403
+ "special": true
404
+ },
405
+ "128050": {
406
+ "content": "<|reserved_special_token_45|>",
407
+ "lstrip": false,
408
+ "normalized": false,
409
+ "rstrip": false,
410
+ "single_word": false,
411
+ "special": true
412
+ },
413
+ "128051": {
414
+ "content": "<|reserved_special_token_46|>",
415
+ "lstrip": false,
416
+ "normalized": false,
417
+ "rstrip": false,
418
+ "single_word": false,
419
+ "special": true
420
+ },
421
+ "128052": {
422
+ "content": "<|reserved_special_token_47|>",
423
+ "lstrip": false,
424
+ "normalized": false,
425
+ "rstrip": false,
426
+ "single_word": false,
427
+ "special": true
428
+ },
429
+ "128053": {
430
+ "content": "<|reserved_special_token_48|>",
431
+ "lstrip": false,
432
+ "normalized": false,
433
+ "rstrip": false,
434
+ "single_word": false,
435
+ "special": true
436
+ },
437
+ "128054": {
438
+ "content": "<|reserved_special_token_49|>",
439
+ "lstrip": false,
440
+ "normalized": false,
441
+ "rstrip": false,
442
+ "single_word": false,
443
+ "special": true
444
+ },
445
+ "128055": {
446
+ "content": "<|reserved_special_token_50|>",
447
+ "lstrip": false,
448
+ "normalized": false,
449
+ "rstrip": false,
450
+ "single_word": false,
451
+ "special": true
452
+ },
453
+ "128056": {
454
+ "content": "<|reserved_special_token_51|>",
455
+ "lstrip": false,
456
+ "normalized": false,
457
+ "rstrip": false,
458
+ "single_word": false,
459
+ "special": true
460
+ },
461
+ "128057": {
462
+ "content": "<|reserved_special_token_52|>",
463
+ "lstrip": false,
464
+ "normalized": false,
465
+ "rstrip": false,
466
+ "single_word": false,
467
+ "special": true
468
+ },
469
+ "128058": {
470
+ "content": "<|reserved_special_token_53|>",
471
+ "lstrip": false,
472
+ "normalized": false,
473
+ "rstrip": false,
474
+ "single_word": false,
475
+ "special": true
476
+ },
477
+ "128059": {
478
+ "content": "<|reserved_special_token_54|>",
479
+ "lstrip": false,
480
+ "normalized": false,
481
+ "rstrip": false,
482
+ "single_word": false,
483
+ "special": true
484
+ },
485
+ "128060": {
486
+ "content": "<|reserved_special_token_55|>",
487
+ "lstrip": false,
488
+ "normalized": false,
489
+ "rstrip": false,
490
+ "single_word": false,
491
+ "special": true
492
+ },
493
+ "128061": {
494
+ "content": "<|reserved_special_token_56|>",
495
+ "lstrip": false,
496
+ "normalized": false,
497
+ "rstrip": false,
498
+ "single_word": false,
499
+ "special": true
500
+ },
501
+ "128062": {
502
+ "content": "<|reserved_special_token_57|>",
503
+ "lstrip": false,
504
+ "normalized": false,
505
+ "rstrip": false,
506
+ "single_word": false,
507
+ "special": true
508
+ },
509
+ "128063": {
510
+ "content": "<|reserved_special_token_58|>",
511
+ "lstrip": false,
512
+ "normalized": false,
513
+ "rstrip": false,
514
+ "single_word": false,
515
+ "special": true
516
+ },
517
+ "128064": {
518
+ "content": "<|reserved_special_token_59|>",
519
+ "lstrip": false,
520
+ "normalized": false,
521
+ "rstrip": false,
522
+ "single_word": false,
523
+ "special": true
524
+ },
525
+ "128065": {
526
+ "content": "<|reserved_special_token_60|>",
527
+ "lstrip": false,
528
+ "normalized": false,
529
+ "rstrip": false,
530
+ "single_word": false,
531
+ "special": true
532
+ },
533
+ "128066": {
534
+ "content": "<|reserved_special_token_61|>",
535
+ "lstrip": false,
536
+ "normalized": false,
537
+ "rstrip": false,
538
+ "single_word": false,
539
+ "special": true
540
+ },
541
+ "128067": {
542
+ "content": "<|reserved_special_token_62|>",
543
+ "lstrip": false,
544
+ "normalized": false,
545
+ "rstrip": false,
546
+ "single_word": false,
547
+ "special": true
548
+ },
549
+ "128068": {
550
+ "content": "<|reserved_special_token_63|>",
551
+ "lstrip": false,
552
+ "normalized": false,
553
+ "rstrip": false,
554
+ "single_word": false,
555
+ "special": true
556
+ },
557
+ "128069": {
558
+ "content": "<|reserved_special_token_64|>",
559
+ "lstrip": false,
560
+ "normalized": false,
561
+ "rstrip": false,
562
+ "single_word": false,
563
+ "special": true
564
+ },
565
+ "128070": {
566
+ "content": "<|reserved_special_token_65|>",
567
+ "lstrip": false,
568
+ "normalized": false,
569
+ "rstrip": false,
570
+ "single_word": false,
571
+ "special": true
572
+ },
573
+ "128071": {
574
+ "content": "<|reserved_special_token_66|>",
575
+ "lstrip": false,
576
+ "normalized": false,
577
+ "rstrip": false,
578
+ "single_word": false,
579
+ "special": true
580
+ },
581
+ "128072": {
582
+ "content": "<|reserved_special_token_67|>",
583
+ "lstrip": false,
584
+ "normalized": false,
585
+ "rstrip": false,
586
+ "single_word": false,
587
+ "special": true
588
+ },
589
+ "128073": {
590
+ "content": "<|reserved_special_token_68|>",
591
+ "lstrip": false,
592
+ "normalized": false,
593
+ "rstrip": false,
594
+ "single_word": false,
595
+ "special": true
596
+ },
597
+ "128074": {
598
+ "content": "<|reserved_special_token_69|>",
599
+ "lstrip": false,
600
+ "normalized": false,
601
+ "rstrip": false,
602
+ "single_word": false,
603
+ "special": true
604
+ },
605
+ "128075": {
606
+ "content": "<|reserved_special_token_70|>",
607
+ "lstrip": false,
608
+ "normalized": false,
609
+ "rstrip": false,
610
+ "single_word": false,
611
+ "special": true
612
+ },
613
+ "128076": {
614
+ "content": "<|reserved_special_token_71|>",
615
+ "lstrip": false,
616
+ "normalized": false,
617
+ "rstrip": false,
618
+ "single_word": false,
619
+ "special": true
620
+ },
621
+ "128077": {
622
+ "content": "<|reserved_special_token_72|>",
623
+ "lstrip": false,
624
+ "normalized": false,
625
+ "rstrip": false,
626
+ "single_word": false,
627
+ "special": true
628
+ },
629
+ "128078": {
630
+ "content": "<|reserved_special_token_73|>",
631
+ "lstrip": false,
632
+ "normalized": false,
633
+ "rstrip": false,
634
+ "single_word": false,
635
+ "special": true
636
+ },
637
+ "128079": {
638
+ "content": "<|reserved_special_token_74|>",
639
+ "lstrip": false,
640
+ "normalized": false,
641
+ "rstrip": false,
642
+ "single_word": false,
643
+ "special": true
644
+ },
645
+ "128080": {
646
+ "content": "<|reserved_special_token_75|>",
647
+ "lstrip": false,
648
+ "normalized": false,
649
+ "rstrip": false,
650
+ "single_word": false,
651
+ "special": true
652
+ },
653
+ "128081": {
654
+ "content": "<|reserved_special_token_76|>",
655
+ "lstrip": false,
656
+ "normalized": false,
657
+ "rstrip": false,
658
+ "single_word": false,
659
+ "special": true
660
+ },
661
+ "128082": {
662
+ "content": "<|reserved_special_token_77|>",
663
+ "lstrip": false,
664
+ "normalized": false,
665
+ "rstrip": false,
666
+ "single_word": false,
667
+ "special": true
668
+ },
669
+ "128083": {
670
+ "content": "<|reserved_special_token_78|>",
671
+ "lstrip": false,
672
+ "normalized": false,
673
+ "rstrip": false,
674
+ "single_word": false,
675
+ "special": true
676
+ },
677
+ "128084": {
678
+ "content": "<|reserved_special_token_79|>",
679
+ "lstrip": false,
680
+ "normalized": false,
681
+ "rstrip": false,
682
+ "single_word": false,
683
+ "special": true
684
+ },
685
+ "128085": {
686
+ "content": "<|reserved_special_token_80|>",
687
+ "lstrip": false,
688
+ "normalized": false,
689
+ "rstrip": false,
690
+ "single_word": false,
691
+ "special": true
692
+ },
693
+ "128086": {
694
+ "content": "<|reserved_special_token_81|>",
695
+ "lstrip": false,
696
+ "normalized": false,
697
+ "rstrip": false,
698
+ "single_word": false,
699
+ "special": true
700
+ },
701
+ "128087": {
702
+ "content": "<|reserved_special_token_82|>",
703
+ "lstrip": false,
704
+ "normalized": false,
705
+ "rstrip": false,
706
+ "single_word": false,
707
+ "special": true
708
+ },
709
+ "128088": {
710
+ "content": "<|reserved_special_token_83|>",
711
+ "lstrip": false,
712
+ "normalized": false,
713
+ "rstrip": false,
714
+ "single_word": false,
715
+ "special": true
716
+ },
717
+ "128089": {
718
+ "content": "<|reserved_special_token_84|>",
719
+ "lstrip": false,
720
+ "normalized": false,
721
+ "rstrip": false,
722
+ "single_word": false,
723
+ "special": true
724
+ },
725
+ "128090": {
726
+ "content": "<|reserved_special_token_85|>",
727
+ "lstrip": false,
728
+ "normalized": false,
729
+ "rstrip": false,
730
+ "single_word": false,
731
+ "special": true
732
+ },
733
+ "128091": {
734
+ "content": "<|reserved_special_token_86|>",
735
+ "lstrip": false,
736
+ "normalized": false,
737
+ "rstrip": false,
738
+ "single_word": false,
739
+ "special": true
740
+ },
741
+ "128092": {
742
+ "content": "<|reserved_special_token_87|>",
743
+ "lstrip": false,
744
+ "normalized": false,
745
+ "rstrip": false,
746
+ "single_word": false,
747
+ "special": true
748
+ },
749
+ "128093": {
750
+ "content": "<|reserved_special_token_88|>",
751
+ "lstrip": false,
752
+ "normalized": false,
753
+ "rstrip": false,
754
+ "single_word": false,
755
+ "special": true
756
+ },
757
+ "128094": {
758
+ "content": "<|reserved_special_token_89|>",
759
+ "lstrip": false,
760
+ "normalized": false,
761
+ "rstrip": false,
762
+ "single_word": false,
763
+ "special": true
764
+ },
765
+ "128095": {
766
+ "content": "<|reserved_special_token_90|>",
767
+ "lstrip": false,
768
+ "normalized": false,
769
+ "rstrip": false,
770
+ "single_word": false,
771
+ "special": true
772
+ },
773
+ "128096": {
774
+ "content": "<|reserved_special_token_91|>",
775
+ "lstrip": false,
776
+ "normalized": false,
777
+ "rstrip": false,
778
+ "single_word": false,
779
+ "special": true
780
+ },
781
+ "128097": {
782
+ "content": "<|reserved_special_token_92|>",
783
+ "lstrip": false,
784
+ "normalized": false,
785
+ "rstrip": false,
786
+ "single_word": false,
787
+ "special": true
788
+ },
789
+ "128098": {
790
+ "content": "<|reserved_special_token_93|>",
791
+ "lstrip": false,
792
+ "normalized": false,
793
+ "rstrip": false,
794
+ "single_word": false,
795
+ "special": true
796
+ },
797
+ "128099": {
798
+ "content": "<|reserved_special_token_94|>",
799
+ "lstrip": false,
800
+ "normalized": false,
801
+ "rstrip": false,
802
+ "single_word": false,
803
+ "special": true
804
+ },
805
+ "128100": {
806
+ "content": "<|reserved_special_token_95|>",
807
+ "lstrip": false,
808
+ "normalized": false,
809
+ "rstrip": false,
810
+ "single_word": false,
811
+ "special": true
812
+ },
813
+ "128101": {
814
+ "content": "<|reserved_special_token_96|>",
815
+ "lstrip": false,
816
+ "normalized": false,
817
+ "rstrip": false,
818
+ "single_word": false,
819
+ "special": true
820
+ },
821
+ "128102": {
822
+ "content": "<|reserved_special_token_97|>",
823
+ "lstrip": false,
824
+ "normalized": false,
825
+ "rstrip": false,
826
+ "single_word": false,
827
+ "special": true
828
+ },
829
+ "128103": {
830
+ "content": "<|reserved_special_token_98|>",
831
+ "lstrip": false,
832
+ "normalized": false,
833
+ "rstrip": false,
834
+ "single_word": false,
835
+ "special": true
836
+ },
837
+ "128104": {
838
+ "content": "<|reserved_special_token_99|>",
839
+ "lstrip": false,
840
+ "normalized": false,
841
+ "rstrip": false,
842
+ "single_word": false,
843
+ "special": true
844
+ },
845
+ "128105": {
846
+ "content": "<|reserved_special_token_100|>",
847
+ "lstrip": false,
848
+ "normalized": false,
849
+ "rstrip": false,
850
+ "single_word": false,
851
+ "special": true
852
+ },
853
+ "128106": {
854
+ "content": "<|reserved_special_token_101|>",
855
+ "lstrip": false,
856
+ "normalized": false,
857
+ "rstrip": false,
858
+ "single_word": false,
859
+ "special": true
860
+ },
861
+ "128107": {
862
+ "content": "<|reserved_special_token_102|>",
863
+ "lstrip": false,
864
+ "normalized": false,
865
+ "rstrip": false,
866
+ "single_word": false,
867
+ "special": true
868
+ },
869
+ "128108": {
870
+ "content": "<|reserved_special_token_103|>",
871
+ "lstrip": false,
872
+ "normalized": false,
873
+ "rstrip": false,
874
+ "single_word": false,
875
+ "special": true
876
+ },
877
+ "128109": {
878
+ "content": "<|reserved_special_token_104|>",
879
+ "lstrip": false,
880
+ "normalized": false,
881
+ "rstrip": false,
882
+ "single_word": false,
883
+ "special": true
884
+ },
885
+ "128110": {
886
+ "content": "<|reserved_special_token_105|>",
887
+ "lstrip": false,
888
+ "normalized": false,
889
+ "rstrip": false,
890
+ "single_word": false,
891
+ "special": true
892
+ },
893
+ "128111": {
894
+ "content": "<|reserved_special_token_106|>",
895
+ "lstrip": false,
896
+ "normalized": false,
897
+ "rstrip": false,
898
+ "single_word": false,
899
+ "special": true
900
+ },
901
+ "128112": {
902
+ "content": "<|reserved_special_token_107|>",
903
+ "lstrip": false,
904
+ "normalized": false,
905
+ "rstrip": false,
906
+ "single_word": false,
907
+ "special": true
908
+ },
909
+ "128113": {
910
+ "content": "<|reserved_special_token_108|>",
911
+ "lstrip": false,
912
+ "normalized": false,
913
+ "rstrip": false,
914
+ "single_word": false,
915
+ "special": true
916
+ },
917
+ "128114": {
918
+ "content": "<|reserved_special_token_109|>",
919
+ "lstrip": false,
920
+ "normalized": false,
921
+ "rstrip": false,
922
+ "single_word": false,
923
+ "special": true
924
+ },
925
+ "128115": {
926
+ "content": "<|reserved_special_token_110|>",
927
+ "lstrip": false,
928
+ "normalized": false,
929
+ "rstrip": false,
930
+ "single_word": false,
931
+ "special": true
932
+ },
933
+ "128116": {
934
+ "content": "<|reserved_special_token_111|>",
935
+ "lstrip": false,
936
+ "normalized": false,
937
+ "rstrip": false,
938
+ "single_word": false,
939
+ "special": true
940
+ },
941
+ "128117": {
942
+ "content": "<|reserved_special_token_112|>",
943
+ "lstrip": false,
944
+ "normalized": false,
945
+ "rstrip": false,
946
+ "single_word": false,
947
+ "special": true
948
+ },
949
+ "128118": {
950
+ "content": "<|reserved_special_token_113|>",
951
+ "lstrip": false,
952
+ "normalized": false,
953
+ "rstrip": false,
954
+ "single_word": false,
955
+ "special": true
956
+ },
957
+ "128119": {
958
+ "content": "<|reserved_special_token_114|>",
959
+ "lstrip": false,
960
+ "normalized": false,
961
+ "rstrip": false,
962
+ "single_word": false,
963
+ "special": true
964
+ },
965
+ "128120": {
966
+ "content": "<|reserved_special_token_115|>",
967
+ "lstrip": false,
968
+ "normalized": false,
969
+ "rstrip": false,
970
+ "single_word": false,
971
+ "special": true
972
+ },
973
+ "128121": {
974
+ "content": "<|reserved_special_token_116|>",
975
+ "lstrip": false,
976
+ "normalized": false,
977
+ "rstrip": false,
978
+ "single_word": false,
979
+ "special": true
980
+ },
981
+ "128122": {
982
+ "content": "<|reserved_special_token_117|>",
983
+ "lstrip": false,
984
+ "normalized": false,
985
+ "rstrip": false,
986
+ "single_word": false,
987
+ "special": true
988
+ },
989
+ "128123": {
990
+ "content": "<|reserved_special_token_118|>",
991
+ "lstrip": false,
992
+ "normalized": false,
993
+ "rstrip": false,
994
+ "single_word": false,
995
+ "special": true
996
+ },
997
+ "128124": {
998
+ "content": "<|reserved_special_token_119|>",
999
+ "lstrip": false,
1000
+ "normalized": false,
1001
+ "rstrip": false,
1002
+ "single_word": false,
1003
+ "special": true
1004
+ },
1005
+ "128125": {
1006
+ "content": "<|reserved_special_token_120|>",
1007
+ "lstrip": false,
1008
+ "normalized": false,
1009
+ "rstrip": false,
1010
+ "single_word": false,
1011
+ "special": true
1012
+ },
1013
+ "128126": {
1014
+ "content": "<|reserved_special_token_121|>",
1015
+ "lstrip": false,
1016
+ "normalized": false,
1017
+ "rstrip": false,
1018
+ "single_word": false,
1019
+ "special": true
1020
+ },
1021
+ "128127": {
1022
+ "content": "<|reserved_special_token_122|>",
1023
+ "lstrip": false,
1024
+ "normalized": false,
1025
+ "rstrip": false,
1026
+ "single_word": false,
1027
+ "special": true
1028
+ },
1029
+ "128128": {
1030
+ "content": "<|reserved_special_token_123|>",
1031
+ "lstrip": false,
1032
+ "normalized": false,
1033
+ "rstrip": false,
1034
+ "single_word": false,
1035
+ "special": true
1036
+ },
1037
+ "128129": {
1038
+ "content": "<|reserved_special_token_124|>",
1039
+ "lstrip": false,
1040
+ "normalized": false,
1041
+ "rstrip": false,
1042
+ "single_word": false,
1043
+ "special": true
1044
+ },
1045
+ "128130": {
1046
+ "content": "<|reserved_special_token_125|>",
1047
+ "lstrip": false,
1048
+ "normalized": false,
1049
+ "rstrip": false,
1050
+ "single_word": false,
1051
+ "special": true
1052
+ },
1053
+ "128131": {
1054
+ "content": "<|reserved_special_token_126|>",
1055
+ "lstrip": false,
1056
+ "normalized": false,
1057
+ "rstrip": false,
1058
+ "single_word": false,
1059
+ "special": true
1060
+ },
1061
+ "128132": {
1062
+ "content": "<|reserved_special_token_127|>",
1063
+ "lstrip": false,
1064
+ "normalized": false,
1065
+ "rstrip": false,
1066
+ "single_word": false,
1067
+ "special": true
1068
+ },
1069
+ "128133": {
1070
+ "content": "<|reserved_special_token_128|>",
1071
+ "lstrip": false,
1072
+ "normalized": false,
1073
+ "rstrip": false,
1074
+ "single_word": false,
1075
+ "special": true
1076
+ },
1077
+ "128134": {
1078
+ "content": "<|reserved_special_token_129|>",
1079
+ "lstrip": false,
1080
+ "normalized": false,
1081
+ "rstrip": false,
1082
+ "single_word": false,
1083
+ "special": true
1084
+ },
1085
+ "128135": {
1086
+ "content": "<|reserved_special_token_130|>",
1087
+ "lstrip": false,
1088
+ "normalized": false,
1089
+ "rstrip": false,
1090
+ "single_word": false,
1091
+ "special": true
1092
+ },
1093
+ "128136": {
1094
+ "content": "<|reserved_special_token_131|>",
1095
+ "lstrip": false,
1096
+ "normalized": false,
1097
+ "rstrip": false,
1098
+ "single_word": false,
1099
+ "special": true
1100
+ },
1101
+ "128137": {
1102
+ "content": "<|reserved_special_token_132|>",
1103
+ "lstrip": false,
1104
+ "normalized": false,
1105
+ "rstrip": false,
1106
+ "single_word": false,
1107
+ "special": true
1108
+ },
1109
+ "128138": {
1110
+ "content": "<|reserved_special_token_133|>",
1111
+ "lstrip": false,
1112
+ "normalized": false,
1113
+ "rstrip": false,
1114
+ "single_word": false,
1115
+ "special": true
1116
+ },
1117
+ "128139": {
1118
+ "content": "<|reserved_special_token_134|>",
1119
+ "lstrip": false,
1120
+ "normalized": false,
1121
+ "rstrip": false,
1122
+ "single_word": false,
1123
+ "special": true
1124
+ },
1125
+ "128140": {
1126
+ "content": "<|reserved_special_token_135|>",
1127
+ "lstrip": false,
1128
+ "normalized": false,
1129
+ "rstrip": false,
1130
+ "single_word": false,
1131
+ "special": true
1132
+ },
1133
+ "128141": {
1134
+ "content": "<|reserved_special_token_136|>",
1135
+ "lstrip": false,
1136
+ "normalized": false,
1137
+ "rstrip": false,
1138
+ "single_word": false,
1139
+ "special": true
1140
+ },
1141
+ "128142": {
1142
+ "content": "<|reserved_special_token_137|>",
1143
+ "lstrip": false,
1144
+ "normalized": false,
1145
+ "rstrip": false,
1146
+ "single_word": false,
1147
+ "special": true
1148
+ },
1149
+ "128143": {
1150
+ "content": "<|reserved_special_token_138|>",
1151
+ "lstrip": false,
1152
+ "normalized": false,
1153
+ "rstrip": false,
1154
+ "single_word": false,
1155
+ "special": true
1156
+ },
1157
+ "128144": {
1158
+ "content": "<|reserved_special_token_139|>",
1159
+ "lstrip": false,
1160
+ "normalized": false,
1161
+ "rstrip": false,
1162
+ "single_word": false,
1163
+ "special": true
1164
+ },
1165
+ "128145": {
1166
+ "content": "<|reserved_special_token_140|>",
1167
+ "lstrip": false,
1168
+ "normalized": false,
1169
+ "rstrip": false,
1170
+ "single_word": false,
1171
+ "special": true
1172
+ },
1173
+ "128146": {
1174
+ "content": "<|reserved_special_token_141|>",
1175
+ "lstrip": false,
1176
+ "normalized": false,
1177
+ "rstrip": false,
1178
+ "single_word": false,
1179
+ "special": true
1180
+ },
1181
+ "128147": {
1182
+ "content": "<|reserved_special_token_142|>",
1183
+ "lstrip": false,
1184
+ "normalized": false,
1185
+ "rstrip": false,
1186
+ "single_word": false,
1187
+ "special": true
1188
+ },
1189
+ "128148": {
1190
+ "content": "<|reserved_special_token_143|>",
1191
+ "lstrip": false,
1192
+ "normalized": false,
1193
+ "rstrip": false,
1194
+ "single_word": false,
1195
+ "special": true
1196
+ },
1197
+ "128149": {
1198
+ "content": "<|reserved_special_token_144|>",
1199
+ "lstrip": false,
1200
+ "normalized": false,
1201
+ "rstrip": false,
1202
+ "single_word": false,
1203
+ "special": true
1204
+ },
1205
+ "128150": {
1206
+ "content": "<|reserved_special_token_145|>",
1207
+ "lstrip": false,
1208
+ "normalized": false,
1209
+ "rstrip": false,
1210
+ "single_word": false,
1211
+ "special": true
1212
+ },
1213
+ "128151": {
1214
+ "content": "<|reserved_special_token_146|>",
1215
+ "lstrip": false,
1216
+ "normalized": false,
1217
+ "rstrip": false,
1218
+ "single_word": false,
1219
+ "special": true
1220
+ },
1221
+ "128152": {
1222
+ "content": "<|reserved_special_token_147|>",
1223
+ "lstrip": false,
1224
+ "normalized": false,
1225
+ "rstrip": false,
1226
+ "single_word": false,
1227
+ "special": true
1228
+ },
1229
+ "128153": {
1230
+ "content": "<|reserved_special_token_148|>",
1231
+ "lstrip": false,
1232
+ "normalized": false,
1233
+ "rstrip": false,
1234
+ "single_word": false,
1235
+ "special": true
1236
+ },
1237
+ "128154": {
1238
+ "content": "<|reserved_special_token_149|>",
1239
+ "lstrip": false,
1240
+ "normalized": false,
1241
+ "rstrip": false,
1242
+ "single_word": false,
1243
+ "special": true
1244
+ },
1245
+ "128155": {
1246
+ "content": "<|reserved_special_token_150|>",
1247
+ "lstrip": false,
1248
+ "normalized": false,
1249
+ "rstrip": false,
1250
+ "single_word": false,
1251
+ "special": true
1252
+ },
1253
+ "128156": {
1254
+ "content": "<|reserved_special_token_151|>",
1255
+ "lstrip": false,
1256
+ "normalized": false,
1257
+ "rstrip": false,
1258
+ "single_word": false,
1259
+ "special": true
1260
+ },
1261
+ "128157": {
1262
+ "content": "<|reserved_special_token_152|>",
1263
+ "lstrip": false,
1264
+ "normalized": false,
1265
+ "rstrip": false,
1266
+ "single_word": false,
1267
+ "special": true
1268
+ },
1269
+ "128158": {
1270
+ "content": "<|reserved_special_token_153|>",
1271
+ "lstrip": false,
1272
+ "normalized": false,
1273
+ "rstrip": false,
1274
+ "single_word": false,
1275
+ "special": true
1276
+ },
1277
+ "128159": {
1278
+ "content": "<|reserved_special_token_154|>",
1279
+ "lstrip": false,
1280
+ "normalized": false,
1281
+ "rstrip": false,
1282
+ "single_word": false,
1283
+ "special": true
1284
+ },
1285
+ "128160": {
1286
+ "content": "<|reserved_special_token_155|>",
1287
+ "lstrip": false,
1288
+ "normalized": false,
1289
+ "rstrip": false,
1290
+ "single_word": false,
1291
+ "special": true
1292
+ },
1293
+ "128161": {
1294
+ "content": "<|reserved_special_token_156|>",
1295
+ "lstrip": false,
1296
+ "normalized": false,
1297
+ "rstrip": false,
1298
+ "single_word": false,
1299
+ "special": true
1300
+ },
1301
+ "128162": {
1302
+ "content": "<|reserved_special_token_157|>",
1303
+ "lstrip": false,
1304
+ "normalized": false,
1305
+ "rstrip": false,
1306
+ "single_word": false,
1307
+ "special": true
1308
+ },
1309
+ "128163": {
1310
+ "content": "<|reserved_special_token_158|>",
1311
+ "lstrip": false,
1312
+ "normalized": false,
1313
+ "rstrip": false,
1314
+ "single_word": false,
1315
+ "special": true
1316
+ },
1317
+ "128164": {
1318
+ "content": "<|reserved_special_token_159|>",
1319
+ "lstrip": false,
1320
+ "normalized": false,
1321
+ "rstrip": false,
1322
+ "single_word": false,
1323
+ "special": true
1324
+ },
1325
+ "128165": {
1326
+ "content": "<|reserved_special_token_160|>",
1327
+ "lstrip": false,
1328
+ "normalized": false,
1329
+ "rstrip": false,
1330
+ "single_word": false,
1331
+ "special": true
1332
+ },
1333
+ "128166": {
1334
+ "content": "<|reserved_special_token_161|>",
1335
+ "lstrip": false,
1336
+ "normalized": false,
1337
+ "rstrip": false,
1338
+ "single_word": false,
1339
+ "special": true
1340
+ },
1341
+ "128167": {
1342
+ "content": "<|reserved_special_token_162|>",
1343
+ "lstrip": false,
1344
+ "normalized": false,
1345
+ "rstrip": false,
1346
+ "single_word": false,
1347
+ "special": true
1348
+ },
1349
+ "128168": {
1350
+ "content": "<|reserved_special_token_163|>",
1351
+ "lstrip": false,
1352
+ "normalized": false,
1353
+ "rstrip": false,
1354
+ "single_word": false,
1355
+ "special": true
1356
+ },
1357
+ "128169": {
1358
+ "content": "<|reserved_special_token_164|>",
1359
+ "lstrip": false,
1360
+ "normalized": false,
1361
+ "rstrip": false,
1362
+ "single_word": false,
1363
+ "special": true
1364
+ },
1365
+ "128170": {
1366
+ "content": "<|reserved_special_token_165|>",
1367
+ "lstrip": false,
1368
+ "normalized": false,
1369
+ "rstrip": false,
1370
+ "single_word": false,
1371
+ "special": true
1372
+ },
1373
+ "128171": {
1374
+ "content": "<|reserved_special_token_166|>",
1375
+ "lstrip": false,
1376
+ "normalized": false,
1377
+ "rstrip": false,
1378
+ "single_word": false,
1379
+ "special": true
1380
+ },
1381
+ "128172": {
1382
+ "content": "<|reserved_special_token_167|>",
1383
+ "lstrip": false,
1384
+ "normalized": false,
1385
+ "rstrip": false,
1386
+ "single_word": false,
1387
+ "special": true
1388
+ },
1389
+ "128173": {
1390
+ "content": "<|reserved_special_token_168|>",
1391
+ "lstrip": false,
1392
+ "normalized": false,
1393
+ "rstrip": false,
1394
+ "single_word": false,
1395
+ "special": true
1396
+ },
1397
+ "128174": {
1398
+ "content": "<|reserved_special_token_169|>",
1399
+ "lstrip": false,
1400
+ "normalized": false,
1401
+ "rstrip": false,
1402
+ "single_word": false,
1403
+ "special": true
1404
+ },
1405
+ "128175": {
1406
+ "content": "<|reserved_special_token_170|>",
1407
+ "lstrip": false,
1408
+ "normalized": false,
1409
+ "rstrip": false,
1410
+ "single_word": false,
1411
+ "special": true
1412
+ },
1413
+ "128176": {
1414
+ "content": "<|reserved_special_token_171|>",
1415
+ "lstrip": false,
1416
+ "normalized": false,
1417
+ "rstrip": false,
1418
+ "single_word": false,
1419
+ "special": true
1420
+ },
1421
+ "128177": {
1422
+ "content": "<|reserved_special_token_172|>",
1423
+ "lstrip": false,
1424
+ "normalized": false,
1425
+ "rstrip": false,
1426
+ "single_word": false,
1427
+ "special": true
1428
+ },
1429
+ "128178": {
1430
+ "content": "<|reserved_special_token_173|>",
1431
+ "lstrip": false,
1432
+ "normalized": false,
1433
+ "rstrip": false,
1434
+ "single_word": false,
1435
+ "special": true
1436
+ },
1437
+ "128179": {
1438
+ "content": "<|reserved_special_token_174|>",
1439
+ "lstrip": false,
1440
+ "normalized": false,
1441
+ "rstrip": false,
1442
+ "single_word": false,
1443
+ "special": true
1444
+ },
1445
+ "128180": {
1446
+ "content": "<|reserved_special_token_175|>",
1447
+ "lstrip": false,
1448
+ "normalized": false,
1449
+ "rstrip": false,
1450
+ "single_word": false,
1451
+ "special": true
1452
+ },
1453
+ "128181": {
1454
+ "content": "<|reserved_special_token_176|>",
1455
+ "lstrip": false,
1456
+ "normalized": false,
1457
+ "rstrip": false,
1458
+ "single_word": false,
1459
+ "special": true
1460
+ },
1461
+ "128182": {
1462
+ "content": "<|reserved_special_token_177|>",
1463
+ "lstrip": false,
1464
+ "normalized": false,
1465
+ "rstrip": false,
1466
+ "single_word": false,
1467
+ "special": true
1468
+ },
1469
+ "128183": {
1470
+ "content": "<|reserved_special_token_178|>",
1471
+ "lstrip": false,
1472
+ "normalized": false,
1473
+ "rstrip": false,
1474
+ "single_word": false,
1475
+ "special": true
1476
+ },
1477
+ "128184": {
1478
+ "content": "<|reserved_special_token_179|>",
1479
+ "lstrip": false,
1480
+ "normalized": false,
1481
+ "rstrip": false,
1482
+ "single_word": false,
1483
+ "special": true
1484
+ },
1485
+ "128185": {
1486
+ "content": "<|reserved_special_token_180|>",
1487
+ "lstrip": false,
1488
+ "normalized": false,
1489
+ "rstrip": false,
1490
+ "single_word": false,
1491
+ "special": true
1492
+ },
1493
+ "128186": {
1494
+ "content": "<|reserved_special_token_181|>",
1495
+ "lstrip": false,
1496
+ "normalized": false,
1497
+ "rstrip": false,
1498
+ "single_word": false,
1499
+ "special": true
1500
+ },
1501
+ "128187": {
1502
+ "content": "<|reserved_special_token_182|>",
1503
+ "lstrip": false,
1504
+ "normalized": false,
1505
+ "rstrip": false,
1506
+ "single_word": false,
1507
+ "special": true
1508
+ },
1509
+ "128188": {
1510
+ "content": "<|reserved_special_token_183|>",
1511
+ "lstrip": false,
1512
+ "normalized": false,
1513
+ "rstrip": false,
1514
+ "single_word": false,
1515
+ "special": true
1516
+ },
1517
+ "128189": {
1518
+ "content": "<|reserved_special_token_184|>",
1519
+ "lstrip": false,
1520
+ "normalized": false,
1521
+ "rstrip": false,
1522
+ "single_word": false,
1523
+ "special": true
1524
+ },
1525
+ "128190": {
1526
+ "content": "<|reserved_special_token_185|>",
1527
+ "lstrip": false,
1528
+ "normalized": false,
1529
+ "rstrip": false,
1530
+ "single_word": false,
1531
+ "special": true
1532
+ },
1533
+ "128191": {
1534
+ "content": "<|reserved_special_token_186|>",
1535
+ "lstrip": false,
1536
+ "normalized": false,
1537
+ "rstrip": false,
1538
+ "single_word": false,
1539
+ "special": true
1540
+ },
1541
+ "128192": {
1542
+ "content": "<|reserved_special_token_187|>",
1543
+ "lstrip": false,
1544
+ "normalized": false,
1545
+ "rstrip": false,
1546
+ "single_word": false,
1547
+ "special": true
1548
+ },
1549
+ "128193": {
1550
+ "content": "<|reserved_special_token_188|>",
1551
+ "lstrip": false,
1552
+ "normalized": false,
1553
+ "rstrip": false,
1554
+ "single_word": false,
1555
+ "special": true
1556
+ },
1557
+ "128194": {
1558
+ "content": "<|reserved_special_token_189|>",
1559
+ "lstrip": false,
1560
+ "normalized": false,
1561
+ "rstrip": false,
1562
+ "single_word": false,
1563
+ "special": true
1564
+ },
1565
+ "128195": {
1566
+ "content": "<|reserved_special_token_190|>",
1567
+ "lstrip": false,
1568
+ "normalized": false,
1569
+ "rstrip": false,
1570
+ "single_word": false,
1571
+ "special": true
1572
+ },
1573
+ "128196": {
1574
+ "content": "<|reserved_special_token_191|>",
1575
+ "lstrip": false,
1576
+ "normalized": false,
1577
+ "rstrip": false,
1578
+ "single_word": false,
1579
+ "special": true
1580
+ },
1581
+ "128197": {
1582
+ "content": "<|reserved_special_token_192|>",
1583
+ "lstrip": false,
1584
+ "normalized": false,
1585
+ "rstrip": false,
1586
+ "single_word": false,
1587
+ "special": true
1588
+ },
1589
+ "128198": {
1590
+ "content": "<|reserved_special_token_193|>",
1591
+ "lstrip": false,
1592
+ "normalized": false,
1593
+ "rstrip": false,
1594
+ "single_word": false,
1595
+ "special": true
1596
+ },
1597
+ "128199": {
1598
+ "content": "<|reserved_special_token_194|>",
1599
+ "lstrip": false,
1600
+ "normalized": false,
1601
+ "rstrip": false,
1602
+ "single_word": false,
1603
+ "special": true
1604
+ },
1605
+ "128200": {
1606
+ "content": "<|reserved_special_token_195|>",
1607
+ "lstrip": false,
1608
+ "normalized": false,
1609
+ "rstrip": false,
1610
+ "single_word": false,
1611
+ "special": true
1612
+ },
1613
+ "128201": {
1614
+ "content": "<|reserved_special_token_196|>",
1615
+ "lstrip": false,
1616
+ "normalized": false,
1617
+ "rstrip": false,
1618
+ "single_word": false,
1619
+ "special": true
1620
+ },
1621
+ "128202": {
1622
+ "content": "<|reserved_special_token_197|>",
1623
+ "lstrip": false,
1624
+ "normalized": false,
1625
+ "rstrip": false,
1626
+ "single_word": false,
1627
+ "special": true
1628
+ },
1629
+ "128203": {
1630
+ "content": "<|reserved_special_token_198|>",
1631
+ "lstrip": false,
1632
+ "normalized": false,
1633
+ "rstrip": false,
1634
+ "single_word": false,
1635
+ "special": true
1636
+ },
1637
+ "128204": {
1638
+ "content": "<|reserved_special_token_199|>",
1639
+ "lstrip": false,
1640
+ "normalized": false,
1641
+ "rstrip": false,
1642
+ "single_word": false,
1643
+ "special": true
1644
+ },
1645
+ "128205": {
1646
+ "content": "<|reserved_special_token_200|>",
1647
+ "lstrip": false,
1648
+ "normalized": false,
1649
+ "rstrip": false,
1650
+ "single_word": false,
1651
+ "special": true
1652
+ },
1653
+ "128206": {
1654
+ "content": "<|reserved_special_token_201|>",
1655
+ "lstrip": false,
1656
+ "normalized": false,
1657
+ "rstrip": false,
1658
+ "single_word": false,
1659
+ "special": true
1660
+ },
1661
+ "128207": {
1662
+ "content": "<|reserved_special_token_202|>",
1663
+ "lstrip": false,
1664
+ "normalized": false,
1665
+ "rstrip": false,
1666
+ "single_word": false,
1667
+ "special": true
1668
+ },
1669
+ "128208": {
1670
+ "content": "<|reserved_special_token_203|>",
1671
+ "lstrip": false,
1672
+ "normalized": false,
1673
+ "rstrip": false,
1674
+ "single_word": false,
1675
+ "special": true
1676
+ },
1677
+ "128209": {
1678
+ "content": "<|reserved_special_token_204|>",
1679
+ "lstrip": false,
1680
+ "normalized": false,
1681
+ "rstrip": false,
1682
+ "single_word": false,
1683
+ "special": true
1684
+ },
1685
+ "128210": {
1686
+ "content": "<|reserved_special_token_205|>",
1687
+ "lstrip": false,
1688
+ "normalized": false,
1689
+ "rstrip": false,
1690
+ "single_word": false,
1691
+ "special": true
1692
+ },
1693
+ "128211": {
1694
+ "content": "<|reserved_special_token_206|>",
1695
+ "lstrip": false,
1696
+ "normalized": false,
1697
+ "rstrip": false,
1698
+ "single_word": false,
1699
+ "special": true
1700
+ },
1701
+ "128212": {
1702
+ "content": "<|reserved_special_token_207|>",
1703
+ "lstrip": false,
1704
+ "normalized": false,
1705
+ "rstrip": false,
1706
+ "single_word": false,
1707
+ "special": true
1708
+ },
1709
+ "128213": {
1710
+ "content": "<|reserved_special_token_208|>",
1711
+ "lstrip": false,
1712
+ "normalized": false,
1713
+ "rstrip": false,
1714
+ "single_word": false,
1715
+ "special": true
1716
+ },
1717
+ "128214": {
1718
+ "content": "<|reserved_special_token_209|>",
1719
+ "lstrip": false,
1720
+ "normalized": false,
1721
+ "rstrip": false,
1722
+ "single_word": false,
1723
+ "special": true
1724
+ },
1725
+ "128215": {
1726
+ "content": "<|reserved_special_token_210|>",
1727
+ "lstrip": false,
1728
+ "normalized": false,
1729
+ "rstrip": false,
1730
+ "single_word": false,
1731
+ "special": true
1732
+ },
1733
+ "128216": {
1734
+ "content": "<|reserved_special_token_211|>",
1735
+ "lstrip": false,
1736
+ "normalized": false,
1737
+ "rstrip": false,
1738
+ "single_word": false,
1739
+ "special": true
1740
+ },
1741
+ "128217": {
1742
+ "content": "<|reserved_special_token_212|>",
1743
+ "lstrip": false,
1744
+ "normalized": false,
1745
+ "rstrip": false,
1746
+ "single_word": false,
1747
+ "special": true
1748
+ },
1749
+ "128218": {
1750
+ "content": "<|reserved_special_token_213|>",
1751
+ "lstrip": false,
1752
+ "normalized": false,
1753
+ "rstrip": false,
1754
+ "single_word": false,
1755
+ "special": true
1756
+ },
1757
+ "128219": {
1758
+ "content": "<|reserved_special_token_214|>",
1759
+ "lstrip": false,
1760
+ "normalized": false,
1761
+ "rstrip": false,
1762
+ "single_word": false,
1763
+ "special": true
1764
+ },
1765
+ "128220": {
1766
+ "content": "<|reserved_special_token_215|>",
1767
+ "lstrip": false,
1768
+ "normalized": false,
1769
+ "rstrip": false,
1770
+ "single_word": false,
1771
+ "special": true
1772
+ },
1773
+ "128221": {
1774
+ "content": "<|reserved_special_token_216|>",
1775
+ "lstrip": false,
1776
+ "normalized": false,
1777
+ "rstrip": false,
1778
+ "single_word": false,
1779
+ "special": true
1780
+ },
1781
+ "128222": {
1782
+ "content": "<|reserved_special_token_217|>",
1783
+ "lstrip": false,
1784
+ "normalized": false,
1785
+ "rstrip": false,
1786
+ "single_word": false,
1787
+ "special": true
1788
+ },
1789
+ "128223": {
1790
+ "content": "<|reserved_special_token_218|>",
1791
+ "lstrip": false,
1792
+ "normalized": false,
1793
+ "rstrip": false,
1794
+ "single_word": false,
1795
+ "special": true
1796
+ },
1797
+ "128224": {
1798
+ "content": "<|reserved_special_token_219|>",
1799
+ "lstrip": false,
1800
+ "normalized": false,
1801
+ "rstrip": false,
1802
+ "single_word": false,
1803
+ "special": true
1804
+ },
1805
+ "128225": {
1806
+ "content": "<|reserved_special_token_220|>",
1807
+ "lstrip": false,
1808
+ "normalized": false,
1809
+ "rstrip": false,
1810
+ "single_word": false,
1811
+ "special": true
1812
+ },
1813
+ "128226": {
1814
+ "content": "<|reserved_special_token_221|>",
1815
+ "lstrip": false,
1816
+ "normalized": false,
1817
+ "rstrip": false,
1818
+ "single_word": false,
1819
+ "special": true
1820
+ },
1821
+ "128227": {
1822
+ "content": "<|reserved_special_token_222|>",
1823
+ "lstrip": false,
1824
+ "normalized": false,
1825
+ "rstrip": false,
1826
+ "single_word": false,
1827
+ "special": true
1828
+ },
1829
+ "128228": {
1830
+ "content": "<|reserved_special_token_223|>",
1831
+ "lstrip": false,
1832
+ "normalized": false,
1833
+ "rstrip": false,
1834
+ "single_word": false,
1835
+ "special": true
1836
+ },
1837
+ "128229": {
1838
+ "content": "<|reserved_special_token_224|>",
1839
+ "lstrip": false,
1840
+ "normalized": false,
1841
+ "rstrip": false,
1842
+ "single_word": false,
1843
+ "special": true
1844
+ },
1845
+ "128230": {
1846
+ "content": "<|reserved_special_token_225|>",
1847
+ "lstrip": false,
1848
+ "normalized": false,
1849
+ "rstrip": false,
1850
+ "single_word": false,
1851
+ "special": true
1852
+ },
1853
+ "128231": {
1854
+ "content": "<|reserved_special_token_226|>",
1855
+ "lstrip": false,
1856
+ "normalized": false,
1857
+ "rstrip": false,
1858
+ "single_word": false,
1859
+ "special": true
1860
+ },
1861
+ "128232": {
1862
+ "content": "<|reserved_special_token_227|>",
1863
+ "lstrip": false,
1864
+ "normalized": false,
1865
+ "rstrip": false,
1866
+ "single_word": false,
1867
+ "special": true
1868
+ },
1869
+ "128233": {
1870
+ "content": "<|reserved_special_token_228|>",
1871
+ "lstrip": false,
1872
+ "normalized": false,
1873
+ "rstrip": false,
1874
+ "single_word": false,
1875
+ "special": true
1876
+ },
1877
+ "128234": {
1878
+ "content": "<|reserved_special_token_229|>",
1879
+ "lstrip": false,
1880
+ "normalized": false,
1881
+ "rstrip": false,
1882
+ "single_word": false,
1883
+ "special": true
1884
+ },
1885
+ "128235": {
1886
+ "content": "<|reserved_special_token_230|>",
1887
+ "lstrip": false,
1888
+ "normalized": false,
1889
+ "rstrip": false,
1890
+ "single_word": false,
1891
+ "special": true
1892
+ },
1893
+ "128236": {
1894
+ "content": "<|reserved_special_token_231|>",
1895
+ "lstrip": false,
1896
+ "normalized": false,
1897
+ "rstrip": false,
1898
+ "single_word": false,
1899
+ "special": true
1900
+ },
1901
+ "128237": {
1902
+ "content": "<|reserved_special_token_232|>",
1903
+ "lstrip": false,
1904
+ "normalized": false,
1905
+ "rstrip": false,
1906
+ "single_word": false,
1907
+ "special": true
1908
+ },
1909
+ "128238": {
1910
+ "content": "<|reserved_special_token_233|>",
1911
+ "lstrip": false,
1912
+ "normalized": false,
1913
+ "rstrip": false,
1914
+ "single_word": false,
1915
+ "special": true
1916
+ },
1917
+ "128239": {
1918
+ "content": "<|reserved_special_token_234|>",
1919
+ "lstrip": false,
1920
+ "normalized": false,
1921
+ "rstrip": false,
1922
+ "single_word": false,
1923
+ "special": true
1924
+ },
1925
+ "128240": {
1926
+ "content": "<|reserved_special_token_235|>",
1927
+ "lstrip": false,
1928
+ "normalized": false,
1929
+ "rstrip": false,
1930
+ "single_word": false,
1931
+ "special": true
1932
+ },
1933
+ "128241": {
1934
+ "content": "<|reserved_special_token_236|>",
1935
+ "lstrip": false,
1936
+ "normalized": false,
1937
+ "rstrip": false,
1938
+ "single_word": false,
1939
+ "special": true
1940
+ },
1941
+ "128242": {
1942
+ "content": "<|reserved_special_token_237|>",
1943
+ "lstrip": false,
1944
+ "normalized": false,
1945
+ "rstrip": false,
1946
+ "single_word": false,
1947
+ "special": true
1948
+ },
1949
+ "128243": {
1950
+ "content": "<|reserved_special_token_238|>",
1951
+ "lstrip": false,
1952
+ "normalized": false,
1953
+ "rstrip": false,
1954
+ "single_word": false,
1955
+ "special": true
1956
+ },
1957
+ "128244": {
1958
+ "content": "<|reserved_special_token_239|>",
1959
+ "lstrip": false,
1960
+ "normalized": false,
1961
+ "rstrip": false,
1962
+ "single_word": false,
1963
+ "special": true
1964
+ },
1965
+ "128245": {
1966
+ "content": "<|reserved_special_token_240|>",
1967
+ "lstrip": false,
1968
+ "normalized": false,
1969
+ "rstrip": false,
1970
+ "single_word": false,
1971
+ "special": true
1972
+ },
1973
+ "128246": {
1974
+ "content": "<|reserved_special_token_241|>",
1975
+ "lstrip": false,
1976
+ "normalized": false,
1977
+ "rstrip": false,
1978
+ "single_word": false,
1979
+ "special": true
1980
+ },
1981
+ "128247": {
1982
+ "content": "<|reserved_special_token_242|>",
1983
+ "lstrip": false,
1984
+ "normalized": false,
1985
+ "rstrip": false,
1986
+ "single_word": false,
1987
+ "special": true
1988
+ },
1989
+ "128248": {
1990
+ "content": "<|reserved_special_token_243|>",
1991
+ "lstrip": false,
1992
+ "normalized": false,
1993
+ "rstrip": false,
1994
+ "single_word": false,
1995
+ "special": true
1996
+ },
1997
+ "128249": {
1998
+ "content": "<|reserved_special_token_244|>",
1999
+ "lstrip": false,
2000
+ "normalized": false,
2001
+ "rstrip": false,
2002
+ "single_word": false,
2003
+ "special": true
2004
+ },
2005
+ "128250": {
2006
+ "content": "<|reserved_special_token_245|>",
2007
+ "lstrip": false,
2008
+ "normalized": false,
2009
+ "rstrip": false,
2010
+ "single_word": false,
2011
+ "special": true
2012
+ },
2013
+ "128251": {
2014
+ "content": "<|reserved_special_token_246|>",
2015
+ "lstrip": false,
2016
+ "normalized": false,
2017
+ "rstrip": false,
2018
+ "single_word": false,
2019
+ "special": true
2020
+ },
2021
+ "128252": {
2022
+ "content": "<|reserved_special_token_247|>",
2023
+ "lstrip": false,
2024
+ "normalized": false,
2025
+ "rstrip": false,
2026
+ "single_word": false,
2027
+ "special": true
2028
+ },
2029
+ "128253": {
2030
+ "content": "<|reserved_special_token_248|>",
2031
+ "lstrip": false,
2032
+ "normalized": false,
2033
+ "rstrip": false,
2034
+ "single_word": false,
2035
+ "special": true
2036
+ },
2037
+ "128254": {
2038
+ "content": "<|reserved_special_token_249|>",
2039
+ "lstrip": false,
2040
+ "normalized": false,
2041
+ "rstrip": false,
2042
+ "single_word": false,
2043
+ "special": true
2044
+ },
2045
+ "128255": {
2046
+ "content": "<|reserved_special_token_250|>",
2047
+ "lstrip": false,
2048
+ "normalized": false,
2049
+ "rstrip": false,
2050
+ "single_word": false,
2051
+ "special": true
2052
+ },
2053
+ "128256": {
2054
+ "content": "<unk>",
2055
+ "lstrip": false,
2056
+ "normalized": false,
2057
+ "rstrip": false,
2058
+ "single_word": false,
2059
+ "special": true
2060
+ },
2061
+ "128257": {
2062
+ "content": "<image>",
2063
+ "lstrip": false,
2064
+ "normalized": false,
2065
+ "rstrip": false,
2066
+ "single_word": false,
2067
+ "special": true
2068
+ },
2069
+ "128258": {
2070
+ "content": "<pad>",
2071
+ "lstrip": false,
2072
+ "normalized": false,
2073
+ "rstrip": false,
2074
+ "single_word": false,
2075
+ "special": true
2076
+ }
2077
+ },
2078
+ "bos_token": "<|begin_of_text|>",
2079
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
2080
+ "clean_up_tokenization_spaces": true,
2081
+ "eos_token": "<|end_of_text|>",
2082
+ "model_input_names": [
2083
+ "input_ids",
2084
+ "attention_mask"
2085
+ ],
2086
+ "model_max_length": 1000000000000000019884624838656,
2087
+ "pad_token": "<pad>",
2088
+ "padding_side": "right",
2089
+ "processor_class": "LlavaProcessor",
2090
+ "tokenizer_class": "LlamaTokenizer",
2091
+ "unk_token": "<unk>",
2092
+ "use_default_system_prompt": false
2093
+ }
stdmodels/openai_clip-vit-large-patch14/.gitattributes ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
stdmodels/openai_clip-vit-large-patch14/README.md ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - vision
4
+ widget:
5
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-dog-music.png
6
+ candidate_labels: playing music, playing sports
7
+ example_title: Cat & Dog
8
+ ---
9
+
10
+ # Model Card: CLIP
11
+
12
+ Disclaimer: The model card is taken and modified from the official CLIP repository, it can be found [here](https://github.com/openai/CLIP/blob/main/model-card.md).
13
+
14
+ ## Model Details
15
+
16
+ The CLIP model was developed by researchers at OpenAI to learn about what contributes to robustness in computer vision tasks. The model was also developed to test the ability of models to generalize to arbitrary image classification tasks in a zero-shot manner. It was not developed for general model deployment - to deploy models like CLIP, researchers will first need to carefully study their capabilities in relation to the specific context they’re being deployed within.
17
+
18
+ ### Model Date
19
+
20
+ January 2021
21
+
22
+ ### Model Type
23
+
24
+ The base model uses a ViT-L/14 Transformer architecture as an image encoder and uses a masked self-attention Transformer as a text encoder. These encoders are trained to maximize the similarity of (image, text) pairs via a contrastive loss.
25
+
26
+ The original implementation had two variants: one using a ResNet image encoder and the other using a Vision Transformer. This repository has the variant with the Vision Transformer.
27
+
28
+
29
+ ### Documents
30
+
31
+ - [Blog Post](https://openai.com/blog/clip/)
32
+ - [CLIP Paper](https://arxiv.org/abs/2103.00020)
33
+
34
+
35
+ ### Use with Transformers
36
+
37
+ ```python
38
+ from PIL import Image
39
+ import requests
40
+
41
+ from transformers import CLIPProcessor, CLIPModel
42
+
43
+ model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
44
+ processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
45
+
46
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
47
+ image = Image.open(requests.get(url, stream=True).raw)
48
+
49
+ inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
50
+
51
+ outputs = model(**inputs)
52
+ logits_per_image = outputs.logits_per_image # this is the image-text similarity score
53
+ probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
54
+ ```
55
+
56
+
57
+ ## Model Use
58
+
59
+ ### Intended Use
60
+
61
+ The model is intended as a research output for research communities. We hope that this model will enable researchers to better understand and explore zero-shot, arbitrary image classification. We also hope it can be used for interdisciplinary studies of the potential impact of such models - the CLIP paper includes a discussion of potential downstream impacts to provide an example for this sort of analysis.
62
+
63
+ #### Primary intended uses
64
+
65
+ The primary intended users of these models are AI researchers.
66
+
67
+ We primarily imagine the model will be used by researchers to better understand robustness, generalization, and other capabilities, biases, and constraints of computer vision models.
68
+
69
+ ### Out-of-Scope Use Cases
70
+
71
+ **Any** deployed use case of the model - whether commercial or not - is currently out of scope. Non-deployed use cases such as image search in a constrained environment, are also not recommended unless there is thorough in-domain testing of the model with a specific, fixed class taxonomy. This is because our safety assessment demonstrated a high need for task specific testing especially given the variability of CLIP’s performance with different class taxonomies. This makes untested and unconstrained deployment of the model in any use case currently potentially harmful.
72
+
73
+ Certain use cases which would fall under the domain of surveillance and facial recognition are always out-of-scope regardless of performance of the model. This is because the use of artificial intelligence for tasks such as these can be premature currently given the lack of testing norms and checks to ensure its fair use.
74
+
75
+ Since the model has not been purposefully trained in or evaluated on any languages other than English, its use should be limited to English language use cases.
76
+
77
+
78
+
79
+ ## Data
80
+
81
+ The model was trained on publicly available image-caption data. This was done through a combination of crawling a handful of websites and using commonly-used pre-existing image datasets such as [YFCC100M](http://projects.dfki.uni-kl.de/yfcc100m/). A large portion of the data comes from our crawling of the internet. This means that the data is more representative of people and societies most connected to the internet which tend to skew towards more developed nations, and younger, male users.
82
+
83
+ ### Data Mission Statement
84
+
85
+ Our goal with building this dataset was to test out robustness and generalizability in computer vision tasks. As a result, the focus was on gathering large quantities of data from different publicly-available internet data sources. The data was gathered in a mostly non-interventionist manner. However, we only crawled websites that had policies against excessively violent and adult images and allowed us to filter out such content. We do not intend for this dataset to be used as the basis for any commercial or deployed model and will not be releasing the dataset.
86
+
87
+
88
+
89
+ ## Performance and Limitations
90
+
91
+ ### Performance
92
+
93
+ We have evaluated the performance of CLIP on a wide range of benchmarks across a variety of computer vision datasets such as OCR to texture recognition to fine-grained classification. The paper describes model performance on the following datasets:
94
+
95
+ - Food101
96
+ - CIFAR10
97
+ - CIFAR100
98
+ - Birdsnap
99
+ - SUN397
100
+ - Stanford Cars
101
+ - FGVC Aircraft
102
+ - VOC2007
103
+ - DTD
104
+ - Oxford-IIIT Pet dataset
105
+ - Caltech101
106
+ - Flowers102
107
+ - MNIST
108
+ - SVHN
109
+ - IIIT5K
110
+ - Hateful Memes
111
+ - SST-2
112
+ - UCF101
113
+ - Kinetics700
114
+ - Country211
115
+ - CLEVR Counting
116
+ - KITTI Distance
117
+ - STL-10
118
+ - RareAct
119
+ - Flickr30
120
+ - MSCOCO
121
+ - ImageNet
122
+ - ImageNet-A
123
+ - ImageNet-R
124
+ - ImageNet Sketch
125
+ - ObjectNet (ImageNet Overlap)
126
+ - Youtube-BB
127
+ - ImageNet-Vid
128
+
129
+ ## Limitations
130
+
131
+ CLIP and our analysis of it have a number of limitations. CLIP currently struggles with respect to certain tasks such as fine grained classification and counting objects. CLIP also poses issues with regards to fairness and bias which we discuss in the paper and briefly in the next section. Additionally, our approach to testing CLIP also has an important limitation- in many cases we have used linear probes to evaluate the performance of CLIP and there is evidence suggesting that linear probes can underestimate model performance.
132
+
133
+ ### Bias and Fairness
134
+
135
+ We find that the performance of CLIP - and the specific biases it exhibits - can depend significantly on class design and the choices one makes for categories to include and exclude. We tested the risk of certain kinds of denigration with CLIP by classifying images of people from [Fairface](https://arxiv.org/abs/1908.04913) into crime-related and non-human animal categories. We found significant disparities with respect to race and gender. Additionally, we found that these disparities could shift based on how the classes were constructed. (Details captured in the Broader Impacts Section in the paper).
136
+
137
+ We also tested the performance of CLIP on gender, race and age classification using the Fairface dataset (We default to using race categories as they are constructed in the Fairface dataset.) in order to assess quality of performance across different demographics. We found accuracy >96% across all races for gender classification with ‘Middle Eastern’ having the highest accuracy (98.4%) and ‘White’ having the lowest (96.5%). Additionally, CLIP averaged ~93% for racial classification and ~63% for age classification. Our use of evaluations to test for gender, race and age classification as well as denigration harms is simply to evaluate performance of the model across people and surface potential risks and not to demonstrate an endorsement/enthusiasm for such tasks.
138
+
139
+
140
+
141
+ ## Feedback
142
+
143
+ ### Where to send questions or comments about the model
144
+
145
+ Please use [this Google Form](https://forms.gle/Uv7afRH5dvY34ZEs9)
stdmodels/openai_clip-vit-large-patch14/config.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "clip-vit-large-patch14/",
3
+ "architectures": [
4
+ "CLIPModel"
5
+ ],
6
+ "initializer_factor": 1.0,
7
+ "logit_scale_init_value": 2.6592,
8
+ "model_type": "clip",
9
+ "projection_dim": 768,
10
+ "text_config": {
11
+ "_name_or_path": "",
12
+ "add_cross_attention": false,
13
+ "architectures": null,
14
+ "attention_dropout": 0.0,
15
+ "bad_words_ids": null,
16
+ "bos_token_id": 0,
17
+ "chunk_size_feed_forward": 0,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "diversity_penalty": 0.0,
21
+ "do_sample": false,
22
+ "dropout": 0.0,
23
+ "early_stopping": false,
24
+ "encoder_no_repeat_ngram_size": 0,
25
+ "eos_token_id": 2,
26
+ "finetuning_task": null,
27
+ "forced_bos_token_id": null,
28
+ "forced_eos_token_id": null,
29
+ "hidden_act": "quick_gelu",
30
+ "hidden_size": 768,
31
+ "id2label": {
32
+ "0": "LABEL_0",
33
+ "1": "LABEL_1"
34
+ },
35
+ "initializer_factor": 1.0,
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": 3072,
38
+ "is_decoder": false,
39
+ "is_encoder_decoder": false,
40
+ "label2id": {
41
+ "LABEL_0": 0,
42
+ "LABEL_1": 1
43
+ },
44
+ "layer_norm_eps": 1e-05,
45
+ "length_penalty": 1.0,
46
+ "max_length": 20,
47
+ "max_position_embeddings": 77,
48
+ "min_length": 0,
49
+ "model_type": "clip_text_model",
50
+ "no_repeat_ngram_size": 0,
51
+ "num_attention_heads": 12,
52
+ "num_beam_groups": 1,
53
+ "num_beams": 1,
54
+ "num_hidden_layers": 12,
55
+ "num_return_sequences": 1,
56
+ "output_attentions": false,
57
+ "output_hidden_states": false,
58
+ "output_scores": false,
59
+ "pad_token_id": 1,
60
+ "prefix": null,
61
+ "problem_type": null,
62
+ "projection_dim" : 768,
63
+ "pruned_heads": {},
64
+ "remove_invalid_values": false,
65
+ "repetition_penalty": 1.0,
66
+ "return_dict": true,
67
+ "return_dict_in_generate": false,
68
+ "sep_token_id": null,
69
+ "task_specific_params": null,
70
+ "temperature": 1.0,
71
+ "tie_encoder_decoder": false,
72
+ "tie_word_embeddings": true,
73
+ "tokenizer_class": null,
74
+ "top_k": 50,
75
+ "top_p": 1.0,
76
+ "torch_dtype": null,
77
+ "torchscript": false,
78
+ "transformers_version": "4.16.0.dev0",
79
+ "use_bfloat16": false,
80
+ "vocab_size": 49408
81
+ },
82
+ "text_config_dict": {
83
+ "hidden_size": 768,
84
+ "intermediate_size": 3072,
85
+ "num_attention_heads": 12,
86
+ "num_hidden_layers": 12,
87
+ "projection_dim": 768
88
+ },
89
+ "torch_dtype": "float32",
90
+ "transformers_version": null,
91
+ "vision_config": {
92
+ "_name_or_path": "",
93
+ "add_cross_attention": false,
94
+ "architectures": null,
95
+ "attention_dropout": 0.0,
96
+ "bad_words_ids": null,
97
+ "bos_token_id": null,
98
+ "chunk_size_feed_forward": 0,
99
+ "cross_attention_hidden_size": null,
100
+ "decoder_start_token_id": null,
101
+ "diversity_penalty": 0.0,
102
+ "do_sample": false,
103
+ "dropout": 0.0,
104
+ "early_stopping": false,
105
+ "encoder_no_repeat_ngram_size": 0,
106
+ "eos_token_id": null,
107
+ "finetuning_task": null,
108
+ "forced_bos_token_id": null,
109
+ "forced_eos_token_id": null,
110
+ "hidden_act": "quick_gelu",
111
+ "hidden_size": 1024,
112
+ "id2label": {
113
+ "0": "LABEL_0",
114
+ "1": "LABEL_1"
115
+ },
116
+ "image_size": 224,
117
+ "initializer_factor": 1.0,
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 4096,
120
+ "is_decoder": false,
121
+ "is_encoder_decoder": false,
122
+ "label2id": {
123
+ "LABEL_0": 0,
124
+ "LABEL_1": 1
125
+ },
126
+ "layer_norm_eps": 1e-05,
127
+ "length_penalty": 1.0,
128
+ "max_length": 20,
129
+ "min_length": 0,
130
+ "model_type": "clip_vision_model",
131
+ "no_repeat_ngram_size": 0,
132
+ "num_attention_heads": 16,
133
+ "num_beam_groups": 1,
134
+ "num_beams": 1,
135
+ "num_hidden_layers": 24,
136
+ "num_return_sequences": 1,
137
+ "output_attentions": false,
138
+ "output_hidden_states": false,
139
+ "output_scores": false,
140
+ "pad_token_id": null,
141
+ "patch_size": 14,
142
+ "prefix": null,
143
+ "problem_type": null,
144
+ "projection_dim" : 768,
145
+ "pruned_heads": {},
146
+ "remove_invalid_values": false,
147
+ "repetition_penalty": 1.0,
148
+ "return_dict": true,
149
+ "return_dict_in_generate": false,
150
+ "sep_token_id": null,
151
+ "task_specific_params": null,
152
+ "temperature": 1.0,
153
+ "tie_encoder_decoder": false,
154
+ "tie_word_embeddings": true,
155
+ "tokenizer_class": null,
156
+ "top_k": 50,
157
+ "top_p": 1.0,
158
+ "torch_dtype": null,
159
+ "torchscript": false,
160
+ "transformers_version": "4.16.0.dev0",
161
+ "use_bfloat16": false
162
+ },
163
+ "vision_config_dict": {
164
+ "hidden_size": 1024,
165
+ "intermediate_size": 4096,
166
+ "num_attention_heads": 16,
167
+ "num_hidden_layers": 24,
168
+ "patch_size": 14,
169
+ "projection_dim": 768
170
+ }
171
+ }
stdmodels/openai_clip-vit-large-patch14/flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:156f677ed4495acd1ec7197249c091b85c240267c82f2f7f2e4eae4177931fed
3
+ size 1710486359
stdmodels/openai_clip-vit-large-patch14/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
stdmodels/openai_clip-vit-large-patch14/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bf730a0c7debf160f7a6b50b3aaf3703e7e88ac73de7a314903141db026dcb
3
+ size 1710540580
stdmodels/openai_clip-vit-large-patch14/preprocessor_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": 224,
3
+ "do_center_crop": true,
4
+ "do_normalize": true,
5
+ "do_resize": true,
6
+ "feature_extractor_type": "CLIPFeatureExtractor",
7
+ "image_mean": [
8
+ 0.48145466,
9
+ 0.4578275,
10
+ 0.40821073
11
+ ],
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "resample": 3,
18
+ "size": 224
19
+ }
stdmodels/openai_clip-vit-large-patch14/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a17cdbe0f36fec524f5cafb1c261ea3bbbc13e346e0f74fc9eb0460dedd0d3
3
+ size 1710671599
stdmodels/openai_clip-vit-large-patch14/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}
stdmodels/openai_clip-vit-large-patch14/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f154e925c18270d662d28f6261523c2ff6e80f1f05292cb034db41d5951c7a4
3
+ size 1711114176
stdmodels/openai_clip-vit-large-patch14/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
stdmodels/openai_clip-vit-large-patch14/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "unk_token": {
3
+ "content": "<|endoftext|>",
4
+ "single_word": false,
5
+ "lstrip": false,
6
+ "rstrip": false,
7
+ "normalized": true,
8
+ "__type": "AddedToken"
9
+ },
10
+ "bos_token": {
11
+ "content": "<|startoftext|>",
12
+ "single_word": false,
13
+ "lstrip": false,
14
+ "rstrip": false,
15
+ "normalized": true,
16
+ "__type": "AddedToken"
17
+ },
18
+ "eos_token": {
19
+ "content": "<|endoftext|>",
20
+ "single_word": false,
21
+ "lstrip": false,
22
+ "rstrip": false,
23
+ "normalized": true,
24
+ "__type": "AddedToken"
25
+ },
26
+ "pad_token": "<|endoftext|>",
27
+ "add_prefix_space": false,
28
+ "errors": "replace",
29
+ "do_lower_case": true,
30
+ "name_or_path": "openai/clip-vit-base-patch32",
31
+ "model_max_length": 77,
32
+ "special_tokens_map_file": "./special_tokens_map.json",
33
+ "tokenizer_class": "CLIPTokenizer"
34
+ }
stdmodels/openai_clip-vit-large-patch14/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
stdmodels/vae_3d/hyvae/checkpoint-step-270000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1da2d66888cfa4da4c38a41e6aaa0d69fd792dfe9a96ebd497058604e347b8f
3
+ size 3066127337
stdmodels/vae_3d/hyvae/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLCausal3D",
3
+ "_diffusers_version": "0.4.2",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "DownEncoderBlockCausal3D",
13
+ "DownEncoderBlockCausal3D",
14
+ "DownEncoderBlockCausal3D",
15
+ "DownEncoderBlockCausal3D"
16
+ ],
17
+ "in_channels": 3,
18
+ "latent_channels": 16,
19
+ "layers_per_block": 2,
20
+ "norm_num_groups": 32,
21
+ "out_channels": 3,
22
+ "sample_size": 256,
23
+ "sample_tsize": 64,
24
+ "up_block_types": [
25
+ "UpDecoderBlockCausal3D",
26
+ "UpDecoderBlockCausal3D",
27
+ "UpDecoderBlockCausal3D",
28
+ "UpDecoderBlockCausal3D"
29
+ ],
30
+ "scaling_factor": 0.476986,
31
+ "time_compression_ratio": 4,
32
+ "mid_block_add_attention": true,
33
+ "mid_block_causal_attn": true
34
+ }
stdmodels/vae_3d/hyvae/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1da2d66888cfa4da4c38a41e6aaa0d69fd792dfe9a96ebd497058604e347b8f
3
+ size 3066127337
stdmodels/vae_3d/hyvae/source ADDED
@@ -0,0 +1 @@
 
 
1
+ /apdcephfs_nj8/share_301739632/bobbybwu/exp/videoVAE/finetune_conv3d_884_c16_video50m_image500m_cold_start_nj-2024-08-01T20-13-32/checkpoints/checkpoint-step-270000.ckpt