Update README.md
README.md (changed)
@@ -122,7 +122,7 @@ conversation = [
         "role": "user",
         "content": [
             {"type": "image", "url": "https://huggingface.co/NCSOFT/VARCO-VISION-2.0-14B/resolve/main/demo.jpg"},
-            {"type": "text", "text": "
+            {"type": "text", "text": "각 박스마다 한 줄씩 색상과 글자를 정확하게 출력해주세요."},
         ],
     },
 ]
@@ -175,6 +175,50 @@ print(output)
 ```
 </details>

+<details>
+<summary>Batch inference</summary>
+
+All inputs in a batch must have the same modality structure—for example, text-only with text-only, single-image with single-image, and multi-image inputs with the same number of images—to ensure correct batch inference.
+
+```python
+conversation_1 = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "file:///path/to/image1.jpg"},
+            {"type": "text", "text": "이미지를 설명해주세요."},
+        ],
+    },
+]
+
+conversation_2 = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "file:///path/to/image2.jpg"},
+            {"type": "text", "text": "이 이미지에 표시된 것은 무엇인가요?"},
+        ],
+    },
+]
+
+inputs = processor.apply_chat_template(
+    [conversation_1, conversation_2],
+    add_generation_prompt=True,
+    tokenize=True,
+    return_dict=True,
+    padding=True,
+    return_tensors="pt"
+).to(model.device, torch.float16)
+
+generate_ids = model.generate(**inputs, max_new_tokens=1024)
+generate_ids_trimmed = [
+    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generate_ids)
+]
+output = processor.batch_decode(generate_ids_trimmed, skip_special_tokens=True)
+print(output)
+```
+</details>
+
 <details>
 <summary>OCR inference</summary>

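The constraint stated in the added section (every conversation in a batch must share the same modality structure) fails silently if violated, so a small pre-flight check before `apply_chat_template` can be useful. The sketch below is illustrative and not part of this commit: `modality_signature` and `check_batch` are hypothetical helper names, and it assumes each message's `content` is a list of typed entries exactly as in the examples above (whose Korean prompts ask the model to describe the image and to identify what is shown).

```python
# Illustrative sketch (not from the commit): verify that all conversations in a
# batch share one modality structure before batching them, per the note above.
from typing import Dict, List, Tuple

def modality_signature(conversation: List[Dict]) -> Tuple:
    # One (role, content-type sequence) pair per turn, e.g.
    # (("user", ("image", "text")),) for both batch examples in the README.
    return tuple(
        (turn["role"], tuple(item["type"] for item in turn["content"]))
        for turn in conversation
    )

def check_batch(conversations: List[List[Dict]]) -> None:
    # Hypothetical helper: fail early instead of getting misaligned generations.
    signatures = {modality_signature(c) for c in conversations}
    if len(signatures) > 1:
        raise ValueError(f"Mixed modality structures in batch: {signatures}")

if __name__ == "__main__":
    conv_a = [{"role": "user", "content": [
        {"type": "image", "image": "file:///a.jpg"},
        {"type": "text", "text": "describe"},
    ]}]
    conv_b = [{"role": "user", "content": [{"type": "text", "text": "hello"}]}]
    check_batch([conv_a, conv_a])  # OK: identical single-image + text structure
    check_batch([conv_a, conv_b])  # raises: image+text mixed with text-only
```

Separately, since the example batches with `padding=True`, note that decoder-only generation generally expects left padding; if batched outputs look truncated or misaligned, inspecting `processor.tokenizer.padding_side` is a reasonable first step.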