Update README.md

#4
by jujeongho - opened
Files changed (1)
  1. README.md +45 -1
README.md CHANGED
@@ -122,7 +122,7 @@ conversation = [
         "role": "user",
         "content": [
             {"type": "image", "url": "https://huggingface.co/NCSOFT/VARCO-VISION-2.0-14B/resolve/main/demo.jpg"},
-            {"type": "text", "text": "์ด ์ด๋ฏธ์ง€์— ํ‘œ์‹œ๋œ ๊ฒƒ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?"},
+            {"type": "text", "text": "๊ฐ ๋ฐ•์Šค๋งˆ๋‹ค ํ•œ ์ค„์”ฉ ์ƒ‰์ƒ๊ณผ ๊ธ€์ž๋ฅผ ์ •ํ™•ํ•˜๊ฒŒ ์ถœ๋ ฅํ•ด์ฃผ์„ธ์š”."},
         ],
     },
 ]
@@ -175,6 +175,50 @@ print(output)
 ```
 </details>
 
+<details>
+<summary>Batch inference</summary>
+
+All inputs in a batch must share the same modality structure (for example, text-only with text-only, single-image with single-image, and multi-image with the same number of images) to ensure correct batch inference.
+
+```python
+conversation_1 = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "file:///path/to/image1.jpg"},
+            {"type": "text", "text": "์ด๋ฏธ์ง€๋ฅผ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”."},
+        ],
+    },
+]
+
+conversation_2 = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "file:///path/to/image2.jpg"},
+            {"type": "text", "text": "์ด ์ด๋ฏธ์ง€์— ํ‘œ์‹œ๋œ ๊ฒƒ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?"},
+        ],
+    },
+]
+
+inputs = processor.apply_chat_template(
+    [conversation_1, conversation_2],
+    add_generation_prompt=True,
+    tokenize=True,
+    return_dict=True,
+    padding=True,
+    return_tensors="pt"
+).to(model.device, torch.float16)
+
+generate_ids = model.generate(**inputs, max_new_tokens=1024)
+generate_ids_trimmed = [
+    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generate_ids)
+]
+output = processor.batch_decode(generate_ids_trimmed, skip_special_tokens=True)
+print(output)
+```
+</details>
+
 <details>
 <summary>OCR inference</summary>
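
The batch example above reuses the `model` and `processor` objects set up earlier in the README (the two Korean prompts ask the model to "Describe this image." and "What is shown in this image?"). For readers trying the snippet in isolation, here is a minimal setup sketch; the generic `AutoModelForImageTextToText`/`AutoProcessor` classes and the left-padding choice are assumptions of this sketch, not part of the PR.

```python
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

# Assumed setup, mirroring the README's earlier sections (class names are a
# guess via the generic Auto API; the README's own loading code is authoritative).
model_name = "NCSOFT/VARCO-VISION-2.0-14B"
model = AutoModelForImageTextToText.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # matches the float16 cast used in the batch example
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_name)

# Decoder-only models should be left-padded for batched generation, so every
# row's prompt ends at the same position before new tokens are appended.
processor.tokenizer.padding_side = "left"
```

Left padding matters for the trimming step: `generate` appends new tokens after the padded prompt, so `out_ids[len(in_ids) :]` strips exactly the prompt-plus-padding prefix from each row, leaving only the generated answer.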