Hcompany
/

Holo1-7B

@@ -92,9 +92,7 @@ import json
 import os
 from typing import Any, Literal
-from PIL import Image
 from transformers import AutoModelForImageTextToText, AutoProcessor
-from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
 # default: Load the model on the available device(s)
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
@@ -131,7 +129,12 @@ def run_inference(messages: list[dict[str, Any]]) -> str:
 ### Prepare image and instruction
 ```python
 # Prepare image and instruction
 image_url = "https://huggingface.co/Hcompany/Holo1-7B/resolve/main/calendar_example.jpg"
 image = Image.open(requests.get(image_url, stream=True).raw)

 import os
 from typing import Any, Literal
 from transformers import AutoModelForImageTextToText, AutoProcessor
 # default: Load the model on the available device(s)
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
 ### Prepare image and instruction
+WARNING: Holo1 uses absolute coordinates (in pixels), and the Hugging Face processor resizes the image. To get matching coordinates, you need to resize the image with `smart_resize`.
 ```python
+from PIL import Image
+from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
 # Prepare image and instruction
 image_url = "https://huggingface.co/Hcompany/Holo1-7B/resolve/main/calendar_example.jpg"
 image = Image.open(requests.get(image_url, stream=True).raw)