Spaces: Running on Zero
SZhanZ committed · Commit 2410ef7 · 1 Parent(s): f5c1e8d
device set
app.py
CHANGED
@@ -17,7 +17,7 @@ def extract_bbox_answer(content):
         return bbox
     return [0, 0, 0, 0]
 
-def process_image_and_text(image, text):
+def process_image_and_text(image, text, device):
     """Process image and text input, return thinking process and bbox"""
     question = f"Please provide the bounding box coordinate of the region this sentence describes: {text}."
     QUESTION_TEMPLATE = "{Question} First output the thinking process in <think> </think> tags and then output the final answer in <answer> </answer> tags. Output the final answer in JSON format."
@@ -45,7 +45,7 @@ def process_image_and_text(image, text):
         add_special_tokens=False,
     )
 
-    inputs = inputs.to("cuda")
+    inputs = inputs.to(device)
 
     with torch.no_grad():
         generated_ids = model.generate(**inputs, use_cache=True, max_new_tokens=256, do_sample=False)
@@ -76,11 +76,12 @@ if __name__ == "__main__":
 
     # model_path = "/data/shz/project/vlm-r1/VLM-R1/output/Qwen2.5-VL-3B-GRPO-REC/checkpoint-500"
     model_path = "SZhanZ/Qwen2.5VL-VLM-R1-REC-step500"
-    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path, torch_dtype=torch.float16, device_map="cuda")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path, torch_dtype=torch.float16, device_map=device)
     processor = AutoProcessor.from_pretrained(model_path)
 
     def gradio_interface(image, text):
-        thinking, result_image = process_image_and_text(image, text)
+        thinking, result_image = process_image_and_text(image, text, device)
         return thinking, result_image
 
     demo = gr.Interface(
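The net effect of the commit is that the device is resolved once at startup and passed explicitly to both the model weights and the input tensors, so the Space no longer assumes CUDA is available. Below is a minimal, self-contained sketch of that pattern; the run helper and its chat-template prompt are illustrative stand-ins (the real app.py wraps the question in its QUESTION_TEMPLATE and draws the returned bounding box), not the Space's exact code:

import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

# Resolve the device once at startup, falling back to CPU when CUDA is
# unavailable, then thread it through explicitly (the point of this commit).
device = "cuda" if torch.cuda.is_available() else "cpu"

model_path = "SZhanZ/Qwen2.5VL-VLM-R1-REC-step500"
# float16 matches the diff; on CPU-only hardware float32 may be preferable.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map=device
)
processor = AutoProcessor.from_pretrained(model_path)

def run(image: Image.Image, text: str, device: str) -> str:
    # Build a chat-templated prompt around the user text (simplified here;
    # the app composes the question from its QUESTION_TEMPLATE).
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": text},
    ]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt")
    # Move the input tensors to the same device the model weights live on.
    inputs = inputs.to(device)
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs, use_cache=True, max_new_tokens=256, do_sample=False
        )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

With device_map=device the weights land on the chosen device directly, and inputs.to(device) keeps the input tensors co-located with them, which avoids the usual "expected all tensors to be on the same device" failure when the Space runs without a GPU.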