This is a fine-tuned version of Qwen2.5-VL-7B-Instruct that predicts human preferences, trained on the HPDv2 dataset with the Listener-Rewarded method (paper).
This is a research-preview version. The model still often produces unreliable reasoning traces, hallucinates, and judges at first glance.
Example usage
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
import torch
from PIL import Image
import re
import json
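# Load the fine-tuned preference reasoner. bf16 + FlashAttention 2 assume a recent
# CUDA GPU; drop attn_implementation if flash-attn is not installed.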
reasoner = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "alexgambashidze/qwen2.5vl_image_preference_reasoner",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
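# max_pixels caps image resolution; 720*28*28 keeps each image to roughly 720 visual tokens.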
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", max_pixels=720*28*28)
SYSTEM_PROMPT = (
    "The user has two images and a textual prompt. "
    "You need to reason carefully inside <think>...</think> tags and produce an answer in <answer>...</answer> tags where you should choose best image."
)
image1_path = 'Your first image path'
image2_path = 'Your second image path'
# Load, convert to RGB, and downscale both candidate images
image1 = Image.open(image1_path).convert("RGB").resize((512, 512), Image.LANCZOS)
image2 = Image.open(image2_path).convert("RGB").resize((512, 512), Image.LANCZOS)
user_prompt = "A beautiful sunset over mountains"
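# The two "image" placeholders below are filled, in order, by the images passed to the processor.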
user_content = [
    {"type": "image"},
    {"type": "image"},
    {
        "type": "text",
        "text": (
            f"User prompt: {user_prompt}\n\n"
            "Which image is better given the prompt? "
            "Provide your reasoning in <think>...</think> tags, "
            'and the final JSON answer in <answer>{"preferred":"second"}</answer> or {"preferred":"first"}.\n'
        ),
    },
]
conversation_prompt = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": user_content},
]
# Render the chat template to a prompt string first, then tokenize text and images together
conversation_text = processor.apply_chat_template(
    conversation_prompt,
    add_generation_prompt=True,
    tokenize=False,
)
inputs = processor(
    text=[conversation_text],
    images=[image1, image2],
    return_tensors="pt",
    padding=True,
).to(reasoner.device)
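# Sample a reasoning trace; lowering temperature (or setting do_sample=False)
# trades diversity for more deterministic verdicts.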
with torch.no_grad():
    output = reasoner.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=processor.tokenizer.eos_token_id,
    )
# Decode only the newly generated tokens, skipping the prompt
generated_ids = output[0][inputs.input_ids.shape[1]:]
assistant_response = processor.decode(generated_ids, skip_special_tokens=True).strip()
print("Reasoner response:")
print(assistant_response)
# Extract the final verdict from the <answer>...</answer> tags
pattern_answer_tags = r"<answer>(.*?)</answer>"
match = re.search(pattern_answer_tags, assistant_response, flags=re.DOTALL)
if match:
    answer_content = match.group(1)
    try:
        answer_json = json.loads(answer_content)
        preferred = answer_json.get("preferred", "unknown")
        print(f"\nPreference: {preferred}")
    except json.JSONDecodeError:
        print(f"\nRaw answer: {answer_content}")
else:
    print("\nNo <answer> tags found")