import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from functools import lru_cache

MODEL_ID = "unsloth/Qwen2.5-VL-3B-Instruct"
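# NOTE: model.load_adapter() below applies a LoRA adapter on top of the base model
# and relies on the `peft` integration in transformers being available.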
@lru_cache(maxsize=1)  # cache so repeated calls reuse the already-loaded weights
def _load_model():
    """Load and cache the model and processor inside the GPU worker."""
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16
    ).to("cuda")
    adapter_path = "thangvip/qwen-2.5-vl-3b-lora-brainrot-new"
    model.load_adapter(adapter_path)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    return model, processor
@spaces.GPU  # ZeroGPU: run this function inside the GPU worker subprocess
def gpu_inference(image_path: str, prompt: str) -> str:
    """Perform inference entirely in the GPU subprocess."""
    model, processor = _load_model()

    # Load and preprocess image (downscale wide images to 512 px, keeping aspect ratio)
    image = Image.open(image_path).convert("RGB")
    if image.width > 512:
        ratio = image.height / image.width
        image = image.resize((512, int(512 * ratio)), Image.Resampling.LANCZOS)

    # Build conversation
    system_msg = (
        "You are BrainRot Bot.\n"
    )
    conversation = [
        {"role": "system", "content": [{"type": "text", "text": system_msg}]},
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt}
        ]},
    ]

    # Tokenize, generate, decode
    chat_input = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=[chat_input], images=[image], return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)
    decoded = processor.batch_decode(
        output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    # Extract assistant portion
    return decoded.split("assistant", 1)[-1].strip().lstrip(":").strip()
# Message handling
def add_message(history, user_input):
    if history is None:
        history = []
    for f in user_input.get("files", []):
        history.append({"role": "user", "content": (f,)})
    text = user_input.get("text", "")
    if text:
        history.append({"role": "user", "content": text})
    return history, gr.MultimodalTextbox(value=None)
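# inference_interface() walks the chat history backwards to find the most recent
# user text and the most recent uploaded image, runs gpu_inference() on that pair,
# and appends the assistant reply to the history.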
def inference_interface(history):
    if not history:
        return history, gr.MultimodalTextbox(value=None)

    # Last user text
    user_text = next(
        (m["content"] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], str)),
        None
    )
    if user_text is None:
        return history, gr.MultimodalTextbox(value=None)

    # Last user image
    image_path = next(
        (m["content"][0] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], tuple)),
        None
    )
    if image_path is None:
        return history, gr.MultimodalTextbox(value=None)

    # GPU inference
    reply = gpu_inference(image_path, user_text)
    history.append({"role": "assistant", "content": reply})
    return history, gr.MultimodalTextbox(value=None)
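# Gradio UI: a Chatbot in "messages" format plus a MultimodalTextbox. Both the
# textbox's submit event and the Send button run add_message() first, then
# chain inference_interface() via .then().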
def build_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# qwen-2.5-vl-3b-lora-brr\nAsk me anything about brainrot memes")
        chatbot = gr.Chatbot([], type="messages", label="Conversation")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_types=["image"],
            placeholder="Enter text and upload an image.",
            show_label=True
        )
        submit_evt = chat_input.submit(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        submit_evt.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )
        with gr.Row():
            send_btn = gr.Button("Send")
            clear_btn = gr.ClearButton([chatbot, chat_input])
        send_click = send_btn.click(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        send_click.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )
    return demo
if __name__ == "__main__":
    demo = build_demo()
    demo.launch(share=True)
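# Note: share=True asks Gradio to open a temporary public link when the app is
# run outside Hugging Face Spaces; on Spaces itself the flag has no effect.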