import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from functools import lru_cache
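
# Base vision-language model; a LoRA adapter fine-tuned on brainrot memes is loaded on top in _load_model()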
MODEL_ID = "unsloth/Qwen2.5-VL-3B-Instruct"


@lru_cache(maxsize=1)
def _load_model():
    """Load and cache the model and processor inside GPU worker."""
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16
    ).to("cuda")
    adapter_path = "thangvip/qwen-2.5-vl-3b-lora-brainrot-new"
    model.load_adapter(adapter_path)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    return model, processor
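

# ZeroGPU pattern: the model is loaded lazily inside the @spaces.GPU-decorated call and
# cached with lru_cache, so subsequent requests in the same worker process reuse it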
@spaces.GPU
def gpu_inference(image_path: str, prompt: str) -> str:
    """Perform inference entirely in the GPU subprocess."""
    model, processor = _load_model()

    # Load and preprocess image
    image = Image.open(image_path).convert("RGB")
    if image.width > 512:
        ratio = image.height / image.width
        image = image.resize((512, int(512 * ratio)), Image.Resampling.LANCZOS)

    # Build conversation
    system_msg = (
        "You are BrainRot Bot.\n"
    )
    conversation = [
        {"role": "system", "content": [{"type": "text", "text": system_msg}]},
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt}
        ]}
    ]

    # Tokenize, generate, decode
    chat_input = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=[chat_input], images=[image], return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)
    # Decode only the newly generated tokens so the prompt is not echoed back
    generated_ids = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(
        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0].strip()


# Message handling
def add_message(history, user_input):
    """Append uploaded files and text from the MultimodalTextbox to the chat history."""
    if history is None:
        history = []
    # Files are stored as single-element tuples so they can be told apart from plain text turns
    for f in user_input.get("files", []):
        history.append({"role": "user", "content": (f,)})
    text = user_input.get("text", "")
    if text:
        history.append({"role": "user", "content": text})
    return history, gr.MultimodalTextbox(value=None)


def inference_interface(history):
    if not history:
        return history, gr.MultimodalTextbox(value=None)

    # Last user text
    user_text = next(
        (m["content"] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], str)),
        None
    )
    if user_text is None:
        return history, gr.MultimodalTextbox(value=None)

    # Last user image
    image_path = next(
        (m["content"][0] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], tuple)),
        None
    )
    if image_path is None:
        return history, gr.MultimodalTextbox(value=None)

    # GPU inference
    reply = gpu_inference(image_path, user_text)
    history.append({"role": "assistant", "content": reply})
    return history, gr.MultimodalTextbox(value=None)
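

# Gradio UI: a messages-format Chatbot plus a MultimodalTextbox; both Enter and the Send
# button first append the user turn (add_message), then run inference (inference_interface)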
def build_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# qwen-2.5-vl-3b-lora-brr\nAsk me anything about brainrot memes.")
        chatbot = gr.Chatbot([], type="messages", label="Conversation")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_types=["image"],
            placeholder="Enter text and upload an image.",
            show_label=True
        )

        # Pressing Enter records the user turn, then runs inference
        submit_evt = chat_input.submit(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        submit_evt.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )

        with gr.Row():
            send_btn = gr.Button("Send")
            clear_btn = gr.ClearButton([chatbot, chat_input])

        # The Send button mirrors the submit behaviour
        send_click = send_btn.click(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        send_click.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )
    return demo


if __name__ == "__main__":
    demo = build_demo()
    demo.launch(share=True)