import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from functools import lru_cache

MODEL_ID = "unsloth/Qwen2.5-VL-3B-Instruct"
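# NOTE: model.load_adapter() below applies a LoRA adapter on top of the base model
# and relies on the `peft` integration in transformers being available.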
@lru_cache(maxsize=1)  # cache so repeated calls reuse the already-loaded weights
def _load_model():
    """Load and cache the model and processor inside the GPU worker."""
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16
    ).to("cuda")
    adapter_path = "thangvip/qwen-2.5-vl-3b-lora-brainrot-new"
    model.load_adapter(adapter_path)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    return model, processor
@spaces.GPU  # ZeroGPU: run this function inside the GPU worker subprocess
def gpu_inference(image_path: str, prompt: str) -> str:
    """Perform inference entirely in the GPU subprocess."""
    model, processor = _load_model()

    # Load and preprocess image (downscale wide images to 512 px, keeping aspect ratio)
    image = Image.open(image_path).convert("RGB")
    if image.width > 512:
        ratio = image.height / image.width
        image = image.resize((512, int(512 * ratio)), Image.Resampling.LANCZOS)

    # Build conversation
    system_msg = (
        "You are BrainRot Bot.\n"
    )
    conversation = [
        {"role": "system", "content": [{"type": "text", "text": system_msg}]},
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt}
        ]},
    ]

    # Tokenize, generate, decode
    chat_input = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=[chat_input], images=[image], return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)
    decoded = processor.batch_decode(
        output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    # Extract assistant portion
    return decoded.split("assistant", 1)[-1].strip().lstrip(":").strip()
# Message handling
def add_message(history, user_input):
    if history is None:
        history = []
    for f in user_input.get("files", []):
        history.append({"role": "user", "content": (f,)})
    text = user_input.get("text", "")
    if text:
        history.append({"role": "user", "content": text})
    return history, gr.MultimodalTextbox(value=None)
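# inference_interface() walks the chat history backwards to find the most recent
# user text and the most recent uploaded image, runs gpu_inference() on that pair,
# and appends the assistant reply to the history.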
def inference_interface(history):
    if not history:
        return history, gr.MultimodalTextbox(value=None)

    # Last user text
    user_text = next(
        (m["content"] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], str)),
        None
    )
    if user_text is None:
        return history, gr.MultimodalTextbox(value=None)

    # Last user image
    image_path = next(
        (m["content"][0] for m in reversed(history)
         if m["role"] == "user" and isinstance(m["content"], tuple)),
        None
    )
    if image_path is None:
        return history, gr.MultimodalTextbox(value=None)

    # GPU inference
    reply = gpu_inference(image_path, user_text)
    history.append({"role": "assistant", "content": reply})
    return history, gr.MultimodalTextbox(value=None)
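# Gradio UI: a Chatbot in "messages" format plus a MultimodalTextbox. Both the
# textbox's submit event and the Send button run add_message() first, then
# chain inference_interface() via .then().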
def build_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# qwen-2.5-vl-3b-lora-brr\nAsk me anything about brainrot memes")
        chatbot = gr.Chatbot([], type="messages", label="Conversation")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_types=["image"],
            placeholder="Enter text and upload an image.",
            show_label=True
        )
        submit_evt = chat_input.submit(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        submit_evt.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )
        with gr.Row():
            send_btn = gr.Button("Send")
            clear_btn = gr.ClearButton([chatbot, chat_input])
        send_click = send_btn.click(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        send_click.then(
            inference_interface, [chatbot], [chatbot, chat_input]
        )
    return demo
if __name__ == "__main__":
    demo = build_demo()
    demo.launch(share=True)
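# Note: share=True asks Gradio to open a temporary public link when the app is
# run outside Hugging Face Spaces; on Spaces itself the flag has no effect.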