# LoRA Inference Gradio Space Demo
import threading

import spaces
import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Load the 4-bit quantized base model
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit",
    device_map="auto",
    torch_dtype="auto"
)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(
    base_model,
    "ZennyKenny/GPRO_LoRA_Qwen_3B"
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")

# Allocate ZeroGPU hardware for the duration of each call
@spaces.GPU
def generate_response(prompt):
    reasoning_prompt = (
        "Answer the following question and explain your reasoning step by step.\n"
        f"Question: {prompt}\nReasoning:"
    )
    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)

    # Stream the response: generate() writes tokens into a TextIteratorStreamer
    # from a background thread while we read decoded text from it here
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=300,  # cap on generated tokens
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        streamer=streamer,
    )
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the cumulative response so far; Gradio re-renders the textbox on each yield
    response = ""
    for new_text in streamer:
        response += new_text
        yield response

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Response"),
    title="LoRA Model Reasoning Inference",
    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
)

demo.launch()
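
When this script is deployed as a Space, the repo also needs a requirements.txt covering the imports above. A minimal sketch, assuming bitsandbytes is needed to load the bnb-4bit checkpoint and that gradio and spaces are provided by the Space SDK (versions left unpinned):

# requirements.txt
torch
transformers
peft
accelerate
bitsandbytes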
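
To sanity-check the streaming generator outside Gradio, you can drain it directly. A minimal sketch with an arbitrary example prompt; note that each yield is the full response so far, which is what lets the Gradio textbox update in place:

# Hypothetical local smoke test: the last partial is the complete response
final = ""
for partial in generate_response("What is 12 * 34? Show your work."):
    final = partial
print(final)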