ZennyKenny commited on
Commit
e750d47
·
verified ·
1 Parent(s): efc16fd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# LoRA Inference Gradio Space Demo

import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the base checkpoint id — the tokenizer MUST be
# loaded from the same repo as the model weights, so keep one constant
# instead of repeating the string (the original duplicated it in two places).
BASE_MODEL_ID = "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit"
ADAPTER_ID = "ZennyKenny/GPRO_LoRA_Qwen_3B"

# Load the 4-bit quantized base model. device_map="auto" lets accelerate
# place layers on whatever GPU/CPU is available; torch_dtype="auto" keeps
# the dtype stored in the checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype="auto",
)

# Attach the LoRA adapter weights on top of the (frozen) base model.
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

# Tokenizer comes from the same base checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
23
def generate_response(prompt, max_new_tokens=50):
    """Generate a completion for *prompt* with the LoRA-adapted model.

    Parameters
    ----------
    prompt : str
        Raw text prompt entered by the user.
    max_new_tokens : int, optional
        Upper bound on the number of generated tokens. Defaults to 50,
        matching the previously hard-coded limit, so existing callers
        (including the Gradio wiring) behave identically.

    Returns
    -------
    str
        The decoded generation (prompt + continuation) with special
        tokens stripped.
    """
    # Tokenize on CPU, then move the tensors to wherever the model's first
    # layers were placed by device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
27
+
28
# Build the Gradio UI: one prompt textbox in, one response textbox out.
zk_qwen = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Response"),
    title="LoRA Model Inference",
    description="Demo your LoRA model with Hugging Face Gradio.",
)

# Standard Gradio entry-point guard: launching only when run as a script
# (which is how Spaces executes app.py) means importing this module — e.g.
# from a test — no longer starts a web server as a side effect.
if __name__ == "__main__":
    zk_qwen.launch()