Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
 # LoRA Inference Gradio Space Demo
-
+import spaces
 import gradio as gr
 from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -20,6 +20,7 @@ model = PeftModel.from_pretrained(
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")
 
+@spaces.GPU
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=50)