ZennyKenny committed
Commit 1e58692 · verified · 1 Parent(s): e750d47

Update app.py

Files changed (1): app.py +2 -1
app.py CHANGED
@@ -1,5 +1,5 @@
 # LoRA Inference Gradio Space Demo
-
+import spaces
 import gradio as gr
 from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -20,6 +20,7 @@ model = PeftModel.from_pretrained(
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")
 
+@spaces.GPU
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=50)
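The substance of the change is ZeroGPU wiring: import spaces pulls in the Hugging Face spaces package, and the @spaces.GPU decorator asks the Space to attach a GPU only for the duration of each generate_response call, so the app can run on ZeroGPU hardware instead of holding a GPU permanently. As a rough sketch (not the author's verbatim file), the updated app.py plausibly looks like the following; the base-model loading, the adapter repo name, the decode-and-return tail, and the Gradio wiring are assumptions inferred from the lines visible in the diff.

# Rough sketch of app.py after this commit (assumed, not verbatim):
# the adapter repo id, decode/return tail, and Gradio interface are placeholders.
import spaces
import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the 4-bit base model, then attach the LoRA adapter on top of it.
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit"
)
model = PeftModel.from_pretrained(base_model, "your-username/your-lora-adapter")  # placeholder adapter id

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")

@spaces.GPU  # ZeroGPU: a GPU is attached only while this function executes
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate_response, inputs="text", outputs="text", title="LoRA Inference Demo")
demo.launch()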