Update app.py
app.py
CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # Load the model and tokenizer locally
 model_name = "kz919/QwQ-0.5B-Distilled-SFT"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 
 # Define the function to handle chat responses
 @spaces.GPU
@@ -17,7 +17,7 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     prompt += f"User: {message}\nAssistant:"
 
     # Tokenize the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
     # Generate a response
     outputs = model.generate(
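The change moves both the model weights and the tokenized inputs onto the GPU so that generation runs on CUDA instead of failing on a device mismatch. Below is a minimal sketch of the same fix written device-agnostically; the torch import, the device-selection line, and the generate_reply helper are illustrative assumptions and not part of the original app.py:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick CUDA when available, otherwise fall back to CPU
# (assumption: the committed app.py hard-codes "cuda")
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "kz919/QwQ-0.5B-Distilled-SFT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

def generate_reply(prompt: str, max_tokens: int = 256) -> str:
    # Tokenize the prompt and move the tensors to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Generate a response
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    # Decode only the newly generated tokens, dropping the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Keeping the model and the inputs on the same device is the key point of the commit; the fallback to CPU simply makes the sketch runnable on machines without a GPU.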