Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ model_name = "sarvamai/sarvam-m"
|
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
|
11 |
|
12 |
-
@spaces.GPU
|
13 |
def generate_response(prompt, chat_history):
|
14 |
|
15 |
chat_history.append(dict(role="user", content=prompt ))
|
@@ -17,8 +17,7 @@ def generate_response(prompt, chat_history):
|
|
17 |
|
18 |
print(chat_history)
|
19 |
|
20 |
-
|
21 |
-
text = tokenizer.apply_chat_template(messages, tokenize=False, enable_thinking=True)
|
22 |
|
23 |
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
24 |
|
@@ -28,9 +27,7 @@ def generate_response(prompt, chat_history):
|
|
28 |
# Conduct text generation with streaming
|
29 |
generation_kwargs = dict(
|
30 |
input_ids=model_inputs.input_ids,
|
31 |
-
max_new_tokens=
|
32 |
-
do_sample=True,
|
33 |
-
temperature=0.7,
|
34 |
streamer=streamer,
|
35 |
)
|
36 |
|
@@ -62,9 +59,8 @@ def generate_response(prompt, chat_history):
|
|
62 |
# Create the Gradio interface
|
63 |
with gr.Blocks() as demo:
|
64 |
gr.Markdown("# Sarvam M Demo")
|
65 |
-
chatbot = gr.Chatbot(height=
|
66 |
msg = gr.Textbox(label="Your Message")
|
67 |
-
|
68 |
msg.submit(generate_response, [msg, chatbot], [chatbot])
|
69 |
|
70 |
if __name__ == "__main__":
|
|
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
|
11 |
|
12 |
+
@spaces.GPU(duration=120)
|
13 |
def generate_response(prompt, chat_history):
|
14 |
|
15 |
chat_history.append(dict(role="user", content=prompt ))
|
|
|
17 |
|
18 |
print(chat_history)
|
19 |
|
20 |
+
text = tokenizer.apply_chat_template(chat_history, tokenize=False, enable_thinking=True)
|
|
|
21 |
|
22 |
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
23 |
|
|
|
27 |
# Conduct text generation with streaming
|
28 |
generation_kwargs = dict(
|
29 |
input_ids=model_inputs.input_ids,
|
30 |
+
max_new_tokens=4096,
|
|
|
|
|
31 |
streamer=streamer,
|
32 |
)
|
33 |
|
|
|
59 |
# Create the Gradio interface
|
60 |
with gr.Blocks() as demo:
|
61 |
gr.Markdown("# Sarvam M Demo")
|
62 |
+
chatbot = gr.Chatbot(height=500, type="messages")
|
63 |
msg = gr.Textbox(label="Your Message")
|
|
|
64 |
msg.submit(generate_response, [msg, chatbot], [chatbot])
|
65 |
|
66 |
if __name__ == "__main__":
|