Spaces:

KingNish
/

Sarvam-M-Demo

Sleeping

App Files Files Community

KingNish committed on Jun 8

Commit

399775a

verified ·

1 Parent(s): 060b8a4

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -7

app.py CHANGED Viewed

@@ -14,12 +14,21 @@ indicators = ["Thinking ⠋", "Thinking ⠙", "Thinking ⠹", "Thinking ⠸", "T
 @spaces.GPU(duration=120)
 def generate_response(prompt, chat_history):
-    chat_history.append(dict(role="user", content=prompt))
-    yield chat_history
     print(chat_history)
-    text = tokenizer.apply_chat_template(chat_history, tokenize=False, enable_thinking=True)
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
@@ -42,7 +51,7 @@ def generate_response(prompt, chat_history):
     reasoning_done = False
     start_time = time.time()
-    chat_history.append(dict(role="assistant", content=reasoning_content, metadata={"title": "Thinking..."}))
     indicator_index = 0
     for new_text in streamer:
@@ -50,7 +59,7 @@ def generate_response(prompt, chat_history):
             reasoning_done = True
             thought_duration = time.time() - start_time
             chat_history[-1]["metadata"] = {"title": f"Thought for {thought_duration:.2f} seconds"}
-            chat_history.append(dict(role="assistant", content=content))
         if not reasoning_done:
             # Update the thinking indicator
@@ -62,14 +71,14 @@ def generate_response(prompt, chat_history):
             content += new_text
             chat_history[-1]["content"] = content
-        yield chat_history
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Sarvam M Demo")
     chatbot = gr.Chatbot(height=500, type="messages")
     msg = gr.Textbox(label="Your Message")
-    msg.submit(generate_response, [msg, chatbot], [chatbot])
 if __name__ == "__main__":
     demo.launch(mcp_server=True)

 @spaces.GPU(duration=120)
 def generate_response(prompt, chat_history):
+    chat_history.append({"role": "user", "content": prompt})
+    yield chat_history, ""
     print(chat_history)
+    # Preprocess chat history to include thinking tags
+    processed_chat_history = []
+    for message in chat_history:
+        # Skipping Thought Process in history
+        if message["role"] == "assistant" and "metadata" in message and message["metadata"].get("title", "").startswith("Thought"):
+            pass
+        else:
+            processed_chat_history.append(message)
+    text = tokenizer.apply_chat_template(processed_chat_history, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     reasoning_done = False
     start_time = time.time()
+    chat_history.append({"role": "assistant", "content": reasoning_content, "metadata": {"title": "Thinking..."}})
     indicator_index = 0
     for new_text in streamer:
             reasoning_done = True
             thought_duration = time.time() - start_time
             chat_history[-1]["metadata"] = {"title": f"Thought for {thought_duration:.2f} seconds"}
+            chat_history.append({"role": "assistant", "content": content})
         if not reasoning_done:
             # Update the thinking indicator
             content += new_text
             chat_history[-1]["content"] = content
+        yield chat_history, ""
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Sarvam M Demo")
     chatbot = gr.Chatbot(height=500, type="messages")
     msg = gr.Textbox(label="Your Message")
+    msg.submit(generate_response, [msg, chatbot], [chatbot, msg])
 if __name__ == "__main__":
     demo.launch(mcp_server=True)