import gradio as gr
import requests
import json
import os
import threading
import queue
import time
# Load all configuration from environment variables
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY', '')
TOGETHER_API_URL = os.environ.get('TOGETHER_API_URL', 'https://api.together.xyz/v1/chat/completions')
MODEL_A_NAME = os.environ.get('MODEL_A_NAME', '')
MODEL_B_NAME = os.environ.get('MODEL_B_NAME', '')
MODEL_C_NAME = os.environ.get('MODEL_C_NAME', '')
# Display names for the UI
MODEL_A_DISPLAY = os.environ.get('MODEL_A_DISPLAY', '')
MODEL_B_DISPLAY = os.environ.get('MODEL_B_DISPLAY', '')
MODEL_C_DISPLAY = os.environ.get('MODEL_C_DISPLAY', '')
# Headers for API calls
HEADERS = {
"Authorization": f"Bearer {TOGETHER_API_KEY}",
"Content-Type": "application/json"
}
SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', """You are Palmyra, an expert conversationalist created by Writer. Respond to the best of your ability.""")
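# Map UI slot labels to Together model identifiers. Model C is actually served
# from its own endpoint (see stream_model_c below); its entry here is kept for
# configuration symmetry.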
MODELS = {
"Model A": MODEL_A_NAME,
"Model B": MODEL_B_NAME,
"Model C": MODEL_C_NAME
}
# Placeholder shown while a model "thinks"; the drain loop further down matches
# on this exact string, so keep the yield below and the comparison in sync.
THINKING_PLACEHOLDER = "🤔 Thinking..."

def stream_together_model(model_name, user_prompt, add_thinking_delay=False):
    if add_thinking_delay:
        # Show the placeholder, pause briefly, then send "" to clear it.
        yield THINKING_PLACEHOLDER
        time.sleep(1)
        yield ""
body = {
"model": model_name,
"messages": [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt}
],
"stream": True
}
    try:
        with requests.post(TOGETHER_API_URL, headers=HEADERS, json=body, stream=True, timeout=120) as response:
            response.raise_for_status()
            # Together streams OpenAI-style server-sent events: b"data: {...}"
            # lines, terminated by a final "data: [DONE]".
            for line in response.iter_lines():
                if not line:
                    continue
                payload = line.decode("utf-8").removeprefix("data: ")
                if payload.strip() == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                if content:
                    yield content
    except Exception as e:
        yield f"[Error: {e}]"
def stream_model_c(user_prompt):
    # Model C is a separately hosted server; allow overriding its address via the
    # environment (MODEL_C_API_URL), consistent with the other settings above.
    url = os.environ.get('MODEL_C_API_URL', 'http://192.222.54.94:8000/v1/chat/completions')
    headers = {"Content-Type": "application/json"}
    body = {
        "model": "palmyra-x5-v2",
        "messages": [
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.07
    }
    try:
        response = requests.post(url, headers=headers, json=body, timeout=120)
        response.raise_for_status()
        data = response.json()
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        # This endpoint does not stream, so the whole reply is yielded as one chunk.
        yield content
    except Exception as e:
        yield f"[Error: {e}]"
custom_css = """... (unchanged CSS, keep same) ..."""
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
gr.HTML("""
Palmyra-x5
""")
with gr.Row():
chatbot_a = gr.Chatbot(label=MODEL_A_DISPLAY, height=500, bubble_full_width=False)
chatbot_b = gr.Chatbot(label=MODEL_B_DISPLAY, height=500, bubble_full_width=False)
chatbot_c = gr.Chatbot(label=MODEL_C_DISPLAY, height=500, bubble_full_width=False)
with gr.Row():
user_input = gr.Textbox(placeholder="Type your message...", show_label=False, scale=8)
thinking_toggle = gr.Checkbox(label="Show Thinking Process", value=True, scale=2)
submit_btn = gr.Button("Send", scale=1, variant="primary")
gr.Examples(
examples=[
"What does Tencent do?",
"Explain quantum computing",
"Write a haiku about AI",
"Compare Python vs JavaScript",
"Tips for better sleep"
],
inputs=user_input,
label="Try these examples:"
)
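    # Fan the prompt out to all three models on background threads and fan the
    # streamed chunks back in through per-model queues, yielding incremental
    # updates so the three panes fill in side by side.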
    def stream_all_models(message, enable_thinking, hist_a, hist_b, hist_c):
        if not message.strip():
            # This handler is a generator; yield (not return) so Gradio still updates.
            yield hist_a, hist_b, hist_c, ""
            return
hist_a = hist_a + [[message, ""]]
hist_b = hist_b + [[message, ""]]
hist_c = hist_c + [[message, ""]]
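        # Echo the user message in all three panes before any model replies.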
yield hist_a, hist_b, hist_c, ""
q1, q2, q3 = queue.Queue(), queue.Queue(), queue.Queue()
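        # Each worker forwards its model's chunks into a queue and finishes with
        # a None sentinel so the drain loop knows that stream is done.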
def fetch_stream(q, model, add_delay=False):
try:
for chunk in stream_together_model(model, message, add_delay):
q.put(chunk)
finally:
q.put(None)
def fetch_stream_c(q, message):
try:
for chunk in stream_model_c(message):
q.put(chunk)
finally:
q.put(None)
        threading.Thread(target=fetch_stream, args=(q1, MODELS["Model A"], enable_thinking), daemon=True).start()
        threading.Thread(target=fetch_stream, args=(q2, MODELS["Model B"], enable_thinking), daemon=True).start()
        # Model C streams from its own endpoint rather than the Together API.
        threading.Thread(target=fetch_stream_c, args=(q3, message), daemon=True).start()
        histories = [hist_a, hist_b, hist_c]
        done = [False, False, False]
        # Drain all three queues until every worker has sent its None sentinel,
        # pushing a UI update whenever any pane changed.
        while not all(done):
            updated = False
            for i, q in enumerate([q1, q2, q3]):
                if done[i]:
                    continue
                try:
                    chunk = q.get(timeout=0.05)
                except queue.Empty:
                    continue
                if chunk is None:
                    done[i] = True
                elif chunk == "":
                    # An empty chunk clears the thinking placeholder.
                    histories[i][-1][1] = ""
                    updated = True
                elif chunk == THINKING_PLACEHOLDER:
                    histories[i][-1][1] = chunk
                    updated = True
                else:
                    histories[i][-1][1] += chunk
                    updated = True
            if updated:
                yield hist_a, hist_b, hist_c, ""
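    # The Send button and pressing Enter both run the same streaming handler.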
submit_btn.click(
stream_all_models,
[user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
[chatbot_a, chatbot_b, chatbot_c, user_input]
)
user_input.submit(
stream_all_models,
[user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
[chatbot_a, chatbot_b, chatbot_c, user_input]
)
if __name__ == "__main__":
    # queue() enables generator (streaming) handlers; recent Gradio versions
    # turn it on automatically, but older ones need the explicit call.
    demo.queue()
    demo.launch()
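# Example local run (hypothetical values; substitute your own key and model IDs):
#   export TOGETHER_API_KEY=...
#   export MODEL_A_NAME=<together-model-id> MODEL_B_NAME=<together-model-id>
#   python app.py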