import gradio as gr
import requests
import json
import os
import threading
import queue
import time

# Load all configuration from environment variables
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY', '')
TOGETHER_API_URL = os.environ.get('TOGETHER_API_URL', 'https://api.together.xyz/v1/chat/completions')
MODEL_A_NAME = os.environ.get('MODEL_A_NAME', '')
MODEL_B_NAME = os.environ.get('MODEL_B_NAME', '')
MODEL_C_NAME = os.environ.get('MODEL_C_NAME', '')

# Display names for the UI
MODEL_A_DISPLAY = os.environ.get('MODEL_A_DISPLAY', '')
MODEL_B_DISPLAY = os.environ.get('MODEL_B_DISPLAY', '')
MODEL_C_DISPLAY = os.environ.get('MODEL_C_DISPLAY', '')

# Headers for API calls
HEADERS = {
    "Authorization": f"Bearer {TOGETHER_API_KEY}",
    "Content-Type": "application/json"
}

SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', """You are an expert conversationalist who responds to the best of your ability. The assistant is Palmyra, created by Writer.""")

MODELS = {
    "Model A": MODEL_A_NAME,
    "Model B": MODEL_B_NAME,
    "Model C": MODEL_C_NAME
}


def stream_together_model(model_name, user_prompt, add_thinking_delay=False):
    """Stream a chat completion from the Together API, yielding text chunks."""
    if add_thinking_delay:
        # Transient placeholder; the empty-string chunk below tells the consumer
        # to clear it before real tokens arrive.
        yield "🤔 Thinking..."
        time.sleep(1)
        yield ""

    body = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ],
        "stream": True
    }

    try:
        with requests.post(TOGETHER_API_URL, headers=HEADERS, json=body, stream=True) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue
                # Strip only the SSE prefix; a blanket replace() would also
                # mangle any "data: " occurring inside the content itself.
                payload = line.decode('utf-8').removeprefix("data: ")
                if payload.strip() == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                content = (data.get("choices") or [{}])[0].get("delta", {}).get("content", "")
                if content:
                    yield content
    except Exception as e:
        yield f"[Error: {e}]"


def stream_model_c(user_prompt):
    """Fetch a (non-streaming) completion for Model C from its dedicated endpoint."""
    url = "http://192.222.54.94:8000/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    body = {
        "model": "palmyra-x5-v2",
        "messages": [
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.07
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        response.raise_for_status()
        data = response.json()
        content = (data.get("choices") or [{}])[0].get("message", {}).get("content", "")
        yield content
    except Exception as e:
        yield f"[Error: {e}]"
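
# For reference, the parser in stream_together_model expects OpenAI-compatible
# SSE lines. A typical stream looks roughly like this (an illustrative sketch;
# exact fields can vary by provider):
#
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: {"choices": [{"delta": {"content": "lo"}}]}
#   data: [DONE]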
custom_css = """... (unchanged CSS, keep same) ..."""

with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    gr.HTML("""
        <h1>Palmyra-x5</h1>
    """)
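
    # Note: these Chatbot components use Gradio's classic list-of-pairs history
    # format ([user_message, assistant_message]); stream_all_models below appends
    # [message, ""] to each history and fills the assistant slot as chunks arrive.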
""") with gr.Row(): chatbot_a = gr.Chatbot(label=MODEL_A_DISPLAY, height=500, bubble_full_width=False) chatbot_b = gr.Chatbot(label=MODEL_B_DISPLAY, height=500, bubble_full_width=False) chatbot_c = gr.Chatbot(label=MODEL_C_DISPLAY, height=500, bubble_full_width=False) with gr.Row(): user_input = gr.Textbox(placeholder="Type your message...", show_label=False, scale=8) thinking_toggle = gr.Checkbox(label="Show Thinking Process", value=True, scale=2) submit_btn = gr.Button("Send", scale=1, variant="primary") gr.Examples( examples=[ "What does Tencent do?", "Explain quantum computing", "Write a haiku about AI", "Compare Python vs JavaScript", "Tips for better sleep" ], inputs=user_input, label="Try these examples:" ) def stream_all_models(message, enable_thinking, hist_a, hist_b, hist_c): if not message.strip(): return hist_a, hist_b, hist_c, "" hist_a = hist_a + [[message, ""]] hist_b = hist_b + [[message, ""]] hist_c = hist_c + [[message, ""]] yield hist_a, hist_b, hist_c, "" q1, q2, q3 = queue.Queue(), queue.Queue(), queue.Queue() def fetch_stream(q, model, add_delay=False): try: for chunk in stream_together_model(model, message, add_delay): q.put(chunk) finally: q.put(None) def fetch_stream_c(q, message): try: for chunk in stream_model_c(message): q.put(chunk) finally: q.put(None) threading.Thread(target=fetch_stream, args=(q1, MODELS["Model A"], True)).start() threading.Thread(target=fetch_stream, args=(q2, MODELS["Model B"], True)).start() threading.Thread(target=fetch_stream, args=(q3, MODELS["Model C"], True)).start() done_a = done_b = done_c = False while not (done_a and done_b and done_c): updated = False if not done_a: try: chunk = q1.get(timeout=0.05) if chunk is None: done_a = True else: if chunk == "": hist_a[-1][1] = "" elif chunk.startswith("\ud83e\udd14"): hist_a[-1][1] = chunk else: hist_a[-1][1] += chunk updated = True except: pass if not done_b: try: chunk = q2.get(timeout=0.05) if chunk is None: done_b = True else: if chunk == "": hist_b[-1][1] = "" elif chunk.startswith("\ud83e\udd14"): hist_b[-1][1] = chunk else: hist_b[-1][1] += chunk updated = True except: pass if not done_c: try: chunk = q3.get(timeout=0.05) if chunk is None: done_c = True else: if chunk == "": hist_c[-1][1] = "" elif chunk.startswith("\ud83e\udd14"): hist_c[-1][1] = chunk else: hist_c[-1][1] += chunk updated = True except: pass if updated: yield hist_a, hist_b, hist_c, "" submit_btn.click( stream_all_models, [user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c], [chatbot_a, chatbot_b, chatbot_c, user_input] ) user_input.submit( stream_all_models, [user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c], [chatbot_a, chatbot_b, chatbot_c, user_input] ) if __name__ == "__main__": demo.launch()