import gradio as gr
import requests
import json
import os
import threading
import queue
import time
# Load all configuration from environment variables
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY', '')
TOGETHER_API_URL = os.environ.get('TOGETHER_API_URL', 'https://api.together.xyz/v1/chat/completions')
MODEL_A_NAME = os.environ.get('MODEL_A_NAME', '')
MODEL_B_NAME = os.environ.get('MODEL_B_NAME', '')
MODEL_C_NAME = os.environ.get('MODEL_C_NAME', '')
# Display names for the UI
MODEL_A_DISPLAY = os.environ.get('MODEL_A_DISPLAY', '')
MODEL_B_DISPLAY = os.environ.get('MODEL_B_DISPLAY', '')
MODEL_C_DISPLAY = os.environ.get('MODEL_C_DISPLAY', '')
# Headers for API calls
HEADERS = {
"Authorization": f"Bearer {TOGETHER_API_KEY}",
"Content-Type": "application/json"
}
SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', """You are Palmyra, an expert conversationalist created by Writer. Respond to the best of your ability.""")
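# Map UI slot labels to Together model identifiers. Model C is actually served
# from its own endpoint (see stream_model_c below); its entry here is kept for
# configuration symmetry.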
MODELS = {
"Model A": MODEL_A_NAME,
"Model B": MODEL_B_NAME,
"Model C": MODEL_C_NAME
}
# Placeholder shown while a model "thinks"; the drain loop further down matches
# on this exact string, so keep the yield below and the comparison in sync.
THINKING_PLACEHOLDER = "🤔 Thinking..."

def stream_together_model(model_name, user_prompt, add_thinking_delay=False):
    if add_thinking_delay:
        # Show the placeholder, pause briefly, then send "" to clear it.
        yield THINKING_PLACEHOLDER
        time.sleep(1)
        yield ""
body = {
"model": model_name,
"messages": [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt}
],
"stream": True
}
    try:
        with requests.post(TOGETHER_API_URL, headers=HEADERS, json=body, stream=True, timeout=120) as response:
            response.raise_for_status()
            # Together streams OpenAI-style server-sent events: b"data: {...}"
            # lines, terminated by a final "data: [DONE]".
            for line in response.iter_lines():
                if not line:
                    continue
                payload = line.decode("utf-8").removeprefix("data: ")
                if payload.strip() == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                if content:
                    yield content
    except Exception as e:
        yield f"[Error: {e}]"
def stream_model_c(user_prompt):
    # Model C is a separately hosted server; allow overriding its address via the
    # environment (MODEL_C_API_URL), consistent with the other settings above.
    url = os.environ.get('MODEL_C_API_URL', 'http://192.222.54.94:8000/v1/chat/completions')
    headers = {"Content-Type": "application/json"}
    body = {
        "model": "palmyra-x5-v2",
        "messages": [
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.07
    }
    try:
        response = requests.post(url, headers=headers, json=body, timeout=120)
        response.raise_for_status()
        data = response.json()
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        # This endpoint does not stream, so the whole reply is yielded as one chunk.
        yield content
    except Exception as e:
        yield f"[Error: {e}]"
custom_css = """... (unchanged CSS, keep same) ..."""
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
gr.HTML("""
Palmyra-x5
""")
with gr.Row():
chatbot_a = gr.Chatbot(label=MODEL_A_DISPLAY, height=500, bubble_full_width=False)
chatbot_b = gr.Chatbot(label=MODEL_B_DISPLAY, height=500, bubble_full_width=False)
chatbot_c = gr.Chatbot(label=MODEL_C_DISPLAY, height=500, bubble_full_width=False)
with gr.Row():
user_input = gr.Textbox(placeholder="Type your message...", show_label=False, scale=8)
thinking_toggle = gr.Checkbox(label="Show Thinking Process", value=True, scale=2)
submit_btn = gr.Button("Send", scale=1, variant="primary")
gr.Examples(
examples=[
"What does Tencent do?",
"Explain quantum computing",
"Write a haiku about AI",
"Compare Python vs JavaScript",
"Tips for better sleep"
],
inputs=user_input,
label="Try these examples:"
)
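    # Fan the prompt out to all three models on background threads and fan the
    # streamed chunks back in through per-model queues, yielding incremental
    # updates so the three panes fill in side by side.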
    def stream_all_models(message, enable_thinking, hist_a, hist_b, hist_c):
        if not message.strip():
            # This handler is a generator; yield (not return) so Gradio still updates.
            yield hist_a, hist_b, hist_c, ""
            return
hist_a = hist_a + [[message, ""]]
hist_b = hist_b + [[message, ""]]
hist_c = hist_c + [[message, ""]]
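        # Echo the user message in all three panes before any model replies.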
yield hist_a, hist_b, hist_c, ""
q1, q2, q3 = queue.Queue(), queue.Queue(), queue.Queue()
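        # Each worker forwards its model's chunks into a queue and finishes with
        # a None sentinel so the drain loop knows that stream is done.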
def fetch_stream(q, model, add_delay=False):
try:
for chunk in stream_together_model(model, message, add_delay):
q.put(chunk)
finally:
q.put(None)
def fetch_stream_c(q, message):
try:
for chunk in stream_model_c(message):
q.put(chunk)
finally:
q.put(None)
        threading.Thread(target=fetch_stream, args=(q1, MODELS["Model A"], enable_thinking), daemon=True).start()
        threading.Thread(target=fetch_stream, args=(q2, MODELS["Model B"], enable_thinking), daemon=True).start()
        # Model C streams from its own endpoint rather than the Together API.
        threading.Thread(target=fetch_stream_c, args=(q3, message), daemon=True).start()
        histories = [hist_a, hist_b, hist_c]
        done = [False, False, False]
        # Drain all three queues until every worker has sent its None sentinel,
        # pushing a UI update whenever any pane changed.
        while not all(done):
            updated = False
            for i, q in enumerate([q1, q2, q3]):
                if done[i]:
                    continue
                try:
                    chunk = q.get(timeout=0.05)
                except queue.Empty:
                    continue
                if chunk is None:
                    done[i] = True
                elif chunk == "":
                    # An empty chunk clears the thinking placeholder.
                    histories[i][-1][1] = ""
                    updated = True
                elif chunk == THINKING_PLACEHOLDER:
                    histories[i][-1][1] = chunk
                    updated = True
                else:
                    histories[i][-1][1] += chunk
                    updated = True
            if updated:
                yield hist_a, hist_b, hist_c, ""
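    # The Send button and pressing Enter both run the same streaming handler.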
submit_btn.click(
stream_all_models,
[user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
[chatbot_a, chatbot_b, chatbot_c, user_input]
)
user_input.submit(
stream_all_models,
[user_input, thinking_toggle, chatbot_a, chatbot_b, chatbot_c],
[chatbot_a, chatbot_b, chatbot_c, user_input]
)
if __name__ == "__main__":
    # queue() enables generator (streaming) handlers; recent Gradio versions
    # turn it on automatically, but older ones need the explicit call.
    demo.queue()
    demo.launch()
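# Example local run (hypothetical values; substitute your own key and model IDs):
#   export TOGETHER_API_KEY=...
#   export MODEL_A_NAME=<together-model-id> MODEL_B_NAME=<together-model-id>
#   python app.py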