Upload folder using huggingface_hub

Files changed:

- .gitignore +3 -1
- README.md +15 -1
- chatbot.py +35 -7
- eval.py +288 -71
- eval_old.py +145 -0
- leaderboard.py +69 -0
- pyproject.toml +3 -0
- requirements.txt +3 -0
- uv.lock +0 -0
- vllm_inference.py +3 -1
.gitignore CHANGED

```diff
@@ -1,5 +1,7 @@
 .env
 .ai/
 .cursorrules
-gradio_cache_examples/
 __pycache__/
+gradio_cached_examples/
+supa.ipynb
+.venv/
```
README.md CHANGED

````diff
@@ -6,7 +6,7 @@ sdk_version: 4.44.0
 ---
 # Turing-Test-Prompt-Competition
 
-This project implements a chatbot using vLLM for inference and Streamlit for the user interface.
+This project implements a chatbot using vLLM for inference, Streamlit for the user interface, and Gradio for the evaluation interface.
 
 ## Setup and Deployment
 
@@ -38,6 +38,20 @@ To run the chatbot locally:
 ngrok http 8501
 ```
 
+### Running the Evaluation Interface
+
+To run the evaluation interface locally:
+
+1. Start the Gradio app:
+```
+gradio eval.py
+```
+
+2. To deploy to HF Space, run:
+```
+gradio deploy
+```
+
 ## Project Structure
 
 - `download_llama.py`: Script to download the LLaMA model
````
chatbot.py CHANGED

```diff
@@ -27,6 +27,20 @@ def get_completion(client, model_id, messages, args):
     except Exception as e:
         print(f"Error during API call: {e}")
         return None
+
+def save_configuration(config):
+    from supabase import create_client, Client
+
+    url: str = "https://rwtzkiofjrpekpcazdoa.supabase.co"
+    key: str = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InJ3dHpraW9manJwZWtwY2F6ZG9hIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjUyMDc0MTMsImV4cCI6MjA0MDc4MzQxM30.ey2PKyQkxlXorq_NnUQtbj08MgVW31h0pq1MYMgV9eU"
+    supabase: Client = create_client(url, key)
+    response = supabase.table("config").insert(config).execute()
+
+def clear_chat_history():
+    st.session_state.messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "assistant", "content": "Hello!"}
+    ]
 
 # App title
 st.set_page_config(page_title="Turing Test")
@@ -49,14 +63,32 @@ with st.sidebar:
     # Add system prompt input
     st.subheader('System Prompt')
     system_prompt = st.text_area("Enter a system prompt:",
-                                 "you are
+                                 "you are roleplaying as an old grandma",
                                  help="This message sets the behavior of the AI.")
     st.subheader('Models and parameters')
     selected_model = st.sidebar.selectbox('Choose a model', ['meta-llama/Meta-Llama-3.1-8B-Instruct'], key='selected_model')
     temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.8, step=0.1)
     top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.95, step=0.01)
     max_length = st.sidebar.slider('max_length', min_value=32, max_value=1024, value=32, step=8)
+    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
 
+    # Add submit button for configuration
+    submit_config = st.sidebar.button('Submit Configuration')
+
+
+    if submit_config:
+        # Save the current configuration to the database
+        config = {
+            "user_id": "123",
+            "prompt": system_prompt,
+            "model": selected_model,
+            "temperature": temperature,
+            "top_p": top_p,
+            "max_length": max_length
+        }
+        save_configuration(config)
+        st.sidebar.success("Configuration submitted successfully!")
+
 
 # Store chat history
 if "messages" not in st.session_state.keys():
@@ -70,12 +102,8 @@ for message in st.session_state.messages[1:]:
     with st.chat_message(message["role"]):
         st.write(message["content"])
 
-def clear_chat_history():
-    st.session_state.messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "assistant", "content": "Hello!"}
-    ]
-st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
+
+
 
 # Function for generating Llama2 response using OpenAI client API
 def generate_llama2_response(prompt_input, model, temperature, top_p, max_length):
```
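A note on the `save_configuration` helper above: it embeds the Supabase project URL and anon key directly in the source, while eval.py and leaderboard.py (below) already read `SUPABASE_URL` and `SUPABASE_KEY` from a `.env` file via python-dotenv. A minimal sketch of bringing chatbot.py in line with that pattern (a hypothetical refactor, not part of this commit; it assumes the same two environment variables):

```python
# Sketch: load Supabase credentials from .env, mirroring eval.py (hypothetical refactor).
import os

from dotenv import load_dotenv
from supabase import Client, create_client

load_dotenv()  # picks up SUPABASE_URL / SUPABASE_KEY from the gitignored .env

supabase: Client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_KEY"))


def save_configuration(config: dict) -> None:
    # Same insert as the committed helper, without inline credentials.
    supabase.table("config").insert(config).execute()
```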
eval.py CHANGED

eval.py was essentially rewritten in this commit (+288, -71); the previous version is preserved as eval_old.py, added below. The new contents:

```python
import gradio as gr
import os
import openai
from dataclasses import dataclass
from supabase import create_client, Client
from uuid import UUID
from dotenv import load_dotenv
import random

# Load environment variables from .env file
load_dotenv()

# Initialize Supabase client
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

SHOW_CONFIG = True

@dataclass
class Args:
    frequency_penalty: float = 0
    max_tokens: int = 32
    n: int = 1
    presence_penalty: float = 0
    seed: int = 42
    stop: str = None
    stream: bool = False
    temperature: float = 0.8
    top_p: float = 0.95

def get_completion(client, config, messages):
    print("GETTING COMPLETION")
    completion_args = {
        "model": config['model'],
        "messages": messages,
        "frequency_penalty": config.get('frequency_penalty', 0),
        "max_tokens": config.get('max_length', 32),
        "n": config.get('n', 1),
        "presence_penalty": config.get('presence_penalty', 0),
        "seed": config.get('seed', 42),
        "stop": config.get('stop', None),
        "stream": config.get('stream', False),
        "temperature": config.get('temperature', 0.8),
        "top_p": config.get('top_p', 0.95),
    }

    try:
        print("TRYING TO GET COMPLETION")
        response = client.chat.completions.create(**completion_args)
        print("GOT COMPLETION")
        return response
    except Exception as e:
        print(f"Error during API call: {e}")
        return None

def get_two_random_configs(round_num: int):
    print("GETTING TWO RANDOM CONFIGS")
    # Fetch all configurations for the current round
    response = supabase.table("configs")\
        .select("*")\
        .eq("round", round_num)\
        .execute()

    if not response.data or len(response.data) < 2:
        return None, None

    # Randomly select two unique configurations
    selected_configs = random.sample(response.data, 2)
    return selected_configs[0], selected_configs[1]

def initialize_session(state):
    print("INITIALIZING SESSION")
    current_round = get_current_round()
    if not current_round:
        state.value["error"] = "Error: No active round found."
        return

    config_a, config_b = get_two_random_configs(round_num=current_round)
    if not config_a or not config_b:
        state.value["error"] = "Error: Not enough configurations available for voting."
        return

    state.value['config_a'] = config_a
    state.value['config_b'] = config_b
    state.value['conversation_a'] = []
    state.value['conversation_b'] = []
    state.value['round'] = current_round

def chat_response_a(message, history):
    print("CHAT RESPONSE A")
    return chat_response(message, history, 'a')

def chat_response_b(message, history):
    print("CHAT RESPONSE B")
    return chat_response(message, history, 'b')

def chat_response(message, history, config_type):
    # Access the state within the Blocks
    current_state = demo.blocks['state'].value  # Accessing state correctly
    print("CHAT RESPONSE")
    config_a = current_state.get('config_a')
    config_b = current_state.get('config_b')

    # Handle initialization if configs are missing
    if not config_a or not config_b:
        initialize_session(demo.blocks['state'])
        config_a = current_state.get('config_a')
        config_b = current_state.get('config_b')
        if not config_a or not config_b:
            return "Error: Configurations not initialized sufficiently."

    # Set up OpenAI client
    openai_api_key = "super-secret-token"

    os.environ['OPENAI_API_KEY'] = openai_api_key

    openai.api_key = openai_api_key
    openai.api_base = "https://turingtest--example-vllm-openai-compatible-serve.modal.run/v1"
    client = openai.OpenAI(api_key=openai_api_key, base_url=openai.api_base)

    # Append existing conversation
    if config_type == 'a':
        system_message = {"role": "system", "content": f"{config_a['sys_prompt']}"}
        messages = [system_message]
        for user_msg, assistant_msg in current_state['conversation_a']:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    else:
        system_message = {"role": "system", "content": f"{config_b['sys_prompt']}"}
        messages = [system_message]
        for user_msg, assistant_msg in current_state['conversation_b']:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Determine which configuration to use
    # config_id = config_a['id'] if config_type == 'a' else config_b['id']

    # Get completion
    # response = get_completion(client, config_id, messages)
    if config_type == 'a':
        response = get_completion(client, config_a, messages)
    else:
        response = get_completion(client, config_b, messages)

    assistant_reply = (
        response.choices[0].message.content if response and response.choices else
        "Error: Please retry or contact support if retried more than twice."
    )

    # Update the appropriate conversation state
    if config_type == 'a':
        current_state['conversation_a'].append((message, assistant_reply))
    else:
        current_state['conversation_b'].append((message, assistant_reply))

    # Update the state
    # demo.blocks['state'].update(current_state)
    demo.blocks['state'].value = current_state

    return assistant_reply

def create_chat_interface(model_label):
    print("CREATE CHAT INTERFACE")
    if model_label == 'a':
        return gr.ChatInterface(
            fn=lambda message, history: (chat_response_a(message, history)),
            chatbot=gr.Chatbot(height=400, label=f"Choice {model_label}"),
            textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
            description="",
            theme="dark",
            retry_btn=None,
            undo_btn=None,
            clear_btn=None,
        )
    else:
        return gr.ChatInterface(
            fn=lambda message, history: (chat_response_b(message, history)),
            chatbot=gr.Chatbot(height=400, label=f"Choice {model_label}"),
            textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
            description="",
            theme="dark",
            retry_btn=None,
            undo_btn=None,
            clear_btn=None,
        )

def submit_vote(vote: str, state):
    print("SUBMIT VOTE")

    a_config_id = state.value['config_a']['id']
    b_config_id = state.value['config_b']['id']
    conversation_a = state.value.get('conversation_a', [])
    conversation_b = state.value.get('conversation_b', [])

    # Save conversations to Supabase
    supabase.table("conversations").insert([
        {
            "user_id": None,  # No authentication, set to None or another identifier if available
            "configuration_id": a_config_id,
            "messages": conversation_a
        },
        {
            "user_id": None,
            "configuration_id": b_config_id,
            "messages": conversation_b
        }
    ]).execute()

    # Save vote to Supabase
    supabase.table("votes").insert({
        "a_config_id": str(a_config_id),
        "b_config_id": str(b_config_id),
        "voted_by_uid": None,  # No user ID since authentication is not implemented
        "round": get_current_round(),
        "is_tie": vote == "tie",
        "a_wins": vote == "a",
        "created_at": "now()"
    }).execute()

    # Update ELO ratings
    # update_elo(a_config_id, b_config_id, vote)

    # Reset conversations for next voting
    state.value['conversation_a'] = []
    state.value['conversation_b'] = []

    return "Vote submitted!"

def update_elo(a_config_id: UUID, b_config_id: UUID, vote: str):
    print("UPDATE ELO")
    a_elo_response = supabase.table("elos").select("rating").eq("user_id", a_config_id).single().execute()
    b_elo_response = supabase.table("elos").select("rating").eq("user_id", b_config_id).single().execute()

    if not a_elo_response.data or not b_elo_response.data:
        return

    a_elo = a_elo_response.data["rating"]
    b_elo = b_elo_response.data["rating"]

    if vote == "a":
        a_new = a_elo + 10
        b_new = b_elo - 10
    elif vote == "b":
        a_new = a_elo - 10
        b_new = b_elo + 10
    else:
        # Tie: no change or minimal change
        a_new = a_elo
        b_new = b_elo

    supabase.table("elos").update({"rating": a_new}).eq("user_id", a_config_id).execute()
    supabase.table("elos").update({"rating": b_new}).eq("user_id", b_config_id).execute()

def get_current_round():
    print("GET CURRENT ROUND")
    response = supabase.table("round_status").select("round").eq("is_eval_active", True).single().execute()
    if response.data:
        return response.data["round"]
    return None

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), head=
    """
    <style>
    body {
        font-family: 'Calibri', sans-serif; /* Choose your desired font */
    }
    </style>
    """) as demo:
    gr.Markdown("## Turing Test Prompt Comp")

    # State to hold current config IDs and separate conversations
    state = gr.State({
        "config_a": None,
        "config_b": None,
        "conversation_a": [],
        "conversation_b": [],
        "round": 1,
        "error": None
    })
    demo.blocks['state'] = state  # Assign state to a key for easy access

    initialize_session(state)

    with gr.Row():
        with gr.Column():
            chat_a = create_chat_interface('a')
        with gr.Column():
            chat_b = create_chat_interface('b')

    with gr.Row():
        a_better = gr.Button("A is better 👈", scale=1)
        tie = gr.Button("🤝 Tie", scale=1)
        b_better = gr.Button("👉 B is better", scale=1)

    # Output component to display status messages
    output_message = gr.Textbox(label="Status", interactive=False)

    # Define separate functions for each vote type
    def submit_vote_a():
        return submit_vote('a', state)

    def submit_vote_b():
        return submit_vote('b', state)

    def submit_vote_tie():
        return submit_vote('tie', state)

    # Connect buttons to their respective functions
    a_better.click(
        submit_vote_a,
        inputs=None,
        outputs=output_message
    )
    b_better.click(
        submit_vote_b,
        inputs=None,
        outputs=output_message
    )
    tie.click(
        submit_vote_tie,
        inputs=None,
        outputs=output_message
    )

    prompt_input = gr.Textbox(placeholder="Message for both...", container=False)
    send_btn = gr.Button("Send to Both", variant="primary")

    def send_prompt(prompt):
        current_state = state.value
        # Append user's prompt to both conversations
        if prompt:
            current_state['conversation_a'].append((prompt, None))
            current_state['conversation_b'].append((prompt, None))
        state.update(current_state)
        return "", ""

    send_btn.click(
        send_prompt,
        inputs=prompt_input,
        outputs=[
            prompt_input,
            prompt_input
        ]
    )
    prompt_input.submit(
        send_prompt,
        inputs=prompt_input,
        outputs=[
            prompt_input,
            prompt_input
        ]
    )

if __name__ == "__main__":
    demo.launch(share=True)
```
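`update_elo` above is commented out in `submit_vote` and, as written, moves both ratings by a flat ±10 regardless of the gap between opponents. If it gets re-enabled, the standard Elo expected-score update is barely more code; here is a sketch under the assumption of a conventional K-factor of 32 (the function below is illustrative, not part of the commit):

```python
# Sketch: standard Elo update (hypothetical replacement for the flat ±10 logic).
def elo_update(rating_a: float, rating_b: float, score_a: float, k: float = 32.0):
    """score_a is 1.0 if A wins, 0.0 if B wins, 0.5 for a tie."""
    expected_a = 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400))
    delta = k * (score_a - expected_a)
    return rating_a + delta, rating_b - delta

# Upsets move ratings more than expected wins:
print(elo_update(1000, 1200, 1.0))  # ~(1024.3, 1175.7)
print(elo_update(1200, 1000, 1.0))  # ~(1207.7, 992.3)
```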
eval_old.py ADDED

The previous eval.py, preserved for reference:

```python
import gradio as gr

import os
import openai
from dataclasses import dataclass

@dataclass
class Args:
    frequency_penalty: float = 0
    max_tokens: int = 32
    n: int = 1
    presence_penalty: float = 0
    seed: int = 42
    stop: str = None
    stream: bool = False
    temperature: float = 0.8
    top_p: float = 0.95

def get_completion(client, model_id, messages, args):
    completion_args = {
        "model": model_id,
        "messages": messages,
        "frequency_penalty": args.frequency_penalty,
        "max_tokens": args.max_tokens,
        "n": args.n,
        "presence_penalty": args.presence_penalty,
        "seed": args.seed,
        "stop": args.stop,
        "stream": args.stream,
        "temperature": args.temperature,
        "top_p": args.top_p,
    }

    completion_args = {
        k: v for k, v in completion_args.items() if v is not None
    }

    try:
        response = client.chat.completions.create(**completion_args)
        return response
    except Exception as e:
        print(f"Error during API call: {e}")
        return None

def chat_response(message, history, model):
    # Set up OpenAI client
    openai_api_key = "super-secret-token"
    os.environ['OPENAI_API_KEY'] = openai_api_key
    openai.api_key = openai_api_key
    openai.api_base = "https://turingtest--example-vllm-openai-compatible-serve.modal.run/v1"
    client = openai.OpenAI(api_key=openai_api_key, base_url=openai.api_base)

    # Prepare messages
    messages = [{"role": "system", "content": "You are a helpful assistant."}]

    # Convert history to the correct format
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Set up arguments
    args = Args()

    # Use the correct model identifier
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

    # Get completion
    response = get_completion(client, model_id, messages, args)

    if response and response.choices:
        return response.choices[0].message.content
    else:
        return f"Error: Please retry or contact support if retried more than twice."

def create_chat_interface(model):
    return gr.ChatInterface(
        fn=lambda message, history: chat_response(message, history, model),
        chatbot=gr.Chatbot(height=400, label=f"Choice {model}"),
        textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
        # title=f"Choice {model}",
        description="",
        theme="dark",
        # examples=[["what's up"]],
        # cache_examples=True,
        retry_btn=None,
        undo_btn=None,
        clear_btn=None,
    )

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), head=
    """
    <style>
    body {
        font-family: 'Calibri', sans-serif; /* Choose your desired font */
    }
    </style>
    """) as demo:
    gr.Markdown("## Turing Test Prompt Competition")

    with gr.Row():
        with gr.Column():
            chat_a = create_chat_interface("A")
        with gr.Column():
            chat_b = create_chat_interface("B")

    with gr.Row():
        a_better = gr.Button("👉 A is better", scale=1)
        b_better = gr.Button("👈 B is better", scale=1)
        tie = gr.Button("🤝 Tie", scale=1)
        both_bad = gr.Button("👎 Both are bad", scale=1)


    prompt_input = gr.Textbox(placeholder="Message for both...", container=False)
    send_btn = gr.Button("Send to Both", variant="primary")

    def send_prompt(prompt):
        # This function will now return the prompt for both chatbots
        return prompt, prompt, gr.update(value=""), gr.update(value="")

    # Update the click and submit events
    send_btn.click(
        send_prompt,
        inputs=[prompt_input],
        outputs=[
            chat_a.textbox,
            chat_b.textbox,
            prompt_input,
            prompt_input
        ]
    )
    prompt_input.submit(
        send_prompt,
        inputs=[prompt_input],
        outputs=[
            chat_a.textbox,
            chat_b.textbox,
            prompt_input,
            prompt_input
        ]
    )
if __name__ == "__main__":
    demo.launch(share=True)
```
leaderboard.py ADDED

```python
import gradio as gr
import time
from supabase import create_client, Client
import os
from dotenv import load_dotenv
import pandas as pd

# Load environment variables
load_dotenv()

# Initialize Supabase client
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)


def get_active_round():
    # Fetch the active round data and return both round ID and round number
    response = supabase.table("round_status").select("id, round").eq("is_eval_active", True).single().execute()
    if response.data:
        return response.data['id'], response.data['round']  # Return both round ID and round number
    return None, None


def get_elo_ratings(round_id):
    # Query the ELO ratings based on the round_id
    response = supabase.table("elos").select("user_id, rating").eq("round", round_id).execute()

    print("get_elo_ratings: ", response.data)
    if response.data:
        df = pd.DataFrame(response.data)
        df = df.sort_values(by='rating', ascending=False)
        print(df.head())
        return df
    return pd.DataFrame(columns=['user_id', 'rating'])


def update_info():
    # Get the active round ID and round number
    round_id, round_number = get_active_round()
    print("Active Round ID:", round_id, "Round Number:", round_number)  # This will print both round ID and round number
    if round_id:
        # Fetch the ELO ratings based on the round ID
        elo_ratings = get_elo_ratings(round_id)
        return f"Active Round: {round_number}", elo_ratings  # Display the round number in the UI
    else:
        return "No active round found", pd.DataFrame(columns=['user_id', 'rating'])


with gr.Blocks() as demo:
    gr.Markdown("## Leaderboard")
    round_info = gr.Textbox(label="")
    elo_table = gr.DataFrame(label="ELO Ratings", headers=["User ID", "Rating"])

    # Create a periodic update function
    def periodic_update():
        round_status, ratings = update_info()
        return round_status, ratings

    # Load initial values
    demo.load(update_info, outputs=[round_info, elo_table])

    # Use gr.Timer to trigger updates every 5 seconds
    timer = gr.Timer(value=5, active=True)  # Set timer to tick every 5 seconds
    timer.tick(periodic_update, outputs=[round_info, elo_table])

if __name__ == "__main__":
    demo.queue()
    demo.launch()
```
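The refresh loop above pairs a `gr.Timer` with a `tick` listener. In Gradio 4.x the same polling can also be expressed by passing `every=` to the `load` event, which re-runs the callback on an interval without a separate timer component; a minimal sketch (an alternative wiring, not part of the commit, so verify `every=` against the pinned Gradio version):

```python
# Sketch: interval polling via the load event's `every=` parameter (Gradio 4.x).
with gr.Blocks() as demo:
    round_info = gr.Textbox(label="")
    elo_table = gr.DataFrame(label="ELO Ratings", headers=["User ID", "Rating"])
    # Re-runs update_info every 5 seconds; requires the queue to be enabled.
    demo.load(update_info, outputs=[round_info, elo_table], every=5)
```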
pyproject.toml CHANGED

```diff
@@ -6,7 +6,10 @@ readme = "README.md"
 requires-python = ">=3.9"
 dependencies = [
     "gradio>=4.44.0",
+    "jupyter>=1.1.1",
     "modal>=0.64.126",
     "openai>=1.46.1",
+    "python-dotenv>=1.0.1",
     "streamlit>=1.38.0",
+    "supabase>=2.7.4",
 ]
```
requirements.txt CHANGED

```diff
@@ -100,3 +100,6 @@ watchfiles==0.24.0
 websockets==12.0
 yarl==1.11.1
 zipp==3.20.2
+
+supabase~=2.7.4
+python-dotenv~=1.0.1
```
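Both new dependencies feed the same configuration path: python-dotenv loads a `.env` file (which `.gitignore` already excludes), and the Supabase client is built from the two variables it provides. A sketch of the expected file plus a quick preflight check (the values are placeholders; the variable names are the ones the code actually reads):

```python
# The .env file that eval.py and leaderboard.py expect looks like (placeholder values):
#
#   SUPABASE_URL=https://<your-project>.supabase.co
#   SUPABASE_KEY=<your-anon-key>
#
# Quick sanity check that the variables resolve before launching the apps:
import os
from dotenv import load_dotenv

load_dotenv()
missing = [k for k in ("SUPABASE_URL", "SUPABASE_KEY") if not os.getenv(k)]
assert not missing, f"Missing in .env: {missing}"
```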
uv.lock CHANGED

The diff for this file is too large to render; see the raw file.
vllm_inference.py CHANGED

```diff
@@ -79,14 +79,16 @@ app = modal.App("example-vllm-openai-compatible")
 N_GPU = 1  # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count
 TOKEN = "super-secret-token"  # auth token. for production use, replace with a modal.Secret
 
+SECONDS = 1
 MINUTES = 60  # seconds
 HOURS = 60 * MINUTES
 
+# TODO: Implement secrets https://modal.com/docs/guide/secrets
 
 @app.function(
     image=vllm_image,
     gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
-    container_idle_timeout=
+    container_idle_timeout=3 * MINUTES,
     timeout=24 * HOURS,
     allow_concurrent_inputs=100,
     volumes={MODELS_DIR: volume},
```
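On the new TODO: Modal secrets replace hardcoded values by injecting key-value pairs as environment variables into the function's container. A sketch of what that migration could look like (the secret name `vllm-auth` and its `TOKEN` key are placeholders, and the secret must be created first via the `modal secret create` CLI or the dashboard):

```python
# Sketch: auth token via modal.Secret instead of a hardcoded constant (hypothetical).
import os
import modal

app = modal.App("example-vllm-openai-compatible")

@app.function(
    secrets=[modal.Secret.from_name("vllm-auth")],  # e.g. `modal secret create vllm-auth TOKEN=...`
)
def serve():
    token = os.environ["TOKEN"]  # injected by the secret at runtime
    ...
```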