|
import json |
|
import re |
|
import argparse |
|
import gradio as gr |
|
|
|
|
|
def load_jsonl(file_path): |
|
data = [] |
|
with open(file_path, 'r') as f: |
|
for line in f: |
|
data.append(json.loads(line)) |
|
return data |
|
|
|
def display_pairwise_answer(data): |
|
chat_mds = pairwise_to_gradio_chat_mds(data) |
|
|
|
return chat_mds |
|
|
|
|
|
newline_pattern1 = re.compile("\n\n(\d+\. )") |
|
newline_pattern2 = re.compile("\n\n(- )") |
|
|
|
|
|
def post_process_answer(x): |
|
|
|
|
|
|
|
|
|
return x |
|
|
|
|
|
def pairwise_to_gradio_chat_mds(data): |
|
end = data["turn"] * 3 |
|
ans_a = data["conversation_a"] |
|
ans_b = data["conversation_b"] |
|
|
|
mds = [""] * end |
|
base = 0 |
|
for i in range(0, end, 3): |
|
mds[i] = "## User Prompt\n" + data["conversation_a"][base]["content"].strip() |
|
mds[i + 1] = f"## {data['model_a']}\n" + post_process_answer( |
|
ans_a[base + 1]["content"].strip() |
|
) |
|
mds[i + 2] = f"## {data['model_b']}\n" + post_process_answer( |
|
ans_b[base + 1]["content"].strip() |
|
) |
|
base += 2 |
|
|
|
winner = data["winner"] if "tie" in data["winner"] else data[data["winner"]] |
|
mds += [f"## Winner: {winner}"] |
|
|
|
mds += [""] * (16 - len(mds)) |
|
|
|
return mds |
|
|
|
|
|
def filter_by_language(language): |
|
return [item for item in data if item['language'] == language] |
|
|
|
def filter_by_outcome(outcome, filtered_data): |
|
return [item for item in filtered_data if item['outcome'] == outcome] |
|
|
|
def filter_by_model(model, filtered_data): |
|
if model == "anyone": |
|
return [item for item in filtered_data] |
|
return [item for item in filtered_data if item['opponent'] == model] |
|
|
|
def filter_by_conversation_a_prefix(prefix, filtered_data): |
|
return [item for item in filtered_data if item['conversation_a'][0]["content"][:128] == prefix] |
|
|
|
|
|
def update_outcome_options(language): |
|
filtered_data = filter_by_language(language) |
|
outcomes = [item['outcome'] for item in filtered_data] |
|
outcomes = list(dict.fromkeys(["Llama-4-Maverick-03-26-Experimental Won"] + outcomes)) if "Llama-4-Maverick-03-26-Experimental Won" in outcomes else list(set(outcomes)) |
|
filtered_data = filter_by_outcome(outcomes[0], filtered_data) |
|
models = ["anyone"] + list(sorted(set(item['opponent'] for item in filtered_data))) |
|
filtered_data = filter_by_model(models[0], filtered_data) |
|
prefixes = [item['conversation_a'][0]["content"][:128] for item in filtered_data] |
|
return gr.update(choices=outcomes, value=outcomes[0]), gr.update(choices=models, value=models[0]), gr.update(choices=prefixes, value=prefixes[0]) |
|
|
|
|
|
def update_model_opponent(language, outcome): |
|
filtered_data = filter_by_language(language) |
|
filtered_data = filter_by_outcome(outcome, filtered_data) |
|
models = ["anyone"] + sorted(set(item['opponent'] for item in filtered_data)) |
|
filtered_data = filter_by_model(models[0], filtered_data) |
|
prefixes = [item['conversation_a'][0]["content"][:128] for item in filtered_data] |
|
return gr.update(choices=models, value=models[0]), gr.update(choices=prefixes, value=prefixes[0]) |
|
|
|
|
|
def update_question_options(language, outcome, model): |
|
filtered_data = filter_by_language(language) |
|
filtered_data = filter_by_outcome(outcome, filtered_data) |
|
filtered_data = filter_by_model(model, filtered_data) |
|
prefixes = [item['conversation_a'][0]["content"][:128] for item in filtered_data] |
|
return gr.update(choices=prefixes, value=prefixes[0]) |
|
|
|
|
|
def display_filtered_data(language, outcome, model, prefix): |
|
filtered_data = filter_by_language(language) |
|
filtered_data = filter_by_outcome(outcome, filtered_data) |
|
filtered_data = filter_by_model(model, filtered_data) |
|
filtered_data = filter_by_conversation_a_prefix(prefix, filtered_data) |
|
if len(filtered_data) == 0: |
|
return [""] * 16 |
|
return pairwise_to_gradio_chat_mds(filtered_data[0]) |
|
|
|
|
|
def next_question(language, outcome, model, prefix): |
|
filtered_data = filter_by_language(language) |
|
filtered_data = filter_by_outcome(outcome, filtered_data) |
|
filtered_data = filter_by_model(model, filtered_data) |
|
|
|
all_items = [item['conversation_a'][0]["content"][:128] for item in filtered_data] |
|
if prefix: |
|
i = all_items.index(prefix) + 1 |
|
else: |
|
i = 0 |
|
|
|
if i >= len(all_items): |
|
return gr.update(choices=all_items, value=all_items[-1]) |
|
|
|
return gr.update(choices=all_items, value=all_items[i]) |
|
|
|
|
|
if __name__ == "__main__": |
|
parser = argparse.ArgumentParser() |
|
parser.add_argument("--host", type=str, default="0.0.0.0") |
|
parser.add_argument("--port", type=int) |
|
parser.add_argument("--share", action="store_true") |
|
args = parser.parse_args() |
|
print(args) |
|
|
|
data = load_jsonl('data/clean-llama4.jsonl') |
|
|
|
default_lang = "English" |
|
default_opponent = "claude-3-5-sonnet-20241022" |
|
default_outcome = "Llama-4-Maverick-03-26-Experimental Won" |
|
filter_data = filter_by_language(language=default_lang) |
|
filter_data = filter_by_model(model=default_opponent, filtered_data=filter_data) |
|
filter_data = filter_by_outcome(outcome=default_outcome, filtered_data=filter_data) |
|
question_prefixes = [item['conversation_a'][0]["content"][:128] for item in filter_data] |
|
|
|
default_question = question_prefixes[2] |
|
|
|
|
|
with gr.Blocks() as demo: |
|
gr.Markdown(value="# Welcome to Llama-4-Maverick-03-26-Experimental battles") |
|
with gr.Row(): |
|
with gr.Column(): |
|
filter_data = filter_by_language(language=default_lang) |
|
languages = ["English"] + list(sorted(set([item['language'] for item in data if item['language'] != "English"]))) |
|
language_dropdown = gr.Dropdown(label="Select Language", choices=languages, value=default_lang) |
|
with gr.Column(): |
|
filter_data = filter_by_language(language=default_lang) |
|
models = ["anyone"] + sorted(set(item['opponent'] for item in filter_data)) |
|
model_dropdown = gr.Dropdown(label="Opponent", choices=models, value=default_opponent) |
|
with gr.Column(): |
|
filter_data = filter_by_language(language=default_lang) |
|
filter_data = filter_by_model(model=default_opponent, filtered_data=filter_data) |
|
outcomes = sorted(set(item['outcome'] for item in filter_data)) |
|
outcome_dropdown = gr.Dropdown(label="Outcome", choices=outcomes, value=default_outcome) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=5): |
|
filter_data = filter_by_outcome(outcome=default_outcome, filtered_data=filter_data) |
|
question_prefixes = [item['conversation_a'][0]["content"][:128] for item in filter_data] |
|
question_dropdown = gr.Dropdown(label="Select Question", choices=question_prefixes, value=default_question) |
|
with gr.Column(): |
|
next_button = gr.Button("Next Question") |
|
|
|
default_chat_mds = display_filtered_data(default_lang, default_outcome, default_opponent, default_question) |
|
|
|
chat_mds = [] |
|
for i in range(5): |
|
chat_mds.append(gr.Markdown(elem_id=f"user_question_{i+1}", value=default_chat_mds[len(chat_mds)])) |
|
with gr.Row(): |
|
for j in range(2): |
|
with gr.Column(scale=100): |
|
chat_mds.append(gr.Markdown(value=default_chat_mds[len(chat_mds)])) |
|
|
|
if j == 0: |
|
with gr.Column(scale=1, min_width=8): |
|
gr.Markdown() |
|
chat_mds.append(gr.Markdown()) |
|
|
|
language_dropdown.change(fn=update_outcome_options, inputs=language_dropdown, outputs=[outcome_dropdown, model_dropdown, question_dropdown]) |
|
outcome_dropdown.change(fn=update_model_opponent, inputs=[language_dropdown, outcome_dropdown], outputs=[model_dropdown, question_dropdown]) |
|
model_dropdown.change(fn=update_question_options, inputs=[language_dropdown, outcome_dropdown, model_dropdown], outputs=question_dropdown) |
|
next_button.click(fn=next_question, inputs=[language_dropdown, outcome_dropdown, model_dropdown, question_dropdown], outputs=question_dropdown) |
|
question_dropdown.change(fn=display_filtered_data, inputs=[language_dropdown, outcome_dropdown, model_dropdown, question_dropdown], outputs=chat_mds) |
|
|
|
question_dropdown = next_question(default_lang, default_outcome, default_opponent, default_question) |
|
chat_mds = display_filtered_data(default_lang, default_outcome, default_opponent, default_question) |
|
|
|
demo.launch(share=args.share) |
|
|