Spaces
Kian Kyars committed · 776bdaf
Parent(s): bf21013

Agentic Demo: LLM Debate & Judge (Track 3), README tag, video placeholder
README.md CHANGED
```diff
@@ -28,3 +28,32 @@ This is the **Decider MCP**: a minimal Modal+Gradio MCP Server that helps you decide …
 ---
 
 **Note:** Model selection is supported in the Gradio UI. For MCP API, you can extend the endpoint to accept model choices.
+
+## Special Awards
+
+👑 Modal Labs Choice Award
+
+---
+sdk: gradio
+sdk_version: 4.0.0
+---
+
+tag: agent-demo-track
+
+# Agentic Demo: LLM Debate & Judge
+
+This Gradio app lets you enter a debate topic. Two LLMs (user-selectable) generate arguments, and a third LLM (also user-selectable) acts as a judge, summarizes both arguments, and picks a winner.
+
+- Select models for Agent A, Agent B, and Judge
+- Enter a debate topic
+- See both arguments and the judge's summary/winner
+
+## Usage
+1. Enter a debate topic/question
+2. Pick three different models
+3. Click submit to see the debate and the judge's verdict
+
+## Video Overview
+[Add your video link here]
+
+---
```
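Two notes on this hunk. First, Hugging Face Spaces only reads the `sdk:` / `sdk_version:` YAML block when it appears at the very top of README.md; appended mid-file as above, it is unlikely to be parsed as Space configuration. Second, the kept **Note:** line suggests extending the MCP endpoint to accept model choices. A minimal sketch of what that could look like, assuming a Gradio version with built-in MCP support (`demo.launch(mcp_server=True)`); the `decide` function, its labels, and its body are illustrative placeholders, not the Space's actual endpoint:

```python
import gradio as gr

ALL_MODELS = ["mistralai/Mistral-7B-v0.1"]  # reuse the model list from app.py

def decide(question: str, model: str = ALL_MODELS[0]) -> str:
    """Illustrative stub: the model argument becomes a parameter of the MCP tool."""
    return f"Deciding '{question}' with {model}"  # real logic would call the chosen model

demo = gr.Interface(
    fn=decide,
    inputs=[
        gr.Textbox(label="Question"),
        gr.Dropdown(ALL_MODELS, label="Model", value=ALL_MODELS[0]),
    ],
    outputs=gr.Textbox(label="Decision"),
)

demo.launch(mcp_server=True)  # assumption: a Gradio build with MCP support installed
```

With `mcp_server=True`, Gradio derives the MCP tool schema from the function signature and docstring, so the extra `model` parameter becomes selectable by MCP clients as well as in the UI.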
app.py CHANGED
```diff
@@ -9,44 +9,77 @@ ALL_MODELS = [
     "mistralai/Mistral-7B-v0.1"
 ]
 
-def …
+def debate_agent(topic, agent_a_model, agent_b_model, judge_model):
     """
-    …
+    Two LLMs debate a topic, a third LLM judges and picks a winner.
     Args:
-    …
-    …
-    …
+        topic: The debate topic/question
+        agent_a_model: Model for Agent A
+        agent_b_model: Model for Agent B
+        judge_model: Model for Judge
     Returns:
-    …
+        Arguments from A and B, judge summary, and winner
     """
-    if …
-        return {"error": "Please select …
-    … (further removed lines truncated in the page capture)
+    if len({agent_a_model, agent_b_model, judge_model}) < 3:
+        return {"error": "Please select three different models."}
+    # Agent A
+    tokenizer_a = AutoTokenizer.from_pretrained(agent_a_model, token=os.environ["HUGGINGFACE_API_KEY"])
+    model_a = AutoModelForCausalLM.from_pretrained(
+        agent_a_model,
+        token=os.environ["HUGGINGFACE_API_KEY"],
+        load_in_4bit=True,
+        device_map="auto"
+    )
+    prompt_a = f"Debate as Agent A: {topic}"
+    inputs_a = tokenizer_a(prompt_a, return_tensors="pt").to(model_a.device)
+    outputs_a = model_a.generate(**inputs_a, max_new_tokens=200)
+    arg_a = tokenizer_a.decode(outputs_a[0], skip_special_tokens=True)
+    # Agent B
+    tokenizer_b = AutoTokenizer.from_pretrained(agent_b_model, token=os.environ["HUGGINGFACE_API_KEY"])
+    model_b = AutoModelForCausalLM.from_pretrained(
+        agent_b_model,
+        token=os.environ["HUGGINGFACE_API_KEY"],
+        load_in_4bit=True,
+        device_map="auto"
+    )
+    prompt_b = f"Debate as Agent B: {topic}"
+    inputs_b = tokenizer_b(prompt_b, return_tensors="pt").to(model_b.device)
+    outputs_b = model_b.generate(**inputs_b, max_new_tokens=200)
+    arg_b = tokenizer_b.decode(outputs_b[0], skip_special_tokens=True)
+    # Judge
+    judge_prompt = (
+        f"You are the judge of a debate.\n"
+        f"Topic: {topic}\n"
+        f"Agent A says: {arg_a}\n"
+        f"Agent B says: {arg_b}\n"
+        f"Summarize both arguments and pick a winner (A or B) with a short justification."
+    )
+    tokenizer_j = AutoTokenizer.from_pretrained(judge_model, token=os.environ["HUGGINGFACE_API_KEY"])
+    model_j = AutoModelForCausalLM.from_pretrained(
+        judge_model,
+        token=os.environ["HUGGINGFACE_API_KEY"],
+        load_in_4bit=True,
+        device_map="auto"
+    )
+    inputs_j = tokenizer_j(judge_prompt, return_tensors="pt").to(model_j.device)
+    outputs_j = model_j.generate(**inputs_j, max_new_tokens=200)
+    judge_summary = tokenizer_j.decode(outputs_j[0], skip_special_tokens=True)
+    return {
+        "Agent A": arg_a,
+        "Agent B": arg_b,
+        "Judge": judge_summary
+    }
 
 demo = gr.Interface(
-    fn=…
+    fn=debate_agent,
     inputs=[
-        gr.Textbox(label="…
-        gr.Dropdown(ALL_MODELS, label="…
-        gr.Dropdown(ALL_MODELS, label="…
+        gr.Textbox(label="Debate Topic"),
+        gr.Dropdown(ALL_MODELS, label="Agent A Model", value=ALL_MODELS[0]),
+        gr.Dropdown(ALL_MODELS, label="Agent B Model", value=ALL_MODELS[1]),
+        gr.Dropdown(ALL_MODELS, label="Judge Model", value=ALL_MODELS[2])
     ],
-    outputs=gr.JSON(label="…
-    title="…
+    outputs=gr.JSON(label="Debate Results"),
+    title="Agentic Demo: LLM Debate & Judge"
 )
 
-demo.launch(…
+demo.launch()
```
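`debate_agent` repeats the same load-and-generate boilerplate three times, once per model. A possible refactor, sketched under the same assumptions as app.py (transformers with bitsandbytes 4-bit loading and a `HUGGINGFACE_API_KEY` environment variable); the `generate_completion` helper is hypothetical, not part of the committed code:

```python
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_completion(model_name: str, prompt: str, max_new_tokens: int = 200) -> str:
    """Hypothetical helper: load model_name in 4-bit and return its completion for prompt."""
    token = os.environ["HUGGINGFACE_API_KEY"]
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        token=token,
        load_in_4bit=True,   # same quantized loading as app.py; requires bitsandbytes
        device_map="auto",
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# The three per-model blocks in debate_agent would then collapse to:
# arg_a = generate_completion(agent_a_model, f"Debate as Agent A: {topic}")
# arg_b = generate_completion(agent_b_model, f"Debate as Agent B: {topic}")
# judge_summary = generate_completion(judge_model, judge_prompt)
```

Loading each checkpoint inside the request path is also expensive; caching loaded models in a module-level dict keyed by model name would avoid reloading weights on every debate.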