Kian Kyars committed
Commit 776bdaf · 1 Parent(s): bf21013

Agentic Demo: LLM Debate & Judge (Track 3), README tag, video placeholder

Files changed (2)
  1. README.md +29 -0
  2. app.py +64 -31
README.md CHANGED
@@ -28,3 +28,32 @@ This is the **Decider MCP**: a minimal Modal+Gradio MCP Server that helps you de
  ---
  
  **Note:** Model selection is supported in the Gradio UI. For the MCP API, you can extend the endpoint to accept model choices.
+ 
+ ## Special Awards
+ 
+ 👑 Modal Labs Choice Award
+ 
+ ---
+ sdk: gradio
+ sdk_version: 4.0.0
+ ---
+ 
+ tag: agent-demo-track
+ 
+ # Agentic Demo: LLM Debate & Judge
+ 
+ This Gradio app lets you enter a debate topic. Two user-selectable LLMs generate opposing arguments, and a third LLM (also user-selectable) acts as the judge: it summarizes both arguments and picks a winner.
+ 
+ - Select models for Agent A, Agent B, and the Judge
+ - Enter a debate topic
+ - See both arguments and the judge's summary/winner
+ 
+ ## Usage
+ 1. Enter a debate topic/question
+ 2. Pick three different models
+ 3. Click Submit to see the debate and the judge's verdict
+ 
+ ## Video Overview
+ [Add your video link here]
+ 
+ ---
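
The Usage steps above cover the UI flow; for programmatic access, here is a minimal sketch using `gradio_client`. The Space id `kian-kyars/agentic-debate`, the default `/predict` endpoint, and the two non-Mistral model ids are assumptions for illustration, not confirmed by this commit.

```python
# Hypothetical client-side call to the deployed Space (Space id is assumed).
from gradio_client import Client

client = Client("kian-kyars/agentic-debate")  # assumed Space id
result = client.predict(
    "Should remote work remain the default for software teams?",  # debate topic
    "meta-llama/Llama-2-7b-hf",    # Agent A model (example id, assumed in ALL_MODELS)
    "tiiuae/falcon-7b",            # Agent B model (example id, assumed in ALL_MODELS)
    "mistralai/Mistral-7B-v0.1",   # Judge model (the one id visible in ALL_MODELS)
    api_name="/predict",           # default endpoint name for a gr.Interface
)
print(result)  # dict with "Agent A", "Agent B", and "Judge" keys
```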
app.py CHANGED
@@ -9,44 +9,77 @@ ALL_MODELS = [
      "mistralai/Mistral-7B-v0.1"
  ]
  
- def decider_mcp(query, model_a, model_b):
+ def debate_agent(topic, agent_a_model, agent_b_model, judge_model):
      """
-     Ask two LLMs and return both answers and a consensus.
+     Two LLMs debate a topic; a third LLM judges and picks a winner.
      Args:
-         query: The question to ask
-         model_a: First model name
-         model_b: Second model name
+         topic: The debate topic/question
+         agent_a_model: Model for Agent A
+         agent_b_model: Model for Agent B
+         judge_model: Model for the Judge
      Returns:
-         JSON with both answers and consensus
+         Arguments from A and B, the judge's summary, and the winner
      """
-     if model_a == model_b:
-         return {"error": "Please select two different models."}
-     responses = {}
-     for model_id in [model_a, model_b]:
-         tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HUGGINGFACE_API_KEY"])
-         model = AutoModelForCausalLM.from_pretrained(
-             model_id,
-             token=os.environ["HUGGINGFACE_API_KEY"],
-             load_in_4bit=True,
-             device_map="auto"
-         )
-         prompt = f"Answer as an expert: {query}"
-         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-         outputs = model.generate(**inputs, max_new_tokens=200)
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         responses[model_id] = response
-     consensus = max(responses.values(), key=len)
-     return {"consensus": consensus, "model_responses": responses}
+     if len({agent_a_model, agent_b_model, judge_model}) < 3:
+         return {"error": "Please select three different models."}
+     # Agent A
+     tokenizer_a = AutoTokenizer.from_pretrained(agent_a_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_a = AutoModelForCausalLM.from_pretrained(
+         agent_a_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     prompt_a = f"Debate as Agent A: {topic}"
+     inputs_a = tokenizer_a(prompt_a, return_tensors="pt").to(model_a.device)
+     outputs_a = model_a.generate(**inputs_a, max_new_tokens=200)
+     arg_a = tokenizer_a.decode(outputs_a[0], skip_special_tokens=True)
+     # Agent B
+     tokenizer_b = AutoTokenizer.from_pretrained(agent_b_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_b = AutoModelForCausalLM.from_pretrained(
+         agent_b_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     prompt_b = f"Debate as Agent B: {topic}"
+     inputs_b = tokenizer_b(prompt_b, return_tensors="pt").to(model_b.device)
+     outputs_b = model_b.generate(**inputs_b, max_new_tokens=200)
+     arg_b = tokenizer_b.decode(outputs_b[0], skip_special_tokens=True)
+     # Judge
+     judge_prompt = (
+         f"You are the judge of a debate.\n"
+         f"Topic: {topic}\n"
+         f"Agent A says: {arg_a}\n"
+         f"Agent B says: {arg_b}\n"
+         f"Summarize both arguments and pick a winner (A or B) with a short justification."
+     )
+     tokenizer_j = AutoTokenizer.from_pretrained(judge_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_j = AutoModelForCausalLM.from_pretrained(
+         judge_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     inputs_j = tokenizer_j(judge_prompt, return_tensors="pt").to(model_j.device)
+     outputs_j = model_j.generate(**inputs_j, max_new_tokens=200)
+     judge_summary = tokenizer_j.decode(outputs_j[0], skip_special_tokens=True)
+     return {
+         "Agent A": arg_a,
+         "Agent B": arg_b,
+         "Judge": judge_summary
+     }
  
  demo = gr.Interface(
-     fn=decider_mcp,
+     fn=debate_agent,
      inputs=[
-         gr.Textbox(label="Ask a question"),
-         gr.Dropdown(ALL_MODELS, label="Model A", value=ALL_MODELS[0]),
-         gr.Dropdown(ALL_MODELS, label="Model B", value=ALL_MODELS[1])
+         gr.Textbox(label="Debate Topic"),
+         gr.Dropdown(ALL_MODELS, label="Agent A Model", value=ALL_MODELS[0]),
+         gr.Dropdown(ALL_MODELS, label="Agent B Model", value=ALL_MODELS[1]),
+         gr.Dropdown(ALL_MODELS, label="Judge Model", value=ALL_MODELS[2])
      ],
-     outputs=gr.JSON(label="Consensus & Model Answers"),
-     title="Decider MCP: Compare Two LLMs"
+     outputs=gr.JSON(label="Debate Results"),
+     title="Agentic Demo: LLM Debate & Judge"
  )
  
- demo.launch(mcp_server=True)
+ demo.launch()
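
One design note on the new `debate_agent`: it repeats the same load/generate block three times and can hold up to three 4-bit models in memory at once. Below is a minimal sketch, assuming the same `transformers` API the commit already uses, of factoring that block into a helper that frees GPU memory between loads; the `run_model` name and the cleanup calls are hypothetical, not part of this commit.

```python
# Hypothetical refactor: one load/generate/cleanup cycle per model.
import gc
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_model(model_id: str, prompt: str, max_new_tokens: int = 200) -> str:
    """Load a model in 4-bit, generate once, then release the weights."""
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HUGGINGFACE_API_KEY"])
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=os.environ["HUGGINGFACE_API_KEY"],
        load_in_4bit=True,
        device_map="auto",
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Drop the weights before the next model is loaded so three 7B models
    # never sit on the GPU at the same time.
    del model
    gc.collect()
    torch.cuda.empty_cache()
    return text

# debate_agent would then reduce to three calls:
#   arg_a = run_model(agent_a_model, f"Debate as Agent A: {topic}")
#   arg_b = run_model(agent_b_model, f"Debate as Agent B: {topic}")
#   judge_summary = run_model(judge_model, judge_prompt)
```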