Kian Kyars committed
Commit 776bdaf · 1 Parent(s): bf21013

Agentic Demo: LLM Debate & Judge (Track 3), README tag, video placeholder

Files changed (2)
  1. README.md +29 -0
  2. app.py +64 -31
README.md CHANGED
@@ -28,3 +28,32 @@ This is the **Decider MCP**: a minimal Modal+Gradio MCP Server that helps you de
  ---
  
  **Note:** Model selection is supported in the Gradio UI. For the MCP API, you can extend the endpoint to accept model choices.
+ 
+ ## Special Awards
+ 
+ 👑 Modal Labs Choice Award
+ 
+ ---
+ sdk: gradio
+ sdk_version: 4.0.0
+ ---
+ 
+ tag: agent-demo-track
+ 
+ # Agentic Demo: LLM Debate & Judge
+ 
+ This Gradio app lets you enter a debate topic. Two user-selectable LLMs generate opposing arguments, and a third LLM (also user-selectable) acts as the judge: it summarizes both arguments and picks a winner.
+ 
+ - Select models for Agent A, Agent B, and the Judge
+ - Enter a debate topic
+ - See both arguments and the judge's summary/winner
+ 
+ ## Usage
+ 1. Enter a debate topic/question
+ 2. Pick three different models
+ 3. Click Submit to see the debate and the judge's verdict
+ 
+ ## Video Overview
+ [Add your video link here]
+ 
+ ---
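
The Usage steps above cover the UI flow; for programmatic access, here is a minimal sketch using `gradio_client`. The Space id `kian-kyars/agentic-debate`, the default `/predict` endpoint, and the two non-Mistral model ids are assumptions for illustration, not confirmed by this commit.

```python
# Hypothetical client-side call to the deployed Space (Space id is assumed).
from gradio_client import Client

client = Client("kian-kyars/agentic-debate")  # assumed Space id
result = client.predict(
    "Should remote work remain the default for software teams?",  # debate topic
    "meta-llama/Llama-2-7b-hf",    # Agent A model (example id, assumed in ALL_MODELS)
    "tiiuae/falcon-7b",            # Agent B model (example id, assumed in ALL_MODELS)
    "mistralai/Mistral-7B-v0.1",   # Judge model (the one id visible in ALL_MODELS)
    api_name="/predict",           # default endpoint name for a gr.Interface
)
print(result)  # dict with "Agent A", "Agent B", and "Judge" keys
```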
app.py CHANGED
@@ -9,44 +9,77 @@ ALL_MODELS = [
      "mistralai/Mistral-7B-v0.1"
  ]
  
- def decider_mcp(query, model_a, model_b):
+ def debate_agent(topic, agent_a_model, agent_b_model, judge_model):
      """
-     Ask two LLMs and return both answers and a consensus.
+     Two LLMs debate a topic; a third LLM judges and picks a winner.
      Args:
-         query: The question to ask
-         model_a: First model name
-         model_b: Second model name
+         topic: The debate topic/question
+         agent_a_model: Model for Agent A
+         agent_b_model: Model for Agent B
+         judge_model: Model for the Judge
      Returns:
-         JSON with both answers and consensus
+         Arguments from A and B, the judge's summary, and the winner
      """
-     if model_a == model_b:
-         return {"error": "Please select two different models."}
-     responses = {}
-     for model_id in [model_a, model_b]:
-         tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HUGGINGFACE_API_KEY"])
-         model = AutoModelForCausalLM.from_pretrained(
-             model_id,
-             token=os.environ["HUGGINGFACE_API_KEY"],
-             load_in_4bit=True,
-             device_map="auto"
-         )
-         prompt = f"Answer as an expert: {query}"
-         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-         outputs = model.generate(**inputs, max_new_tokens=200)
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         responses[model_id] = response
-     consensus = max(responses.values(), key=len)
-     return {"consensus": consensus, "model_responses": responses}
+     if len({agent_a_model, agent_b_model, judge_model}) < 3:
+         return {"error": "Please select three different models."}
+     # Agent A
+     tokenizer_a = AutoTokenizer.from_pretrained(agent_a_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_a = AutoModelForCausalLM.from_pretrained(
+         agent_a_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     prompt_a = f"Debate as Agent A: {topic}"
+     inputs_a = tokenizer_a(prompt_a, return_tensors="pt").to(model_a.device)
+     outputs_a = model_a.generate(**inputs_a, max_new_tokens=200)
+     arg_a = tokenizer_a.decode(outputs_a[0], skip_special_tokens=True)
+     # Agent B
+     tokenizer_b = AutoTokenizer.from_pretrained(agent_b_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_b = AutoModelForCausalLM.from_pretrained(
+         agent_b_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     prompt_b = f"Debate as Agent B: {topic}"
+     inputs_b = tokenizer_b(prompt_b, return_tensors="pt").to(model_b.device)
+     outputs_b = model_b.generate(**inputs_b, max_new_tokens=200)
+     arg_b = tokenizer_b.decode(outputs_b[0], skip_special_tokens=True)
+     # Judge
+     judge_prompt = (
+         f"You are the judge of a debate.\n"
+         f"Topic: {topic}\n"
+         f"Agent A says: {arg_a}\n"
+         f"Agent B says: {arg_b}\n"
+         f"Summarize both arguments and pick a winner (A or B) with a short justification."
+     )
+     tokenizer_j = AutoTokenizer.from_pretrained(judge_model, token=os.environ["HUGGINGFACE_API_KEY"])
+     model_j = AutoModelForCausalLM.from_pretrained(
+         judge_model,
+         token=os.environ["HUGGINGFACE_API_KEY"],
+         load_in_4bit=True,
+         device_map="auto"
+     )
+     inputs_j = tokenizer_j(judge_prompt, return_tensors="pt").to(model_j.device)
+     outputs_j = model_j.generate(**inputs_j, max_new_tokens=200)
+     judge_summary = tokenizer_j.decode(outputs_j[0], skip_special_tokens=True)
+     return {
+         "Agent A": arg_a,
+         "Agent B": arg_b,
+         "Judge": judge_summary
+     }
  
  demo = gr.Interface(
-     fn=decider_mcp,
+     fn=debate_agent,
      inputs=[
-         gr.Textbox(label="Ask a question"),
-         gr.Dropdown(ALL_MODELS, label="Model A", value=ALL_MODELS[0]),
-         gr.Dropdown(ALL_MODELS, label="Model B", value=ALL_MODELS[1])
+         gr.Textbox(label="Debate Topic"),
+         gr.Dropdown(ALL_MODELS, label="Agent A Model", value=ALL_MODELS[0]),
+         gr.Dropdown(ALL_MODELS, label="Agent B Model", value=ALL_MODELS[1]),
+         gr.Dropdown(ALL_MODELS, label="Judge Model", value=ALL_MODELS[2])
      ],
-     outputs=gr.JSON(label="Consensus & Model Answers"),
-     title="Decider MCP: Compare Two LLMs"
+     outputs=gr.JSON(label="Debate Results"),
+     title="Agentic Demo: LLM Debate & Judge"
  )
  
- demo.launch(mcp_server=True)
+ demo.launch()
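
One design note on the new `debate_agent`: it repeats the same load/generate block three times and can hold up to three 4-bit models in memory at once. Below is a minimal sketch, assuming the same `transformers` API the commit already uses, of factoring that block into a helper that frees GPU memory between loads; the `run_model` name and the cleanup calls are hypothetical, not part of this commit.

```python
# Hypothetical refactor: one load/generate/cleanup cycle per model.
import gc
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_model(model_id: str, prompt: str, max_new_tokens: int = 200) -> str:
    """Load a model in 4-bit, generate once, then release the weights."""
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HUGGINGFACE_API_KEY"])
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=os.environ["HUGGINGFACE_API_KEY"],
        load_in_4bit=True,
        device_map="auto",
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Drop the weights before the next model is loaded so three 7B models
    # never sit on the GPU at the same time.
    del model
    gc.collect()
    torch.cuda.empty_cache()
    return text

# debate_agent would then reduce to three calls:
#   arg_a = run_model(agent_a_model, f"Debate as Agent A: {topic}")
#   arg_b = run_model(agent_b_model, f"Debate as Agent B: {topic}")
#   judge_summary = run_model(judge_model, judge_prompt)
```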