import gradio as gr
import os
import time
import numpy as np
from utils.data_loader import get_random_example
from utils.models import generate_summaries, model_names
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html
from utils.leaderboard import load_leaderboard_data, submit_vote_with_elo, generate_leaderboard_html
from utils.vote_logger import save_vote_details
from utils.shared import generation_interrupt
feedback_options = {
    "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
    "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
    "tie": ["Model A: Complete", "Model A: Accurate", "Model A: Relevant", "Model A: Well written", "Model A: Correct refusal (if applicable)", 
           "Model B: Complete", "Model B: Accurate", "Model B: Relevant", "Model B: Well written", "Model B: Corrent refusal (if applicable)"],
    "neither": ["Model A: Incomplete", "Model A: Hallucinate", "Model A: Irrelevant", "Model A: Incorrect refusal (if applicable)",
               "Model B: Incomplete", "Model B: Hallucinate", "Model B: Irrelevant", "Model B: Incorrect refusal (if applicable)"]
}
def weighted_sample_without_replacement(population, weights, k=2):
    """
    Performs weighted random sampling without replacement.
    
    Args:
        population: The list of items to sample from
        weights: The weight for each item
        k: Number of items to sample
        
    Returns:
        A list of k sampled items
    """
    if len(population) <= k:
        # Not enough items to choose between; return them all (as a new list)
        return list(population)
    
    # Convert weights to numpy array for efficient operations
    weights = np.array(weights)
    
    # Create a copy of the population and weights
    remaining_population = population.copy()
    remaining_weights = weights.copy()
    
    selected = []
    
    for _ in range(k):
        # Normalize weights so they sum to 1
        normalized_weights = remaining_weights / remaining_weights.sum()
        
        # Randomly select one item based on weights
        selected_idx = np.random.choice(len(remaining_population), p=normalized_weights)
        
        # Add the selected item to our result
        selected.append(remaining_population[selected_idx])
        
        # Remove the selected item from the pool
        remaining_population.pop(selected_idx)
        remaining_weights = np.delete(remaining_weights, selected_idx)
        
    return selected
def load_context(set_interrupt=False):
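    """Fetch a random example and reset the context display.

    If set_interrupt is True, signal any in-flight generation to stop
    (and give it a moment to wind down) before loading the new example.
    """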
    if set_interrupt:
        generation_interrupt.set()
        time.sleep(0.2)
        
    generation_interrupt.clear()
    example = get_random_example()
    
    context_desc = example.get('processed_context_desc', '')
    if context_desc:
        context_desc = f"
The question and context are about: {context_desc}
"
    
    show_full = False
    context_html = get_context_html(example, show_full=show_full)
    
    return [
        example,
        gr.update(value=example['question']),
        gr.update(value=context_desc, visible=bool(context_desc)),
        gr.update(value=context_html),
        gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
        show_full
    ]
def load_leaderboard():
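    """Render the current leaderboard standings as HTML."""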
    results = load_leaderboard_data()
    leaderboard_html = generate_leaderboard_html(results)
    return leaderboard_html
def generate_model_summaries(example):
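    """Select two models and generate their summaries for the given example.

    Models are sampled with weights that favor less-played models, so every
    model accumulates comparisons at a similar rate. Returns a dict with both
    model names, both summaries, and a 'completed' flag.
    """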
    result = {
        "model_a": "",
        "model_b": "",
        "summary_a": "",
        "summary_b": "",
        "completed": False
    }
    
    if generation_interrupt.is_set():
        return result
    try:
        # Get current leaderboard data to determine model usage counts
        leaderboard_data = load_leaderboard_data()
        
        # Calculate weights using inverse weighting
        # Weight = K / (games_played + C)
        K = 100  # Scaling factor
        C = 5    # Smoothing constant
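        # Example: a fresh model (0 games) gets weight 100 / (0 + 5) = 20, while a
        # model with 95 games gets 100 / (95 + 5) = 1, so it is sampled 20x less often.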
        
        weights = []
        model_list = []
        
        for model in model_names:
            # Get games played for the model, default to 0 if not found
            games_played = leaderboard_data["games_played"].get(model, 0)
            
            # Calculate weight using inverse formula
            weight = K / (games_played + C)
            
            weights.append(weight)
            model_list.append(model)
        
        # Select two models using weighted sampling without replacement
        selected_models = weighted_sample_without_replacement(model_list, weights, k=2)
        m_a_name, m_b_name = selected_models
        
        result["model_a"] = m_a_name
        result["model_b"] = m_b_name
        
        s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
        
        if not generation_interrupt.is_set():
            result["summary_a"] = s_a
            result["summary_b"] = s_b
            result["completed"] = bool(s_a and s_b)
    except Exception as e:
        print(f"Error in generation: {e}")
        
    return result
def process_generation_result(result):
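    """Translate a generation result dict into Gradio updates for the arena UI.

    On failure or interruption, the vote buttons stay disabled and a
    placeholder message is shown in place of the summaries.
    """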
    if not result["completed"] or not result["summary_a"] or not result["summary_b"]:
        return [
            result.get("model_a", ""), 
            result.get("model_b", ""), 
            result.get("summary_a", ""), 
            result.get("summary_b", ""),
            None, [], False, load_leaderboard_data(),
            gr.update(value=result.get("summary_a", "Generation was interrupted or failed.")),
            gr.update(value=result.get("summary_b", "Generation was interrupted or failed.")),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button"]),
            gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
            gr.update(choices=[], value=[], interactive=False, visible=False),
            gr.update(visible=False),
            gr.update(interactive=False, visible=True),
            gr.update(visible=False),
            gr.update(interactive=True),
            gr.update(elem_classes=[])
        ]
    
    buttons_interactive = bool(result["summary_a"] and result["summary_b"])
    
    agg_results = load_leaderboard_data()
    return [
        result["model_a"], result["model_b"], 
        result["summary_a"], result["summary_b"],
        None, [], False, agg_results,
        gr.update(value=result["summary_a"]),
        gr.update(value=result["summary_b"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
        gr.update(interactive=buttons_interactive, elem_classes=["vote-button", "vote-button-neither"]),
        gr.update(choices=[], value=[], interactive=False, visible=False),
        gr.update(visible=False),
        gr.update(interactive=False, visible=True),
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(elem_classes=[])
    ]
def process_example(example):
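    """Generate summaries for an example and map the result onto the UI."""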
    result = generate_model_summaries(example)
    return process_generation_result(result)
def select_vote_improved(winner_choice):
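    """Record the selected winner, highlight the chosen button, and show the matching feedback options."""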
    feedback_choices = feedback_options.get(winner_choice, [])
    btn_a_classes = ["vote-button"]
    btn_b_classes = ["vote-button"]
    btn_tie_classes = ["vote-button"]
    btn_neither_classes = ["vote-button", "vote-button-neither"]
    
    if winner_choice == 'left':
        btn_a_classes.append("selected")
    elif winner_choice == 'right':
        btn_b_classes.append("selected")
    elif winner_choice == 'tie':
        btn_tie_classes.append("selected")
    elif winner_choice == 'neither':
        btn_neither_classes.append("selected")
    return [
        winner_choice,
        gr.update(choices=feedback_choices, value=[], interactive=True, visible=True),
        gr.update(visible=True),
        gr.update(interactive=True),
        gr.update(elem_classes=btn_a_classes),
        gr.update(elem_classes=btn_b_classes),
        gr.update(elem_classes=btn_tie_classes),
        gr.update(elem_classes=btn_neither_classes)
    ]
def handle_vote_submission(example, m_a, m_b, winner, feedback, summary_a, summary_b, current_results):
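    """Persist the vote details and fold the result into the Elo leaderboard."""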
    if winner is None:
        print("Warning: Submit called without a winner selected.")
        return {}
    save_vote_details(example, m_a, m_b, winner, feedback, summary_a, summary_b)
    return submit_vote_with_elo(m_a, m_b, winner, feedback, current_results)
def show_loading_state():
    """Show loading state while fetching new content and reset UI elements"""
    return [
        gr.update(value="Loading new question and summaries...", interactive=False),
        gr.update(value="Loading new question and summaries...", interactive=False),
        gr.update(interactive=False, elem_classes=["vote-button"]),  # Reset styling
        gr.update(interactive=False, elem_classes=["vote-button"]),
        gr.update(interactive=False, elem_classes=["vote-button"]),
        gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
        gr.update(visible=False),      # feedback_section
        gr.update(interactive=False),  # submit_button
        gr.update(visible=False),      # results_reveal_area
        gr.update(interactive=False),  # random_question_btn
        None  # Reset selected_winner
    ]
def handle_new_example_click():
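    """Interrupt any in-flight generation and return a freshly loaded example."""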
    return load_context(set_interrupt=True)[0]
def update_ui_for_new_context(example):
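    """Refresh the question, context description, and context chunks for a new example."""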
    context_desc = example.get('processed_context_desc', '')
    if context_desc:
        context_desc = f"The question and context are about: {context_desc}
"
    
    return [
        gr.update(value=example['question']),
        gr.update(value=context_desc, visible=bool(context_desc)),
        gr.update(value=get_context_html(example, False)),
        gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
        False
    ]
def cleanup_on_disconnect():
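    """Stop any in-flight generation when the browser session disconnects."""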
    print(f"Browser disconnected. Cleaning up resources...")
    generation_interrupt.set()
with gr.Blocks(theme=gr.themes.Default(
    primary_hue=gr.themes.colors.orange,
    secondary_hue=gr.themes.colors.slate
)) as demo:
    css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
    
    with open(css_path, 'r') as f:
        css_content = f.read()
    
    gr.HTML(f"")
    
    unload_js = """
    
    """
    gr.HTML(unload_js)
    current_example = gr.State({})
    model_a_name = gr.State("")
    model_b_name = gr.State("")
    summary_a_text = gr.State("")
    summary_b_text = gr.State("")
    selected_winner = gr.State(None)
    feedback_list = gr.State([])
    show_results_state = gr.State(False)
    results_agg = gr.State(load_leaderboard_data())
    show_full_context = gr.State(False)
    with gr.Tabs() as tabs:
        with gr.TabItem("Arena", id="arena-tab"):
            gr.Markdown("# Small Language Model RAG Summarization/Generation Arena")
            gr.Markdown("""
🏟️ This arena evaluates small language models (SLMs) on document QA tasks with retrieved context. Models should provide **grounded, comprehensive** answers or **properly decline** when the information is insufficient.

📝 Instructions:

1. **Review the query and context.**
2. **Compare the answers** generated by two different models.
3. **Vote for the better response**, or select 'Tie/Neither' if appropriate.
""")
            gr.HTML("
")
            with gr.Column(elem_id="main-interface-area") as main_interface_area:
                with gr.Row(elem_id="query-title-row"):
                    gr.Markdown("### 💬 Query - Question About Document Content", elem_classes="section-heading")
                with gr.Row(elem_id="query-container"):
                    with gr.Row(elem_classes="query-box-row"):
                        query_display = gr.Markdown(value="Loading question...", elem_classes="query-text", elem_id="query-section")
                    random_question_btn = gr.Button("🔄 Try a New Question", elem_classes="query-button")
                
                context_description = gr.Markdown("", elem_classes="context-description")
                
                gr.HTML("
")
                with gr.Row(elem_id="context-header-row"):
                    gr.Markdown("### 📋 Context - Retrieved Content from the Document", elem_classes="context-title")
                    context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
                    
                context_display = gr.HTML(value="Loading context...", label="Context Chunks")
                gr.Markdown("---")
                gr.Markdown("### 🔍 Compare Models - Are these Grounded, Complete Answers or Correct Rejections?", elem_classes="section-heading")
                with gr.Row(elem_id="summary-containers"):
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["summary-card", "summary-card-a"]):
                            summary_a_display = gr.Textbox(
                                label="Model A", 
                                lines=10, 
                                interactive=False, 
                                show_copy_button=True, 
                                autoscroll=False,
                                elem_id="summary-a-display"
                            )
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
                            summary_b_display = gr.Textbox(
                                label="Model B", 
                                lines=10, 
                                interactive=False, 
                                show_copy_button=True,
                                autoscroll=False,
                                elem_id="summary-b-display"
                            )
                gr.HTML("
")
                gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
                with gr.Row():
                    vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"], interactive=False)
                    vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"], interactive=False)
                    vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"], interactive=False)
                    vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"], interactive=False)
                with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
                    feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
                submit_button = gr.Button("Submit Your Vote", variant="primary", interactive=False, elem_id="submit-button")
                with gr.Column(visible=False) as results_reveal_area:
                    gr.Markdown("---")
                    gr.Markdown("### ✅ Vote Submitted!", elem_classes="section-heading")
                     
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("### Model A was:", elem_classes="section-heading")
                            model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
                        with gr.Column(scale=1):
                            gr.Markdown("### Model B was:", elem_classes="section-heading")
                            model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
                     
                    gr.HTML("
")
                    
                    with gr.Row(elem_classes=["control-buttons"]):
                        try_another_btn = gr.Button("🔄 Try Another Question", elem_id="try-another-btn")
        with gr.TabItem("Leaderboard", id="leaderboard-tab"):
            gr.Markdown("# RAG SLM Summarizer/Generator Leaderboard", elem_classes="orange-title")
            gr.Markdown("View performance statistics for all models ranked by Elo rating.")
            
            with gr.Group(elem_id="leaderboard-info"):
                gr.Markdown("""### About Elo Ratings
                
The Elo rating system provides a more accurate ranking than simple win rates:
- All models start at 1500 points
- Points are exchanged after each comparison based on the expected outcome
- Beating a stronger model earns more points than beating a weaker one
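- Expected score follows the standard Elo formula E_A = 1 / (1 + 10^((R_B - R_A) / 400)), and points move in proportion to the gap between actual and expected outcome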
- The ± value shows the statistical confidence interval (95%)
""")
            
            results_table_display = gr.HTML(label="Model Performance")
    context_toggle_btn.click(
        fn=toggle_context_display,
        inputs=[current_example, show_full_context],
        outputs=[show_full_context, context_display, context_toggle_btn]
    )
    
    demo.load(
        fn=load_context,
        inputs=[],
        outputs=[current_example, query_display, context_description, context_display, 
                context_toggle_btn, show_full_context]
    ).then(
        fn=process_example,
        inputs=[current_example],
        outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                selected_winner, feedback_list, show_results_state, results_agg,
                summary_a_display, summary_b_display, vote_button_a, vote_button_b, 
                vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section, 
                submit_button, results_reveal_area, random_question_btn, main_interface_area]
    )
    demo.load(
        fn=load_leaderboard,
        inputs=[],
        outputs=[results_table_display]
    )
    for btn in [random_question_btn, try_another_btn]:
        btn.click(
            fn=show_loading_state,
            inputs=[],
            outputs=[
                summary_a_display, summary_b_display, 
                vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
                feedback_section, submit_button, results_reveal_area, random_question_btn,
                selected_winner  # Add selected_winner to reset vote state
            ]
        ).then(
            fn=handle_new_example_click,
            inputs=[],
            outputs=[current_example]
        ).then(
            fn=update_ui_for_new_context,
            inputs=[current_example],
            outputs=[query_display, context_description, context_display, 
                    context_toggle_btn, show_full_context]
        ).then(
            fn=process_example,
            inputs=[current_example],
            outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                    selected_winner, feedback_list, show_results_state, results_agg,
                    summary_a_display, summary_b_display, vote_button_a, vote_button_b, 
                    vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section, 
                    submit_button, results_reveal_area, random_question_btn, main_interface_area]
        )
    for btn, choice in zip(
        [vote_button_a, vote_button_b, vote_button_tie, vote_button_neither],
        ['left', 'right', 'tie', 'neither']
    ):
        btn.click(
            fn=lambda choice=choice: select_vote_improved(choice),
            inputs=None,
            outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button, 
                    vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
        )
    feedback_checkboxes.change(
        fn=update_feedback,
        inputs=[feedback_checkboxes],
        outputs=[feedback_list]
    )
    submit_button.click(
        fn=handle_vote_submission,
        inputs=[current_example, model_a_name, model_b_name, selected_winner, feedback_list, summary_a_text, summary_b_text, results_agg],
        outputs=[show_results_state, results_agg, vote_button_a, vote_button_b, 
                vote_button_tie, vote_button_neither, feedback_checkboxes,
                feedback_section, submit_button, results_reveal_area,
                random_question_btn, results_table_display, main_interface_area,
                context_toggle_btn, model_a_reveal, model_b_reveal]
    )
    
    tabs.select(
        fn=load_leaderboard,
        inputs=[],
        outputs=[results_table_display],
        api_name="refresh_leaderboard"
    )
    
    demo.unload(cleanup_on_disconnect)
if __name__ == "__main__":
    demo.launch(debug=True)