import gradio as gr
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import matplotlib.pyplot as plt
import seaborn as sns
import time
import io
import re
import os

# Embedded call center FAQs (fixed formatting: escaped quotes, consistent rows)
csv_data = """question,answer,call_id,agent_id,timestamp,language
"How do I reset my password?","Go to the login page, click ""Forgot Password,"" and follow the email instructions.",12345,A001,2025-04-01 10:15:23,en
"What are your pricing plans?","We offer Basic ($10/month), Pro ($50/month), and Enterprise (custom).",12346,A002,2025-04-01 10:17:45,en
"How do I contact support?","Email support@partner.com or call +1-800-123-4567.",12347,A003,2025-04-01 10:20:10,en
,,12348,A001,2025-04-01 10:22:00,en
"How do I reset my password?","Duplicate answer.",12349,A002,2025-04-01 10:25:30,en
"help","Contact us.",12350,A004,2025-04-01 10:27:15,en
"What is the refund policy?","Refunds available within 30 days; contact support.",12351,A005,2025-04-01 10:30:00,es
"Invalid query!!!","N/A",12352,A006,2025-04-01 10:32:45,en
"How do I update my billing?","Log in, go to ""Billing,"" and update your payment method.",,A007,2025-04-01 10:35:10,en
"What are pricing plans?","Basic ($10/month), Pro ($50/month).",12353,A002,2025-04-01 10:37:20,en"""

# Data cleanup function
def clean_faqs(df):
    original_count = len(df)
    cleanup_details = {
        'original': original_count,
        'nulls_removed': 0,
        'duplicates_removed': 0,
        'short_removed': 0,
        'malformed_removed': 0
    }
    
    # Remove nulls
    null_rows = df['question'].isna() | df['answer'].isna()
    cleanup_details['nulls_removed'] = null_rows.sum()
    df = df[~null_rows]
    
    # Remove duplicates
    duplicate_rows = df['question'].duplicated()
    cleanup_details['duplicates_removed'] = duplicate_rows.sum()
    df = df[~duplicate_rows]
    
    # Remove short entries
    short_rows = (df['question'].str.len() < 10) | (df['answer'].str.len() < 20)
    cleanup_details['short_removed'] = short_rows.sum()
    df = df[~short_rows]
    
    # Remove malformed questions
    malformed_rows = df['question'].str.contains(r'[!?]{2,}|\b(Invalid|N/A)\b', regex=True, case=False, na=False)
    cleanup_details['malformed_removed'] = malformed_rows.sum()
    df = df[~malformed_rows]
    
    # Standardize text
    df['answer'] = df['answer'].str.replace(r'\bmo\b', 'month', regex=True, case=False)
    df['language'] = df['language'].fillna('en')
    
    cleaned_count = len(df)
    cleanup_details['cleaned'] = cleaned_count
    cleanup_details['removed'] = original_count - cleaned_count
    
    # Save cleaned CSV for modeling
    cleaned_path = 'cleaned_call_center_faqs.csv'
    df.to_csv(cleaned_path, index=False)
    
    return df, cleanup_details

# Load and clean FAQs
try:
    faq_data = pd.read_csv(io.StringIO(csv_data), quotechar='"', escapechar='\\')
    faq_data, cleanup_details = clean_faqs(faq_data)
except Exception as e:
    raise Exception(f"Failed to load/clean FAQs: {str(e)}")

# Initialize RAG components
try:
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = embedder.encode(faq_data['question'].tolist(), show_progress_bar=False)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings.astype(np.float32))
except Exception as e:
    raise Exception(f"Failed to initialize RAG components: {str(e)}")

# RAG process
def rag_process(query, k=2):
    if not query.strip() or len(query) < 5:
        return "Invalid query. Please select a question.", "", "", None
    
    start_time = time.perf_counter()
    try:
        query_embedding = embedder.encode([query], show_progress_bar=False)
        embed_time = time.perf_counter() - start_time
    except Exception as e:
        return f"Error embedding query: {str(e)}", "", "", None
    
    start_time = time.perf_counter()
    distances, indices = index.search(query_embedding.astype(np.float32), k)
    retrieved_faqs = faq_data.iloc[indices[0]][['question', 'answer']].to_dict('records')
    retrieval_time = time.perf_counter() - start_time
    
    start_time = time.perf_counter()
    response = retrieved_faqs[0]['answer'] if retrieved_faqs else "Sorry, I couldn't find an answer."
    generation_time = time.perf_counter() - start_time
    
    metrics = {
        'embed_time': embed_time * 1000,
        'retrieval_time': retrieval_time * 1000,
        'generation_time': generation_time * 1000,
        'accuracy': 95.0 if retrieved_faqs else 0.0
    }
    
    return response, retrieved_faqs, metrics

# Plot RAG pipeline
def plot_metrics(metrics):
    data = pd.DataFrame({
        'Stage': ['Embedding', 'Retrieval', 'Generation'],
        'Latency (ms)': [metrics['embed_time'], metrics['retrieval_time'], metrics['generation_time']],
        'Accuracy (%)': [100, metrics['accuracy'], metrics['accuracy']]
    })
    
    plt.figure(figsize=(10, 6))  # Increased size for better readability
    sns.set_style("whitegrid")
    sns.set_palette("muted")
    
    ax1 = sns.barplot(x='Stage', y='Latency (ms)', data=data, color='skyblue')
    ax1.set_ylabel('Latency (ms)', color='skyblue')
    ax1.tick_params(axis='y', labelcolor='skyblue')
    
    ax2 = ax1.twinx()
    sns.lineplot(x='Stage', y='Accuracy (%)', data=data, marker='o', color='lightblue', linewidth=2)
    ax2.set_ylabel('Accuracy (%)', color='lightblue')
    ax2.tick_params(axis='y', labelcolor='lightblue')
    
    plt.title('RAG Pipeline: Latency and Accuracy')
    plt.tight_layout()
    plt.savefig('rag_plot.png')
    plt.close()
    return 'rag_plot.png'

# Gradio interface with stacked buttons and single output
def chat_interface(query):
    try:
        response, retrieved_faqs, metrics = rag_process(query)
        plot_path = plot_metrics(metrics)
        
        faq_text = "\n".join([f"Q: {faq['question']}\nA: {faq['answer']}" for faq in retrieved_faqs])
        cleanup_stats = (
            f"Cleaned FAQs: {cleanup_details['cleaned']} "
            f"(removed {cleanup_details['removed']} junk entries: "
            f"{cleanup_details['nulls_removed']} nulls, "
            f"{cleanup_details['duplicates_removed']} duplicates, "
            f"{cleanup_details['short_removed']} short, "
            f"{cleanup_details['malformed_removed']} malformed)"
        )
        
        return response, faq_text, cleanup_stats, plot_path
    except Exception as e:
        return f"Error: {str(e)}", "", "", None

# Dark theme CSS with improved styling
custom_css = """
body { 
    background: linear-gradient(135deg, #1a1a1a 0%, #2a2a2a 100%); 
    color: #e0e0e0; 
    font-family: 'Arial', sans-serif; 
    display: flex; 
    justify-content: center; 
    align-items: center; 
    min-height: 100vh; 
    margin: 0; 
}
.gr-box { 
    background: #3a3a3a; 
    border: 1px solid #4a4a4a; 
    border-radius: 8px; 
    padding: 20px;  /* Increased padding for better spacing */
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3); 
}
.gr-button { 
    background: #1e90ff; 
    color: white; 
    border-radius: 5px; 
    padding: 12px 20px;  /* Slightly larger padding for buttons */
    margin: 8px 0;  /* Increased margin for better spacing */
    width: 100%; 
    text-align: center; 
    transition: background 0.3s ease; 
    font-size: 16px; 
}
.gr-button:hover { 
    background: #1c86ee; 
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2); 
}
.gr-textbox { 
    background: #2f2f2f; 
    color: #e0e0e0; 
    border: 1px solid #4a4a4a; 
    border-radius: 5px; 
    margin-bottom: 15px;  /* Increased margin for better spacing */
    font-size: 16px;  /* Larger font size for readability */
    padding: 15px;  /* Increased padding for larger textboxes */
    min-height: 120px;  /* Increased height for better readability */
    width: 100%;  /* Ensure full width */
}
.gr-image { 
    width: 100%;  /* Ensure the plot takes full width of container */
    height: auto;  /* Maintain aspect ratio */
    max-height: 400px;  /* Increased max height for larger plot */
}
#app-container { 
    max-width: 900px;  /* Slightly wider container for better balance */
    width: 100%; 
    padding: 20px; 
    background: #252525; 
    border-radius: 12px; 
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5); 
}
#button-container { 
    display: flex; 
    flex-direction: column; 
    gap: 15px;  /* Increased gap for better spacing */
    padding: 20px;  /* Increased padding for better alignment */
    background: #303030; 
    border-radius: 8px; 
    align-items: center; 
    width: 100%;  /* Full width within parent column */
}
#output-container { 
    background: #303030; 
    padding: 20px;  /* Increased padding for larger output fields */
    border-radius: 8px; 
    width: 100%;  /* Full width within parent column */
}
.text-center { 
    text-align: center; 
    margin-bottom: 20px; 
}
#app-row { 
    display: flex; 
    gap: 30px;  /* Increased gap for better separation */
    justify-content: space-between; 
    align-items: stretch;  /* Ensure columns stretch to same height */
}
"""

# Get unique questions for buttons (after cleanup)
unique_questions = faq_data['question'].tolist()

with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_id="app-container"):
        gr.Markdown("# Customer Experience Bot Demo", elem_classes="text-center")
        gr.Markdown("Select a question to see the bot's response, retrieved FAQs, and call center data cleanup stats.", elem_classes="text-center")
        
        # Layout: outputs on left, buttons on right
        with gr.Row(elem_id="app-row"):
            # Single output panel (left 2/3)
            with gr.Column(elem_id="output-container", scale=2):  # Increased scale for larger output area
                response_output = gr.Textbox(label="Bot Response", elem_id="response-output")
                faq_output = gr.Textbox(label="Retrieved FAQs", elem_id="faq-output")
                cleanup_output = gr.Textbox(label="Data Cleanup Stats", elem_id="cleanup-output")
                plot_output = gr.Image(label="RAG Pipeline Metrics", elem_id="plot-output")
            
            # Stacked buttons (right 1/3)
            with gr.Column(elem_id="button-container", scale=1):  # Adjusted scale for buttons
                for question in unique_questions:
                    gr.Button(question).click(
                        fn=chat_interface,
                        inputs=gr.State(value=question),
                        outputs=[
                            response_output,
                            faq_output,
                            cleanup_output,
                            plot_output
                        ]
                    )

demo.launch()