import gradio as gr
from huggingface_hub import InferenceClient
import os
import time
import re

# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Shared inference client: requests go through the "sambanova" provider and
# are authenticated with HF_TOKEN.
client = InferenceClient(provider="sambanova", api_key=HF_TOKEN)

# Display name offered in the UI -> model repository id sent to the API.
MODELS = {
    "LLaMA 70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen 32B": "Qwen/QwQ-32B",
    "DeepSeek R1": "deepseek-ai/DeepSeek-R1",
}

# Module-level (process-wide) conversation state.
# history_log: one record per request (model, question, answer, latency).
# recent_history: chat messages as {"role": ..., "content": ...} dicts, oldest first.
history_log, recent_history = [], []

# Visibility flag for the history panel.
show_history_flag = False

def clean_response(text):
    r"""Strip reasoning-model markup from a response before it is displayed.

    Two kinds of markup are handled:

    * ``<think>`` / ``</think>`` tags are removed; the text between them is kept.
    * LaTeX ``\boxed{...}`` wrappers are unwrapped, keeping the boxed content so
      the final answer is not lost. One level of nested braces inside the box
      (e.g. ``\boxed{\frac{1}{2}}``) is supported.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    if not text:
        return ""
    text = re.sub(r"</?think>", "", text)
    # The earlier pattern demanded a literal backslash before each brace
    # (``\boxed\{...\}``) and therefore never matched real LaTeX output.
    text = re.sub(r"\\boxed\{((?:[^{}]|\{[^{}]*\})*)\}", r"\1", text)
    return text.strip()

def chatbot_response(user_input, model_name):
    """Send ``user_input`` to the selected model and return its reply.

    The last two entries of the module-level ``recent_history`` (the previous
    user/assistant exchange) are sent as context ahead of the new message.
    Every call is recorded in ``history_log`` together with its elapsed time.
    Only successful exchanges are added to ``recent_history``, so an error
    message is never replayed to the model as if it were an assistant turn.

    Args:
        user_input: The user's message text.
        model_name: A key of ``MODELS`` (the display name chosen in the UI).

    Returns:
        The model's reply, or a string starting with
        ``"Erro ao gerar resposta: "`` when the request failed.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    model_id = MODELS[model_name]

    # Send only the previous exchange as context. Building a new list keeps
    # the shared history untouched while the request is in flight.
    messages = [*recent_history[-2:], {"role": "user", "content": user_input}]

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # system clock adjustments.
    started = time.perf_counter()
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=8192 if "Qwen" in model_id else 900,
        )
        # Content may be None; normalise to a string for callers.
        response = completion.choices[0].message.content or ""
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        response = f"Erro ao gerar resposta: {str(e)}"
        succeeded = False
    else:
        succeeded = True
    elapsed = time.perf_counter() - started

    # Only real exchanges become context for the next request.
    if succeeded:
        recent_history.append({"role": "user", "content": user_input})
        recent_history.append({"role": "assistant", "content": response})

    # The log keeps every attempt, including failures.
    history_log.append({
        "Modelo": model_name,
        "Pergunta": user_input,
        "Resposta": response,
        "Tempo de Resposta (s)": round(elapsed, 2),
    })

    return response

# Build the UI: a narrow settings column beside a wider chat column.
# Components are laid out in the order they are created.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # Left column (scale=1): model selection.
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Configurações")
            # Choices are the display names in MODELS; "LLaMA 70B" is preselected.
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                label="Escolha o Modelo",
                value="LLaMA 70B"
            )
            
        # Right column (scale=3): conversation, input box and action buttons.
        with gr.Column(scale=3):
            gr.Markdown("# 🤖 Chatbot - API SambaNova")
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Digite sua mensagem aqui..", show_label=False)

            btn = gr.Button("Enviar", variant="primary")
            history_btn = gr.Button("Histórico", variant="secondary")
            # JSON viewer used to display the request log.
            history_output = gr.JSON()

    def respond(message, chat_history, model_name):
        """Handle one submitted message and refresh the chat display.

        Args:
            message: Text taken from the input box.
            chat_history: List of ``(user, bot)`` pairs currently shown in the
                chat component; ``None`` is treated as an empty list.
            model_name: Display name selected in the model dropdown.

        Returns:
            A ``(textbox_value, chat_history)`` pair. The textbox value is
            always ``""`` so the input box is cleared after each submission.
        """
        chat_history = chat_history or []
        # A blank submission (e.g. pressing Enter on an empty box) would
        # otherwise still trigger an API request and a history entry.
        if not message or not message.strip():
            return "", chat_history
        response = chatbot_response(message, model_name)
        response = clean_response(response)
        chat_history.append((message, response))
        return "", chat_history

    # The send button and pressing Enter in the textbox share one handler.
    btn.click(respond, [msg, chatbot, model_selector], [msg, chatbot])
    msg.submit(respond, [msg, chatbot, model_selector], [msg, chatbot])

    # Per-session visibility of the history panel. A process-wide flag would
    # be flipped by every connected user and would stay stale after a page
    # reload (panel empty, flag still True), forcing an extra click.
    history_visible = gr.State(False)

    def toggle_history(visible):
        """Flip the history panel between shown and hidden for this session.

        Args:
            visible: Whether the panel currently shows the log.

        Returns:
            A ``(payload, visible)`` pair: the payload for the JSON viewer
            (``history_log`` when shown, ``{}`` when hidden) and the new
            visibility flag to store back into the session state.
        """
        visible = not visible
        payload = history_log if visible else {}
        return payload, visible

    history_btn.click(
        toggle_history,
        inputs=[history_visible],
        outputs=[history_output, history_visible],
    )

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()