"""Gradio chat UI for LLMs served through the SambaNova inference provider.

The user picks a model, sends a message, and the reply is shown in a chat
widget. Every request (model, question, answer, latency) is recorded in an
in-memory log that can be toggled on screen with the history button.
"""

import os
import re
import time
from collections import deque

import gradio as gr
from huggingface_hub import InferenceClient

HF_TOKEN = os.getenv("HF_TOKEN")

client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

# Dropdown label -> Hugging Face model id.
MODELS = {
    "LLaMA 70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen 32B": "Qwen/QwQ-32B",
    "DeepSeek R1": "deepseek-ai/DeepSeek-R1",
}

# Number of stored messages replayed to the model as context: one previous
# user/assistant exchange.
CONTEXT_MESSAGES = 2

# NOTE(review): the three names below are process-wide, so every browser
# session served by this process shares the same context and log. Move them
# into per-session state if sessions must be isolated.
history_log = []  # one dict per request, shown by the history button
recent_history = deque(maxlen=CONTEXT_MESSAGES)  # bounded conversation context
show_history_flag = False  # whether the history panel currently shows the log

# NOTE(review): the original first pattern was an empty string (a no-op),
# presumably a tag matcher lost along the way. This restores removal of the
# <think>/</think> markers only; the text between them is kept. Confirm intent.
_THINK_TAG_RE = re.compile(r"</?think>")

# Matches LaTeX ``\boxed{...}``. The optional backslashes keep the previously
# matched ``\boxed\{...\}`` spelling working too. Non-greedy, so it stops at
# the first closing brace (nested braces are not balanced).
_BOXED_RE = re.compile(r"\\boxed\\?\{.*?\\?\}")


def clean_response(text: str) -> str:
    """Return *text* without think-tag markers and ``\\boxed{...}`` spans.

    Args:
        text: Raw model output. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with surrounding whitespace stripped, or ``""`` when
        there is nothing to clean.
    """
    if not text:
        return ""
    text = _THINK_TAG_RE.sub("", text)
    text = _BOXED_RE.sub("", text)
    return text.strip()


def chatbot_response(user_input: str, model_name: str) -> str:
    """Send *user_input* to the selected model and return its raw reply.

    The previous exchange (if any) is sent as context. Each call is recorded
    in ``history_log`` together with its latency. Only successful replies are
    added to ``recent_history``, so an error message is never replayed to the
    model as if it were an assistant turn.

    Args:
        user_input: The user's message.
        model_name: A key of ``MODELS``.

    Returns:
        The model's reply, or a user-facing error message if the request
        failed.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``.
    """
    model_id = MODELS[model_name]

    # Copy the stored context so appending the new message does not touch it.
    messages = list(recent_history)
    messages.append({"role": "user", "content": user_input})

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=8192 if "Qwen" in model_id else 900,
        )
        response = completion.choices[0].message['content']
    except Exception as e:
        # UI boundary: surface any provider/network failure as chat text
        # instead of crashing the event handler.
        response = f"Erro ao gerar resposta: {e}"
    else:
        recent_history.append({"role": "user", "content": user_input})
        recent_history.append({"role": "assistant", "content": response})
    end_time = time.perf_counter()

    # The log records every request, including failed ones.
    history_log.append({
        "Modelo": model_name,
        "Pergunta": user_input,
        "Resposta": response,
        "Tempo de Resposta (s)": round(end_time - start_time, 2),
    })

    return response


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Configurações")
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                label="Escolha o Modelo",
                value="LLaMA 70B",
            )

        with gr.Column(scale=3):
            gr.Markdown("# 🤖 Chatbot - API SambaNova")
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Digite sua mensagem aqui..", show_label=False)
            btn = gr.Button("Enviar", variant="primary")
            history_btn = gr.Button("Histórico", variant="secondary")
            history_output = gr.JSON()

    def respond(message, chat_history, model_name):
        """Handle a submitted message and return (cleared textbox, chat)."""
        # Ignore empty submissions instead of spending an API call on them.
        if not message or not message.strip():
            return "", chat_history
        response = chatbot_response(message, model_name)
        response = clean_response(response)
        chat_history.append((message, response))
        return "", chat_history

    btn.click(respond, [msg, chatbot, model_selector], [msg, chatbot])
    msg.submit(respond, [msg, chatbot, model_selector], [msg, chatbot])

    def toggle_history():
        """Flip the history panel between the full log and an empty view."""
        global show_history_flag
        show_history_flag = not show_history_flag
        return history_log if show_history_flag else {}

    history_btn.click(toggle_history, inputs=[], outputs=history_output)

if __name__ == "__main__":
    demo.launch()