#!/usr/bin/env python3
"""MTC chatbot Space: FAISS retrieval over MTC texts + Qwen chat generation."""

import logging
import random
from typing import List, Optional

import gradio as gr
import numpy as np
from gradio_client import Client
from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores import FAISS

# Configuration
DATA_FILE = "data-mtc.txt"  # This file is no longer used in the Space
DATABASE_DIR = "."          # Database files are in the root directory
CHUNK_SIZE = 800
TOP_K_RESULTS = 100
SIMILARITY_THRESHOLD = 0.8

BASE_SYSTEM_PROMPT = """
Répondez en français selon ces règles :

1. Utilisez EXCLUSIVEMENT le contexte fourni.
2. Structurez la réponse en :
   - Définition principale.
   - Caractéristiques clés.
   - Relations avec d'autres concepts.
3. Si aucune information pertinente, indiquez-le clairement.

Contexte :
{context}
"""

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler()  # Output to console in the Space
    ],
)

# Example questions pool
EXAMPLE_QUESTIONS = [
    "Comment intégrer les enseignements MTC dans la vie quotidienne ?",
    "Comment se préparer à une discussion de groupe MTC ?",
    "Quels sont les obstacles courants à la compréhension des Chroniques ?",
]


class GradioEmbeddings(Embeddings):
    """Embedding management using a remote Gradio API."""

    def __init__(self):
        super().__init__()
        self.client = Client("localsavageai/embijiji3")

    def _generate_embedding(self, text: str) -> np.ndarray:
        """Generate a single embedding via the Gradio API."""
        try:
            result = self.client.predict(
                document=text.strip(),
                api_name="/embed",
            )
            if not isinstance(result, list):
                raise ValueError("Invalid embedding response from Gradio API")
            return np.array(result, dtype=np.float32)
        except Exception as e:
            logging.error(f"Embedding error: {str(e)}")
            raise RuntimeError("Failed to generate embedding") from e

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self._generate_embedding(text).tolist() for text in texts]

    def embed_query(self, text: str) -> List[float]:
        return self._generate_embedding(text).tolist()


def initialize_vector_store() -> FAISS:
    """Robust initialization of the vector store."""
    embeddings = GradioEmbeddings()
    try:
        logging.info("Loading existing database...")
        return FAISS.load_local(
            DATABASE_DIR,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception as e:
        logging.error(f"FAISS loading error: {str(e)}")
        raise


def generate_response(user_input: str, vector_store: FAISS) -> Optional[str]:
    """Generate a response with complete error handling."""
    try:
        # FAISS returns L2 distances, so lower scores mean closer matches:
        # over-fetch, keep only results under the threshold, then take the
        # best TOP_K_RESULTS of those.
        docs_scores = vector_store.similarity_search_with_score(
            user_input,
            k=TOP_K_RESULTS * 3,
        )
        filtered_docs = [
            (doc, score) for doc, score in docs_scores
            if score < SIMILARITY_THRESHOLD
        ]
        filtered_docs.sort(key=lambda x: x[1])

        if not filtered_docs:
            return (
                "Aucune correspondance trouvée dans les textes MTC. "
                "Essayez avec des termes plus spécifiques."
            )

        best_docs = [doc for doc, _ in filtered_docs[:TOP_K_RESULTS]]
        context = "\n".join(
            f"=== Source {i + 1} ===\n{doc.page_content}\n"
            for i, doc in enumerate(best_docs)
        )

        response = Client("Qwen/Qwen2.5-Max-Demo").predict(
            query=user_input,
            history=[],
            system=BASE_SYSTEM_PROMPT.format(context=context),
            api_name="/model_chat",
        )

        # The demo returns a tuple whose second element is the chat history;
        # the answer is the second field of the last (user, assistant) pair.
        if isinstance(response, tuple) and len(response) >= 2:
            chat_history = response[1]
            if isinstance(chat_history, list) and len(chat_history) > 0:
                last_message = chat_history[-1]
                if isinstance(last_message, (list, tuple)) and len(last_message) >= 2:
                    return last_message[1]

        return "Réponse indisponible - Veuillez reformuler votre question."
    except Exception as e:
        logging.error(f"Generation error: {str(e)}", exc_info=True)
        return "Une erreur s'est produite lors de la génération de la réponse."
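
# Performance note (sketch): the handlers below call initialize_vector_store()
# on every message, which re-creates the embeddings client and reloads the
# FAISS index from disk each time. A minimal module-level cache, assuming the
# on-disk index does not change while the Space is running, could be swapped
# in wherever initialize_vector_store() is called:
_VECTOR_STORE: Optional[FAISS] = None


def get_vector_store() -> FAISS:
    """Load the FAISS index once and reuse it for subsequent requests."""
    global _VECTOR_STORE
    if _VECTOR_STORE is None:
        _VECTOR_STORE = initialize_vector_store()
    return _VECTOR_STORE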

def run_chatbot(query: str) -> str:
    """Answer a single query end to end (retrieval + generation).

    Named run_chatbot so it is not shadowed by the gr.Chatbot component
    created in the interface below.
    """
    try:
        vs = initialize_vector_store()
        response = generate_response(query, vs)
        return response or "Aucune réponse générée."
    except Exception as e:
        logging.error(f"Chatbot error: {str(e)}")
        return f"Une erreur s'est produite : {str(e)}"


def get_random_questions():
    """Pick 3 random example questions (the pool currently holds exactly 3,
    so this amounts to a shuffle)."""
    return random.sample(EXAMPLE_QUESTIONS, 3)
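
# Standalone usage (sketch): run_chatbot can be exercised without the web UI,
# assuming the FAISS index files are present in DATABASE_DIR and both remote
# Spaces (localsavageai/embijiji3 and Qwen/Qwen2.5-Max-Demo) are reachable:
#
#     >>> print(run_chatbot("Comment se préparer à une discussion de groupe MTC ?"))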

# Gradio interface
with gr.Blocks(title="MTC Chatbot", theme=gr.themes.Soft()) as demo:
    # Header section
    gr.Markdown("""
# 📚 Assistant des Chroniques MTC

Posez vos questions sur les enseignements MTC
""")
""") # Chat Interface chatbot = gr.Chatbot( label="Dialogue", bubble_full_width=False, ) # Input Section with gr.Row(): msg = gr.Textbox( scale=7, placeholder="Écrivez votre question ici...", container=False ) btn = gr.Button("Envoyer", scale=1) # Example Questions with gr.Row(): gr.Examples( examples=get_random_questions(), inputs=msg, label="Exemples de questions:", examples_per_page=3 ) # Chat Functions def respond(message, chat_history): try: vs = initialize_vector_store() response = generate_response(message, vs) chat_history.append((message, response)) return "", chat_history except Exception as e: logging.error(f"Error: {str(e)}") return "", chat_history + [(message, f"Erreur: {str(e)}")] # Event Handling btn.click( respond, [msg, chatbot], [msg, chatbot] ) msg.submit( respond, [msg, chatbot], [msg, chatbot] ) if __name__ == "__main__": demo.launch()