#!/usr/bin/env python3
import random
import logging
import numpy as np
from typing import List, Optional
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from gradio_client import Client
import gradio as gr

# Configuration
DATA_FILE = "data-mtc.txt"  # This file is no longer used in the Space
DATABASE_DIR = "."          # Database files are in the root directory
CHUNK_SIZE = 400
TOP_K_RESULTS = 600
SIMILARITY_THRESHOLD = 0.5
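# Note (assumption): with LangChain's default FAISS setup, similarity_search_with_score()
# returns an L2 distance, so smaller scores mean closer matches. generate_response()
# therefore keeps only documents whose score is below SIMILARITY_THRESHOLD.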
BASE_SYSTEM_PROMPT = """
Répondez en français selon ces règles :
1. Utilisez EXCLUSIVEMENT le contexte fourni
2. Structurez la réponse en :
   - Définition principale
   - Caractéristiques clés (3 points maximum)
   - Relations avec d'autres concepts
3. Si aucune information pertinente, indiquez-le clairement
4. Le contexte est "éthérique" et non "spirituel" : la définition du mot "spirituel" est basée sur des croyances manipulées par des entités, alors que l'éthérique est lié à la pure vibration sans forme.

Contexte :
{context}
"""

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()  # Output to console in the Space
    ]
)

# Example Questions Pool
EXAMPLE_QUESTIONS = [
    "Comment intégrer les enseignements MTC dans la vie quotidienne ?",
    "Comment se préparer à une discussion de groupe MTC ?",
    "Quels sont les obstacles courants à la compréhension des Chroniques ?"
]

class GradioEmbeddings(Embeddings):
    """Embedding management using the Gradio API"""

    def __init__(self):
        super().__init__()
        self.client = Client("localsavageai/embijiji3")

    def _generate_embedding(self, text: str) -> np.ndarray:
        """Generate an embedding via the Gradio API"""
        try:
            result = self.client.predict(
                document=text.strip(),
                api_name="/embed"
            )
            if not isinstance(result, list):
                raise ValueError("Invalid embedding response from Gradio API")
            return np.array(result, dtype=np.float32)
        except Exception as e:
            logging.error(f"Embedding error: {str(e)}")
            raise RuntimeError("Failed to generate embedding") from e

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self._generate_embedding(text).tolist() for text in texts]

    def embed_query(self, text: str) -> List[float]:
        return self._generate_embedding(text).tolist()
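# Usage sketch (hypothetical), assuming the "localsavageai/embijiji3" Space is up and
# its /embed endpoint returns a flat list of floats:
#     emb = GradioEmbeddings()
#     vec = emb.embed_query("Qu'est-ce que l'éthérique ?")
#     print(len(vec))  # embedding dimensionality
# Note that embed_documents() calls the remote endpoint once per text, so indexing a
# large corpus this way is slow.
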
def initialize_vector_store() -> FAISS:
    """Robust initialization of the vector store"""
    embeddings = GradioEmbeddings()
    try:
        logging.info("Loading existing database...")
        return FAISS.load_local(
            DATABASE_DIR,
            embeddings,
            allow_dangerous_deserialization=True
        )
    except Exception as e:
        logging.error(f"FAISS loading error: {str(e)}")
        raise
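# Note: FAISS.load_local() with the default index name expects "index.faiss" and
# "index.pkl" to exist in DATABASE_DIR; allow_dangerous_deserialization=True is needed
# because the docstore is unpickled from disk.
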
def generate_response(user_input: str, vector_store: FAISS) -> Optional[str]:
    """Generate a response with complete error handling"""
    try:
        docs_scores = vector_store.similarity_search_with_score(
            user_input,
            k=TOP_K_RESULTS * 3
        )
        filtered_docs = [
            (doc, score) for doc, score in docs_scores
            if score < SIMILARITY_THRESHOLD
        ]
        filtered_docs.sort(key=lambda x: x[1])

        if not filtered_docs:
            return ("Aucune correspondance trouvée dans les textes MTC. "
                    "Essayez avec des termes plus spécifiques.")

        best_docs = [doc for doc, _ in filtered_docs[:TOP_K_RESULTS]]
        context = "\n---\n".join(doc.page_content for doc in best_docs)

        response = Client("Qwen/Qwen2.5-Max-Demo").predict(
            query=user_input,
            history=[],
            system=BASE_SYSTEM_PROMPT.format(context=context),
            api_name="/model_chat"
        )

        if isinstance(response, tuple) and len(response) >= 2:
            chat_history = response[1]
            if isinstance(chat_history, list) and len(chat_history) > 0:
                last_message = chat_history[-1]
                if isinstance(last_message, (list, tuple)) and len(last_message) >= 2:
                    return last_message[1]

        return "Réponse indisponible - Veuillez reformuler votre question."
    except Exception as e:
        logging.error(f"Generation error: {str(e)}", exc_info=True)
        return "Une erreur s'est produite lors de la génération de la réponse."
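# Note (assumption): the Qwen/Qwen2.5-Max-Demo /model_chat endpoint appears to return a
# tuple whose second element is the chat history as [(user, assistant), ...] pairs;
# generate_response() extracts the last assistant message from it. Adjust the parsing
# above if that Space's API changes.
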
def chatbot(query):
    """Main function to run the chatbot"""
    try:
        vs = initialize_vector_store()
        response = generate_response(query, vs)
        return response or "Aucune réponse générée."
    except Exception as e:
        logging.error(f"Chatbot error: {str(e)}")
        return f"Une erreur s'est produite : {str(e)}"

# Gradio Interface
def get_random_questions():
    """Get 3 random example questions"""
    return random.sample(EXAMPLE_QUESTIONS, 3)

with gr.Blocks(title="MTC Chatbot") as demo:  # Removed theme=gr.themes.Soft() to use the default light theme
    # Header Section
    gr.Markdown("""
    <div style="text-align: center;">
        <h1>📚 L'IA des Chroniques MTC</h1>
        <p>Copyright © 2025 Michel Thomas / MTC-QC.ca</p>
    </div>
    """)

    # Chat Interface
    chatbot = gr.Chatbot(
        label="Dialogue",
        bubble_full_width=False,
    )

    # Input Section
    with gr.Row():
        msg = gr.Textbox(
            scale=7,
            placeholder="Écrivez votre question ici...",
            container=False
        )
        btn = gr.Button("Envoyer", scale=1)

    # Example Questions
    with gr.Row():
        gr.Examples(
            examples=get_random_questions(),
            inputs=msg,
            label="Exemples de questions :",
            examples_per_page=3
        )
    # Chat Functions
    def respond(message, chat_history):
        try:
            vs = initialize_vector_store()
            response = generate_response(message, vs)
            chat_history.append((message, response))
            return "", chat_history
        except Exception as e:
            logging.error(f"Error: {str(e)}")
            return "", chat_history + [(message, f"Erreur : {str(e)}")]
    # Event Handling
    btn.click(
        respond,
        [msg, chatbot],
        [msg, chatbot]
    )
    msg.submit(
        respond,
        [msg, chatbot],
        [msg, chatbot]
    )

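# Quick local smoke test (hypothetical), assuming the FAISS index files are present in
# DATABASE_DIR and both remote Spaces are reachable:
#     vs = initialize_vector_store()
#     print(generate_response("Qu'est-ce que l'éthérique ?", vs))
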
if __name__ == "__main__":
    demo.launch()