localsavageai committed · Commit 943806d · verified · 1 parent: 78b2249

Upload 2 files

Files changed (2):
  1. app.py +88 -140
  2. requirements.txt +4 -8
app.py CHANGED
@@ -1,23 +1,17 @@
import os
import logging
import numpy as np
- from typing import List, Optional, Tuple
- import torch
- import gradio as gr
- import spaces
- from sentence_transformers import SentenceTransformer
+ from typing import List, Optional
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from gradio_client import Client
- import requests
- from tqdm import tqdm
+ import gradio as gr

# Configuration
- DATA_FILE = "data-mtc.txt"
- DATABASE_DIR = "semantic_memory"
- QWEN_API_URL = "Qwen/Qwen2.5-Max-Demo"  # Gradio API for Qwen2.5 chat
+ DATA_FILE = "data-mtc.txt"  # This file is no longer used in the Space
+ DATABASE_DIR = "."  # Database files are in the root directory
CHUNK_SIZE = 800
- TOP_K_RESULTS = 150
+ TOP_K_RESULTS = 100
SIMILARITY_THRESHOLD = 0.4

BASE_SYSTEM_PROMPT = """
@@ -34,176 +28,130 @@ Contexte :
{context}
"""

- # Configure logging
+ # Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
-         logging.FileHandler("mtc_chat.log"),
-         logging.StreamHandler()
+         logging.StreamHandler()  # Output to console in the Space
    ]
)

- class LocalEmbeddings(Embeddings):
-     """Local sentence-transformers embeddings"""
-     def __init__(self, model):
-         self.model = model
-
-     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-         embeddings = []
-         for text in tqdm(texts, desc="Creating embeddings"):
-             embeddings.append(self.model.encode(text).tolist())
-         return embeddings
-
-     def embed_query(self, text: str) -> List[float]:
-         return self.model.encode(text).tolist()
-
- def split_text_into_chunks(text: str) -> List[str]:
-     """Split text with overlap and sentence preservation"""
-     chunks = []
-     start = 0
-     text_length = len(text)
-
-     while start < text_length:
-         end = min(start + CHUNK_SIZE, text_length)
-         chunk = text[start:end]
-
-         # Find last complete punctuation
-         last_punct = max(
-             chunk.rfind('.'),
-             chunk.rfind('!'),
-             chunk.rfind('?'),
-             chunk.rfind('\n\n')
-         )
-
-         if last_punct != -1 and (end - start) > CHUNK_SIZE//2:
-             end = start + last_punct + 1
-
-         chunks.append(text[start:end].strip())
-         start = end if end > start else start + CHUNK_SIZE
-
-     return chunks
-
- def initialize_vector_store(embeddings: Embeddings) -> FAISS:
-     """Initialize FAISS vector store"""
-     if os.path.exists(DATABASE_DIR):
-         try:
-             logging.info("Loading existing database...")
-             return FAISS.load_local(
-                 DATABASE_DIR,
-                 embeddings,
-                 allow_dangerous_deserialization=True
-             )
-         except Exception as e:
-             logging.error(f"FAISS load error: {str(e)}")
-             raise
-
-     logging.info("Creating new vector database...")
-     if not os.path.exists(DATA_FILE):
-         raise FileNotFoundError(f"{DATA_FILE} not found")
-
-     try:
-         with open(DATA_FILE, "r", encoding="utf-8") as f:
-             text = f.read()
-
-         chunks = split_text_into_chunks(text)
-         if not chunks:
-             raise ValueError("No valid chunks generated")
-
-         logging.info(f"Creating {len(chunks)} chunks...")
-         vector_store = FAISS.from_texts(chunks, embeddings)
-         vector_store.save_local(DATABASE_DIR)
-         logging.info("Vector store initialized successfully")
-         return vector_store
-
-     except Exception as e:
-         logging.error(f"Initialization failed: {str(e)}")
-         raise
-
+
+ class GradioEmbeddings(Embeddings):
+     """Embedding management using Gradio API"""
+
+     def __init__(self):
+         super().__init__()
+         self.client = Client("localsavageai/embijiji3")
+
+     def _generate_embedding(self, text: str) -> np.ndarray:
+         """Generate an embedding via the Gradio API"""
+         try:
+             result = self.client.predict(
+                 document=text.strip(),
+                 api_name="/embed"
+             )
+             if not isinstance(result, list):
+                 raise ValueError("Invalid embedding response from Gradio API")
+             return np.array(result, dtype=np.float32)
+         except Exception as e:
+             logging.error(f"Embedding error: {str(e)}")
+             raise RuntimeError("Failed to generate embedding") from e
+
+     def embed_documents(self, texts: List[str]) -> List[List[float]]:
+         return [self._generate_embedding(text).tolist() for text in texts]
+
+     def embed_query(self, text: str) -> List[float]:
+         return self._generate_embedding(text).tolist()
+
+
+ def initialize_vector_store() -> FAISS:
+     """Robust initialization of the vector store"""
+     embeddings = GradioEmbeddings()
+
+     try:
+         logging.info("Loading existing database...")
+         return FAISS.load_local(
+             DATABASE_DIR,
+             embeddings,
+             allow_dangerous_deserialization=True
+         )
+     except Exception as e:
+         logging.error(f"FAISS loading error: {str(e)}")
+         raise
+
+
def generate_response(user_input: str, vector_store: FAISS) -> Optional[str]:
-     """Generate response using Qwen API"""
+     """Generate a response with complete error handling"""
    try:
-         # Contextual search
        docs_scores = vector_store.similarity_search_with_score(
            user_input,
-             k=TOP_K_RESULTS*3
+             k=TOP_K_RESULTS * 3
        )

-         # Filter results
        filtered_docs = [
            (doc, score) for doc, score in docs_scores
            if score < SIMILARITY_THRESHOLD
        ]
        filtered_docs.sort(key=lambda x: x[1])

        if not filtered_docs:
-             return "Aucune correspondance trouvée. Essayez des termes plus spécifiques."
+             return ("No matches found in MTC texts. "
+                     "Try using more specific terms.")

        best_docs = [doc for doc, _ in filtered_docs[:TOP_K_RESULTS]]

-         # Build context
        context = "\n".join(
-             f"=== Source {i+1} ===\n{doc.page_content}\n"
+             f"=== Source {i + 1} ===\n{doc.page_content}\n"
            for i, doc in enumerate(best_docs)
        )

-         # Call Qwen API
-         client = Client(QWEN_API_URL, verbose=False)
-         response = client.predict(
+         response = Client("Qwen/Qwen2.5-Max-Demo").predict(
            query=user_input,
            history=[],
            system=BASE_SYSTEM_PROMPT.format(context=context),
            api_name="/model_chat"
        )

-         # Extract response
        if isinstance(response, tuple) and len(response) >= 2:
            chat_history = response[1]
-             if chat_history and len(chat_history[-1]) >= 2:
-                 return chat_history[-1][1]
+             if isinstance(chat_history, list) and len(chat_history) > 0:
+                 last_message = chat_history[-1]
+                 if isinstance(last_message, (list, tuple)) and len(last_message) >= 2:
+                     return last_message[1]

-         return "Réponse indisponible - Veuillez reformuler votre question."
+         return "Response unavailable - Please rephrase your question."

    except Exception as e:
        logging.error(f"Generation error: {str(e)}", exc_info=True)
-         return None
+         return "An error occurred while generating the response."

- # Initialize models and vector store
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model = SentenceTransformer("cnmoro/snowflake-arctic-embed-m-v2.0-cpu", device=device, trust_remote_code=True)
- embeddings = LocalEmbeddings(model)
- vector_store = initialize_vector_store(embeddings)
-
- # Gradio interface
- @spaces.GPU
- def embed(document: str):
-     return model.encode(document).tolist()
-
- def chat_response(message: str, history: List[Tuple[str, str]]):
-     response = generate_response(message, vector_store)
-     return response or "Erreur de génération - Veuillez réessayer."
-
- with gr.Blocks() as app:
-     gr.Markdown("# MTC Knowledge Assistant")
-
-     with gr.Tab("Embeddings"):
-         gr.Markdown("## Text Embedding Demo")
-         text_input = gr.Textbox(label="Enter text to embed")
-         output = gr.JSON(label="Embedding Vector")
-         text_input.submit(embed, inputs=text_input, outputs=output)
-
-     with gr.Tab("MTC Chat"):
-         gr.Markdown("## Posez vos questions sur la médecine traditionnelle chinoise")
-         chatbot = gr.Chatbot(height=500)
-         msg = gr.Textbox(label="Votre question")
-         clear = gr.ClearButton([msg, chatbot])
-
-         msg.submit(
-             chat_response,
-             inputs=[msg, chatbot],
-             outputs=[msg, chatbot],
-             queue=True
-         )

+ def chatbot(query):
+     """Main function to run the chatbot"""
+     try:
+         vs = initialize_vector_store()
+         response = generate_response(query, vs)
+         return response or "No response generated."
+     except Exception as e:
+         logging.error(f"Chatbot error: {str(e)}")
+         return f"An error occurred: {str(e)}"
+
+
+ # Gradio Interface
if __name__ == "__main__":
-     app.launch(server_name="0.0.0.0", server_port=7860)
+     try:
+         interface = gr.Interface(
+             fn=chatbot,
+             inputs=gr.Textbox(lines=7, placeholder="Enter your query here..."),
+             outputs=gr.Textbox(lines=7, placeholder="Response from MTC will appear here..."),
+             title="MTC Chatbot",
+             description="Ask questions about MTC and get answers based on the provided data."
+         )
+         interface.launch()
+
+     except Exception as e:
+         logging.critical(f"CRITICAL ERROR: {str(e)}")
+         print("Failed to launch Gradio interface. Check logs.")
+
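With this commit the Space no longer builds the index at startup: initialize_vector_store() only loads index.faiss / index.pkl from the repository root, so those files must be uploaded alongside app.py. A minimal sketch of how they could be regenerated offline with the same embedding client (the script name, the naive fixed-size chunking, and the local copy of data-mtc.txt are assumptions, not part of this commit):

# build_index.py - hypothetical offline helper, not shipped in this commit.
# Reuses GradioEmbeddings, CHUNK_SIZE and DATA_FILE from the new app.py and
# writes index.faiss / index.pkl into the repo root, where DATABASE_DIR = "."
# expects to find them.
from langchain_community.vectorstores import FAISS
from app import GradioEmbeddings, CHUNK_SIZE, DATA_FILE

with open(DATA_FILE, "r", encoding="utf-8") as f:
    text = f.read()

# Naive fixed-size chunking; the removed split_text_into_chunks additionally
# tried to cut chunks on sentence boundaries.
chunks = [text[i:i + CHUNK_SIZE].strip() for i in range(0, len(text), CHUNK_SIZE)]
chunks = [c for c in chunks if c]

store = FAISS.from_texts(chunks, GradioEmbeddings())  # one /embed call per chunk
store.save_local(".")  # produces index.faiss and index.pkl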
requirements.txt CHANGED
@@ -1,11 +1,7 @@
- gradio>=5.23.2
- sentence-transformers
- torch
langchain
- langchain-community
+ langchain_community
faiss-cpu
- gradio-client
- tqdm
- requests
+ gradio
+ gradio_client
numpy
- einops==0.7.0
+
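Once the index files are in place, a quick end-to-end check of the trimmed dependency set could look like this (hypothetical snippet; it calls the public localsavageai/embijiji3 and Qwen/Qwen2.5-Max-Demo Spaces, so it needs network access):

# smoke_test.py - hypothetical check, not shipped in this commit.
# Loads the root-level FAISS index and round-trips one query through the
# embedding Space and the Qwen chat endpoint.
from app import initialize_vector_store, generate_response

vs = initialize_vector_store()  # reads index.faiss / index.pkl from "."
print(generate_response("Qu'est-ce que le Qi ?", vs))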