Spaces:

Futuresony
/

Futuresony_auto-g.pt_1_V1.03.08.2025

Runtime error

App Files Files Community

Futuresony committed on Aug 4

Commit

c632e1e

verified ·

1 Parent(s): 6758464

Update api.py

Browse files

Files changed (1) hide show

api.py +160 -76

api.py CHANGED Viewed

@@ -52,56 +52,109 @@ print(f"SHEET_ID loaded: {'*' * len(SHEET_ID) if SHEET_ID else 'None'}")
 print(f"GOOGLE_BASE64_CREDENTIALS loaded: {'*' * len(GOOGLE_BASE64_CREDENTIALS) if GOOGLE_BASE64_CREDENTIALS else 'None'}")
 print(f"API_KEY loaded: {'*' * len(API_KEY) if API_KEY else 'None'}")
 # Initialize InferenceClient (already present in LOR3w0_wiYL)
-# Ensure HF_TOKEN is available before initializing
-if not HF_TOKEN:
-    print("Error: HF_TOKEN not loaded. InferenceClient cannot be initialized.")
-    client = None # Set client to None if token is missing
-else:
     try:
         client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
-        print("InferenceClient initialized.")
     except Exception as e:
         print(f"Error initializing InferenceClient: {e}")
         print(traceback.format_exc())
         client = None # Set client to None if initialization fails
 # Load spacy model for sentence splitting (already present in LOR3w0_wiYL)
 nlp = None
-try:
-    # Load the model directly, assuming it's installed during Docker build
-    nlp = spacy.load("en_core_web_sm")
-    print("SpaCy model 'en_core_web_sm' loaded.")
-except OSError:
-    print("SpaCy model 'en_core_web_sm' not found. Please ensure it is installed.")
-    print(traceback.format_exc()) # Print traceback for debugging
-    nlp = None # Set nlp to None if loading fails
 # Load SentenceTransformer for RAG/business info retrieval and semantic detection (already present in LOR3w0_wiYL)
 embedder = None
-try:
     print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
-    embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
-    print("Sentence Transformer loaded.")
-except Exception as e:
      print(f"Error loading Sentence Transformer: {e}")
      print(traceback.format_exc()) # Print traceback for debugging
 # Load a Cross-Encoder model for re-ranking retrieved documents (already present in LOR3w0_wiYL)
 reranker = None
-try:
     print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
-    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
-    print("Cross-Encoder Reranker loaded.")
-except Exception as e:
-    print(f"Error loading Cross-Encoder Reranker: {e}")
-    print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
-    print(traceback.format_exc())
-    reranker = None
 # Google Sheets Authentication (already present in LOR3w0_wiYL)
@@ -129,12 +182,13 @@ def authenticate_google_sheets():
 data = [] # Global variable to store loaded data
 descriptions_for_embedding = []
 embeddings = torch.tensor([])
-business_info_available = False # Flag to indicate if business info was loaded successfully
 def load_business_info():
     """Loads business information from Google Sheet and creates embeddings."""
-    global data, descriptions_for_embedding, embeddings, business_info_available
-    business_info_available = False # Reset flag
     if gc is None:
         print("Skipping Google Sheet loading: Google Sheets client not authenticated.")
@@ -168,28 +222,31 @@ def load_business_info():
                     try:
                         embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)
                         print("Encoding complete.")
-                        business_info_available = True
                     except Exception as e:
                         print(f"Error during description encoding: {e}")
                         embeddings = torch.tensor([])
-                        business_info_available = False
                 else:
                     print("Skipping encoding descriptions: No descriptions found or embedder not available.")
                     embeddings = torch.tensor([])
-                    business_info_available = False
         print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
-        if not business_info_available:
             print("Business information retrieval (RAG) is NOT available.")
     except gspread.exceptions.SpreadsheetNotFound:
         print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
         print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
-        business_info_available = False
     except Exception as e:
         print(f"An error occurred while accessing the Google Sheet: {e}")
         print(traceback.format_exc())
-        business_info_available = False
 # Business Info Retrieval (RAG) (already present in LOR3w0_wiYL)
 def retrieve_business_info(query: str, top_n: int = 3) -> list:
@@ -197,7 +254,7 @@ def retrieve_business_info(query: str, top_n: int = 3) -> list:
     Retrieves relevant business information from loaded data based on a query.
     """
     global data
-    if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
         print("Business information retrieval is not available or data is empty.")
         return []
@@ -349,7 +406,7 @@ def determine_tool_usage(query: str) -> str:
     """
     query_lower = query.lower()
-    if business_info_available:
          messages_business_check = [{"role": "user", "content": f"Does the following query ask about a specific person, service, offering, or description that is likely to be found *only* within a specific business's internal knowledge base, and not general knowledge? For example, questions about 'Salum' or 'Jackson Kisanga' are likely business-related, while questions about 'the current president of the USA' or 'who won the Ballon d'Or' are general knowledge. Answer only 'yes' or 'no'. Query: {query}"}]
          try:
              business_check_response = client.chat_completion(
@@ -366,6 +423,9 @@ def determine_tool_usage(query: str) -> str:
              print(f"Error during LLM call for business info check for query '{query}': {e}")
              print(traceback.format_exc())
              print(f"Proceeding without business info check for query '{query}' due to error.")
     date_time_check_result = perform_date_calculation(query)
     if date_time_check_result is not None:
@@ -402,6 +462,10 @@ def generate_text(prompt: str, tool_results: dict = None) -> str:
     """
     Generates text using the configured LLM, optionally incorporating tool results.
     """
     full_prompt_builder = [prompt]
     if tool_results and any(tool_results.values()):
@@ -468,6 +532,12 @@ def process_query_with_tools(query: str):
     """
     print(f"Processing query with tools: {query}")
     print("\n--- Breaking down query ---")
     prompt_for_question_breakdown = f"""
 Analyze the following query and list each distinct question found within it.
@@ -572,8 +642,8 @@ async def chat_endpoint(request: Request, api_key: str = Depends(get_api_key)):
             raise HTTPException(status_code=400, detail="Query parameter is required.")
         # Ensure client is initialized before processing query
-        if client is None:
-             raise HTTPException(status_code=503, detail="LLM client not initialized. Please check HF_TOKEN.")
         response = process_query_with_tools(query)
         return {"response": response}
@@ -588,14 +658,15 @@ async def chat_endpoint(request: Request, api_key: str = Depends(get_api_key)):
 async def health_check():
     """
     Health check endpoint to verify the application is running and essential components are loaded.
     """
     status = {
-        "status": "ok",
-        "llm_client_initialized": client is not None,
-        "business_info_loaded": business_info_available,
-        "spacy_loaded": nlp is not None,
-        "embedder_loaded": embedder is not None,
-        "reranker_loaded": reranker is not None,
         "secrets_loaded": {
             "HF_TOKEN": HF_TOKEN is not None,
             "SHEET_ID": SHEET_ID is not None,
@@ -603,16 +674,25 @@ async def health_check():
             "API_KEY": API_KEY is not None,
         }
     }
-    unhealthy_components = [key for key, value in status.items() if isinstance(value, bool) and not value]
-    if status["secrets_loaded"] and not all(status["secrets_loaded"].values()):
-         unhealthy_components.append("secrets_loaded (partial)")
-    if unhealthy_components:
-         status["status"] = "unhealthy"
-         status["unhealthy_components"] = unhealthy_components
-         return JSONResponse(status=503, content=status) # Return 503 Service Unavailable if unhealthy
-    return status # Return 200 OK if healthy
 # Optional: Root endpoint for basic info
@@ -623,11 +703,11 @@ async def read_root():
     """
     status = {
         "message": "LLM with Tools API is running",
-        "llm_client_initialized": client is not None,
-        "business_info_loaded": business_info_available,
-        "spacy_loaded": nlp is not None,
-        "embedder_loaded": embedder is not None,
-        "reranker_loaded": reranker is not None,
         "secrets_loaded": {
             "HF_TOKEN": HF_TOKEN is not None,
             "SHEET_ID": SHEET_ID is not None,
@@ -636,29 +716,33 @@ async def read_root():
         }
     }
     if not all(status["secrets_loaded"].values()):
-        status["warning"] = "Not all secrets are loaded. RAG and LLM may not function correctly."
     if not status["llm_client_initialized"]:
         status["warning"] = status.get("warning", "") + " LLM client not initialized."
-    if not status["business_info_loaded"]:
          status["warning"] = status.get("warning", "") + " Business info (RAG) not loaded."
     return status
-# Initialize Google Sheets authentication and load business info on startup
 # This will run when the script is imported or executed directly
-authenticate_google_sheets()
-load_business_info()
-# Check if spacy model, embedder, and reranker loaded correctly on startup
-if nlp is None:
-    print("Warning: SpaCy model not loaded. Sentence splitting may not work correctly.")
-if embedder is None:
-    print("Warning: Sentence Transformer (embedder) not loaded. RAG will not be available.")
-if reranker is None:
-    print("Warning: Cross-Encoder Reranker not loaded. Re-ranking of RAG results will not be performed.")
-if not business_info_available:
-    print("Warning: Business information (Google Sheet data) not loaded successfully. RAG will not be available. Please ensure the GOOGLE_BASE64_CREDENTIALS secret is set correctly.")
 # To run this FastAPI application in Colab for testing purposes,
 # you can use uvicorn.run() in a separate cell or a script.
@@ -667,4 +751,4 @@ if not business_info_available:
 # Example of how to run in Colab (requires a separate cell or script):
 # import uvicorn
 # from api import app # Assuming this code is saved as api.py
-# uvicorn.run(app, host="0.0.0.0", port=8000) # Or use a more secure host/port for production

 print(f"GOOGLE_BASE64_CREDENTIALS loaded: {'*' * len(GOOGLE_BASE64_CREDENTIALS) if GOOGLE_BASE64_CREDENTIALS else 'None'}")
 print(f"API_KEY loaded: {'*' * len(API_KEY) if API_KEY else 'None'}")
+# Global variables for component initialization status
+llm_client_initialized = False
+spacy_loaded = False
+embedder_loaded = False
+reranker_loaded = False
+business_info_loaded = False
 # Initialize InferenceClient (already present in LOR3w0_wiYL)
+client = None
+def initialize_llm_client():
+    """Initializes the Hugging Face InferenceClient."""
+    global client, llm_client_initialized
+    llm_client_initialized = False
+    print("Attempting to initialize InferenceClient...")
+    if not HF_TOKEN:
+        print("Error: HF_TOKEN not loaded. InferenceClient cannot be initialized.")
+        return
     try:
         client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
+        # Make a small test call to confirm the client works; llm_client_initialized is set only if it succeeds
+        try:
+            test_response = client.chat_completion(messages=[{"role": "user", "content": "hello"}], max_tokens=10)
+            if test_response:
+                 print("InferenceClient test call successful.")
+                 llm_client_initialized = True
+            else:
+                 print("InferenceClient test call failed.")
+        except Exception as test_e:
+             print(f"InferenceClient test call failed: {test_e}")
+             print(traceback.format_exc())
+             client = None # Reset client if test fails
+        if llm_client_initialized:
+             print("InferenceClient initialized.")
+        else:
+             print("InferenceClient initialization failed.")
     except Exception as e:
         print(f"Error initializing InferenceClient: {e}")
         print(traceback.format_exc())
         client = None # Set client to None if initialization fails
+        llm_client_initialized = False
 # Load spacy model for sentence splitting (already present in LOR3w0_wiYL)
 nlp = None
+def load_spacy_model():
+    """Loads the SpaCy model."""
+    global nlp, spacy_loaded
+    spacy_loaded = False
+    print("Attempting to load SpaCy model 'en_core_web_sm'...")
+    try:
+        # Load the model directly, assuming it's installed during Docker build
+        nlp = spacy.load("en_core_web_sm")
+        print("SpaCy model 'en_core_web_sm' loaded.")
+        spacy_loaded = True
+    except OSError:
+        print("SpaCy model 'en_core_web_sm' not found. Please ensure it is installed.")
+        print(traceback.format_exc()) # Print traceback for debugging
+        nlp = None # Set nlp to None if loading fails
+        spacy_loaded = False
+    except Exception as e:
+        print(f"Error loading SpaCy model: {e}")
+        print(traceback.format_exc())
+        nlp = None
+        spacy_loaded = False
 # Load SentenceTransformer for RAG/business info retrieval and semantic detection (already present in LOR3w0_wiYL)
 embedder = None
+def load_embedder_model():
+    """Loads the Sentence Transformer model."""
+    global embedder, embedder_loaded
+    embedder_loaded = False
     print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
+    try:
+        embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
+        print("Sentence Transformer loaded.")
+        embedder_loaded = True
+    except Exception as e:
      print(f"Error loading Sentence Transformer: {e}")
      print(traceback.format_exc()) # Print traceback for debugging
+     embedder = None
+     embedder_loaded = False
 # Load a Cross-Encoder model for re-ranking retrieved documents (already present in LOR3w0_wiYL)
 reranker = None
+def load_reranker_model():
+    """Loads the Cross-Encoder model."""
+    global reranker, reranker_loaded
+    reranker_loaded = False
     print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
+    try:
+        reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
+        print("Cross-Encoder Reranker loaded.")
+        reranker_loaded = True
+    except Exception as e:
+        print(f"Error loading Cross-Encoder Reranker: {e}")
+        print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
+        print(traceback.format_exc())
+        reranker = None
+        reranker_loaded = False
 # Google Sheets Authentication (already present in LOR3w0_wiYL)
 data = [] # Global variable to store loaded data
 descriptions_for_embedding = []
 embeddings = torch.tensor([])
+# business_info_loaded (formerly business_info_available) is set by the load_business_info function
 def load_business_info():
     """Loads business information from Google Sheet and creates embeddings."""
+    global data, descriptions_for_embedding, embeddings, business_info_loaded
+    business_info_loaded = False # Reset flag
+    print("Attempting to load business information from Google Sheet...")
     if gc is None:
         print("Skipping Google Sheet loading: Google Sheets client not authenticated.")
                     try:
                         embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)
                         print("Encoding complete.")
+                        business_info_loaded = True
                     except Exception as e:
                         print(f"Error during description encoding: {e}")
                         embeddings = torch.tensor([])
+                        business_info_loaded = False
                 else:
                     print("Skipping encoding descriptions: No descriptions found or embedder not available.")
                     embeddings = torch.tensor([])
+                    business_info_loaded = False
         print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
+        if not business_info_loaded:
             print("Business information retrieval (RAG) is NOT available.")
+        else:
+            print("Business information retrieval (RAG) is available.")
     except gspread.exceptions.SpreadsheetNotFound:
         print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
         print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
+        business_info_loaded = False
     except Exception as e:
         print(f"An error occurred while accessing the Google Sheet: {e}")
         print(traceback.format_exc())
+        business_info_loaded = False
 # Business Info Retrieval (RAG) (already present in LOR3w0_wiYL)
 def retrieve_business_info(query: str, top_n: int = 3) -> list:
     Retrieves relevant business information from loaded data based on a query.
     """
     global data
+    if not business_info_loaded or embedder is None or not descriptions_for_embedding or not data:
         print("Business information retrieval is not available or data is empty.")
         return []
     """
     query_lower = query.lower()
+    if business_info_loaded: # Check if business info is loaded before attempting LLM check
          messages_business_check = [{"role": "user", "content": f"Does the following query ask about a specific person, service, offering, or description that is likely to be found *only* within a specific business's internal knowledge base, and not general knowledge? For example, questions about 'Salum' or 'Jackson Kisanga' are likely business-related, while questions about 'the current president of the USA' or 'who won the Ballon d'Or' are general knowledge. Answer only 'yes' or 'no'. Query: {query}"}]
          try:
              business_check_response = client.chat_completion(
              print(f"Error during LLM call for business info check for query '{query}': {e}")
              print(traceback.format_exc())
              print(f"Proceeding without business info check for query '{query}' due to error.")
+    else:
+        print("Skipping LLM business info check: Business information not loaded.")
     date_time_check_result = perform_date_calculation(query)
     if date_time_check_result is not None:
     """
     Generates text using the configured LLM, optionally incorporating tool results.
     """
+    if not llm_client_initialized or client is None:
+        print("LLM client is not initialized. Cannot generate text.")
+        return "Error: The language model is not available at this time."
     full_prompt_builder = [prompt]
     if tool_results and any(tool_results.values()):
     """
     print(f"Processing query with tools: {query}")
+    # Ensure LLM client is initialized before proceeding with any LLM calls
+    if not llm_client_initialized or client is None:
+        print("LLM client not initialized. Cannot process query.")
+        return "Error: The language model is not available. Please try again later."
     print("\n--- Breaking down query ---")
     prompt_for_question_breakdown = f"""
 Analyze the following query and list each distinct question found within it.
             raise HTTPException(status_code=400, detail="Query parameter is required.")
         # Ensure client is initialized before processing query
+        if not llm_client_initialized or client is None:
+             raise HTTPException(status_code=503, detail="LLM client not initialized. Please wait or check logs.")
         response = process_query_with_tools(query)
         return {"response": response}
 async def health_check():
     """
     Health check endpoint to verify the application is running and essential components are loaded.
+    Returns 200 OK if all critical components are loaded, 503 Service Unavailable otherwise.
     """
     status = {
+        "status": "unhealthy",
+        "llm_client_initialized": llm_client_initialized,
+        "business_info_loaded": business_info_loaded,
+        "spacy_loaded": spacy_loaded,
+        "embedder_loaded": embedder_loaded,
+        "reranker_loaded": reranker_loaded,
         "secrets_loaded": {
             "HF_TOKEN": HF_TOKEN is not None,
             "SHEET_ID": SHEET_ID is not None,
             "API_KEY": API_KEY is not None,
         }
     }
+    # Check if all critical components are loaded
+    all_critical_loaded = (
+        llm_client_initialized and
+        spacy_loaded and
+        embedder_loaded and
+        reranker_loaded and
+        (business_info_loaded if (SHEET_ID and GOOGLE_BASE64_CREDENTIALS) else True) # Business info is critical only if secrets are set
+    )
+    if all_critical_loaded:
+        status["status"] = "ok"
+        return JSONResponse(status_code=200, content=status)
+    else:
+        unhealthy_components = [key for key, value in status.items() if isinstance(value, bool) and not value]
+        if status["secrets_loaded"] and not all(status["secrets_loaded"].values()):
+            unhealthy_components.append("secrets_loaded (partial)")
+        status["unhealthy_components"] = unhealthy_components
+        return JSONResponse(status_code=503, content=status)
 # Optional: Root endpoint for basic info
     """
     status = {
         "message": "LLM with Tools API is running",
+        "llm_client_initialized": llm_client_initialized,
+        "business_info_loaded": business_info_loaded,
+        "spacy_loaded": spacy_loaded,
+        "embedder_loaded": embedder_loaded,
+        "reranker_loaded": reranker_loaded,
         "secrets_loaded": {
             "HF_TOKEN": HF_TOKEN is not None,
             "SHEET_ID": SHEET_ID is not None,
         }
     }
     if not all(status["secrets_loaded"].values()):
+        status["warning"] = status.get("warning", "") + " Not all secrets are loaded."
     if not status["llm_client_initialized"]:
         status["warning"] = status.get("warning", "") + " LLM client not initialized."
+    if not status["business_info_loaded"] and (SHEET_ID and GOOGLE_BASE64_CREDENTIALS):
          status["warning"] = status.get("warning", "") + " Business info (RAG) not loaded."
+    if not status["spacy_loaded"]:
+        status["warning"] = status.get("warning", "") + " SpaCy model not loaded."
+    if not status["embedder_loaded"]:
+        status["warning"] = status.get("warning", "") + " Embedder not loaded."
+    if not status["reranker_loaded"]:
+        status["warning"] = status.get("warning", "") + " Reranker not loaded."
     return status
+# Initialize components on startup
 # This will run when the script is imported or executed directly
+print("Starting component initialization...")
+authenticate_google_sheets() # Authenticate first as it's needed for load_business_info
+load_spacy_model()
+load_embedder_model()
+load_reranker_model()
+load_business_info() # Load business info after authentication and embedder are ready
+initialize_llm_client() # Initialize LLM client last; its setup issues a test chat_completion call
+print("Component initialization sequence complete.")
 # To run this FastAPI application in Colab for testing purposes,
 # you can use uvicorn.run() in a separate cell or a script.
 # Example of how to run in Colab (requires a separate cell or script):
 # import uvicorn
 # from api import app # Assuming this code is saved as api.py
+# uvicorn.run(app, host="0.0.0.0", port=8000) # Or use a more secure host/port for production