Spaces:

Futuresony
/

Futuresony_auto-g.pt_1_V1.03.08.2025

Runtime error

App Files Files Community

Futuresony_auto-g.pt_1_V1.03.08.2025 / api.py

Futuresony

Update api.py

c632e1e verified about 1 month ago

raw

history blame contribute delete

33.7 kB

	# Install necessary libraries if not already present
	# These lines will be executed as shell commands by %%writefile
	# !pip install duckduckgo_search dateparser

	# Combined Imports (already present in LOR3w0_3wiYL, keeping for clarity)
	import os
	from huggingface_hub import InferenceClient
	import torch
	import re
	import warnings
	import time
	import json
	from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig # Not directly used in this API code, but kept for potential future use
	from sentence_transformers import SentenceTransformer, util, CrossEncoder
	import gspread
	# from google.colab import auth # Not directly used in this API code
	from google.auth import default # Not directly used in this API code
	from tqdm import tqdm # Not directly used in this API code
	from ddgs import DDGS # Corrected import based on user feedback
	import spacy
	from datetime import date, timedelta, datetime
	from dateutil.relativedelta import relativedelta # Corrected typo
	import traceback
	import base64
	import dateparser
	from dateparser.search import search_dates
	import pytz
	# from google.colab import userdata # Removed Colab-specific import
	from concurrent.futures import ThreadPoolExecutor, as_completed

	# FastAPI Imports
	from fastapi import FastAPI, Request, HTTPException, Depends, Security
	from fastapi.security.api_key import APIKeyHeader
	from dotenv import load_dotenv # For loading environment variables from a .env file
	from fastapi.responses import JSONResponse # Import JSONResponse

	# Load variables from a local .env file into the process environment, if one exists.
	load_dotenv()

	# Suppress every warning of category UserWarning for the rest of the process.
	warnings.filterwarnings("ignore", category=UserWarning)

	# Secrets and configuration come from environment variables; each is None when unset.
	HF_TOKEN = os.getenv("HF_TOKEN")
	SHEET_ID = os.getenv("SHEET_ID")
	GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
	API_KEY = os.getenv("API_KEY")  # Compared against the 'x-api-key' request header

	# Log whether each secret is present without printing its value: a non-empty secret
	# is shown as a run of '*' of the same length, a missing or empty one as 'None'.
	# (The previous expression `'' len(...)` had lost its `*` characters and was a
	# syntax error, which prevented the module from being imported at all.)
	print(f"HF_TOKEN loaded: {'*' * len(HF_TOKEN) if HF_TOKEN else 'None'}")
	print(f"SHEET_ID loaded: {'*' * len(SHEET_ID) if SHEET_ID else 'None'}")
	print(f"GOOGLE_BASE64_CREDENTIALS loaded: {'*' * len(GOOGLE_BASE64_CREDENTIALS) if GOOGLE_BASE64_CREDENTIALS else 'None'}")
	print(f"API_KEY loaded: {'*' * len(API_KEY) if API_KEY else 'None'}")

	# Readiness flags, one per component. All start out False; the matching
	# loader/initializer function flips its flag to True once the component is usable.
	llm_client_initialized = spacy_loaded = embedder_loaded = False
	reranker_loaded = business_info_loaded = False

	# Hugging Face inference client shared by every LLM call in this module.
	client = None


	def initialize_llm_client():
	    """Create the global InferenceClient and verify it with one tiny chat request.

	    Sets ``llm_client_initialized`` to True only when the probe request returns a
	    truthy response. Requires ``HF_TOKEN``; without it the function logs an error
	    and returns immediately. Any exception resets ``client`` to None.
	    """
	    global client, llm_client_initialized
	    llm_client_initialized = False
	    print("Attempting to initialize InferenceClient...")

	    if not HF_TOKEN:
	        print("Error: HF_TOKEN not loaded. InferenceClient cannot be initialized.")
	        return

	    try:
	        client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)

	        # Probe the endpoint with a minimal request so failures show up at startup.
	        try:
	            probe_ok = bool(
	                client.chat_completion(
	                    messages=[{"role": "user", "content": "hello"}],
	                    max_tokens=10,
	                )
	            )
	        except Exception as probe_error:
	            print(f"InferenceClient test call failed: {probe_error}")
	            print(traceback.format_exc())
	            client = None  # An unusable client must not be left behind
	        else:
	            if probe_ok:
	                print("InferenceClient test call successful.")
	                llm_client_initialized = True
	            else:
	                print("InferenceClient test call failed.")

	        print(
	            "InferenceClient initialized."
	            if llm_client_initialized
	            else "InferenceClient initialization failed."
	        )

	    except Exception as init_error:
	        print(f"Error initializing InferenceClient: {init_error}")
	        print(traceback.format_exc())
	        client = None
	        llm_client_initialized = False

	# SpaCy pipeline object; stays None until load_spacy_model() succeeds.
	nlp = None


	def load_spacy_model():
	    """Load the 'en_core_web_sm' SpaCy pipeline into the global ``nlp``.

	    On success ``spacy_loaded`` becomes True. On any failure the error and a
	    traceback are printed, ``nlp`` is set to None and ``spacy_loaded`` stays False.
	    """
	    global nlp, spacy_loaded
	    spacy_loaded = False
	    print("Attempting to load SpaCy model 'en_core_web_sm'...")

	    try:
	        # The model package is expected to be installed already (e.g. at image build).
	        pipeline = spacy.load("en_core_web_sm")
	    except Exception as load_error:
	        # OSError is what spacy.load raises for a missing model package; it gets a
	        # dedicated message, everything else is reported generically.
	        if isinstance(load_error, OSError):
	            print("SpaCy model 'en_core_web_sm' not found. Please ensure it is installed.")
	        else:
	            print(f"Error loading SpaCy model: {load_error}")
	        print(traceback.format_exc())
	        nlp = None
	        spacy_loaded = False
	    else:
	        nlp = pipeline
	        print("SpaCy model 'en_core_web_sm' loaded.")
	        spacy_loaded = True


	# Sentence-embedding model used for business-info retrieval; None until loaded.
	embedder = None


	def load_embedder_model():
	    """Load the paraphrase-MiniLM-L6-v2 SentenceTransformer into ``embedder``.

	    Sets ``embedder_loaded`` to True on success. On failure the error and a
	    traceback are printed, ``embedder`` is set to None and the flag stays False.
	    """
	    global embedder, embedder_loaded
	    embedder_loaded = False
	    print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")

	    try:
	        model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
	    except Exception as load_error:
	        print(f"Error loading Sentence Transformer: {load_error}")
	        print(traceback.format_exc())
	        embedder = None
	        embedder_loaded = False
	    else:
	        embedder = model
	        print("Sentence Transformer loaded.")
	        embedder_loaded = True


	# Cross-encoder used to re-rank retrieved business rows; None until loaded.
	reranker = None


	def load_reranker_model():
	    """Load the ms-marco-MiniLM-L6-v2 CrossEncoder into the global ``reranker``.

	    Sets ``reranker_loaded`` to True on success. On failure the error, a hint about
	    the model identifier and a traceback are printed, ``reranker`` is set to None
	    and the flag stays False.
	    """
	    global reranker, reranker_loaded
	    reranker_loaded = False
	    print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")

	    try:
	        model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
	    except Exception as load_error:
	        print(f"Error loading Cross-Encoder Reranker: {load_error}")
	        print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
	        print(traceback.format_exc())
	        reranker = None
	        reranker_loaded = False
	    else:
	        reranker = model
	        print("Cross-Encoder Reranker loaded.")
	        reranker_loaded = True


	# gspread client shared by the sheet-loading code; None until authenticated.
	gc = None


	def authenticate_google_sheets():
	    """Build the global gspread client from base64-encoded service-account JSON.

	    Reads ``GOOGLE_BASE64_CREDENTIALS``, base64-decodes it, parses the JSON and
	    hands the resulting dict to ``gspread.service_account_from_dict``.

	    Returns:
	        True when the client was created, False when the secret is missing or
	        any step of decoding/authentication raised.
	    """
	    global gc
	    print("Authenticating Google Account...")

	    if not GOOGLE_BASE64_CREDENTIALS:
	        print("Error: GOOGLE_BASE64_CREDENTIALS secret not found. Skipping Google Sheets authentication.")
	        return False

	    try:
	        decoded_bytes = base64.b64decode(GOOGLE_BASE64_CREDENTIALS)
	        service_account_info = json.loads(decoded_bytes.decode('utf-8'))
	        gc = gspread.service_account_from_dict(service_account_info)
	    except Exception as auth_error:
	        print(f"Google Sheets authentication failed: {auth_error}")
	        print(traceback.format_exc())
	        return False

	    print("Google Sheets authentication successful via service account.")
	    return True

	# Retrieval state filled by load_business_info():
	data = []                        # sheet rows that have both 'Service' and 'Description'
	descriptions_for_embedding = []  # one "Service: ... Description: ..." string per row
	embeddings = torch.tensor([])    # embeddings of the strings above (empty when unavailable)


	def load_business_info():
	    """Read the first worksheet of the configured Google Sheet and embed its rows.

	    Rows lacking a truthy 'Service' or 'Description' value are dropped. The kept
	    rows are stored in ``data``, their text form in ``descriptions_for_embedding``
	    and the encoded vectors in ``embeddings``. ``business_info_loaded`` ends up
	    True only when at least one description was encoded successfully.

	    Returns early (flag left False) when the gspread client is not authenticated
	    or ``SHEET_ID`` is unset. Sheet-access errors are logged, not raised.
	    """
	    global data, descriptions_for_embedding, embeddings, business_info_loaded
	    business_info_loaded = False
	    print("Attempting to load business information from Google Sheet...")

	    if gc is None:
	        print("Skipping Google Sheet loading: Google Sheets client not authenticated.")
	        return

	    if not SHEET_ID:
	        print("Error: SHEET_ID not set. Skipping Google Sheet loading.")
	        return

	    try:
	        worksheet = gc.open_by_key(SHEET_ID).sheet1
	        print(f"Successfully opened Google Sheet with ID: {SHEET_ID}")
	        records = worksheet.get_all_records()

	        # Keep only rows that carry both fields needed to build a description.
	        usable_rows = []
	        if not records:
	            print(f"Warning: No data records found in Google Sheet with ID: {SHEET_ID}")
	        else:
	            usable_rows = [
	                record for record in records
	                if record.get('Service') and record.get('Description')
	            ]
	            if not usable_rows:
	                print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")

	        data = usable_rows
	        descriptions_for_embedding = [
	            f"Service: {record['Service']}. Description: {record['Description']}"
	            for record in data
	        ]

	        can_encode = bool(descriptions_for_embedding) and embedder is not None
	        if not can_encode:
	            print("Skipping encoding descriptions: No descriptions found or embedder not available.")
	            embeddings = torch.tensor([])
	            business_info_loaded = False
	        else:
	            print("Encoding descriptions...")
	            try:
	                embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)
	                print("Encoding complete.")
	                business_info_loaded = True
	            except Exception as encode_error:
	                print(f"Error during description encoding: {encode_error}")
	                embeddings = torch.tensor([])
	                business_info_loaded = False

	        print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
	        print(
	            "Business information retrieval (RAG) is available."
	            if business_info_loaded
	            else "Business information retrieval (RAG) is NOT available."
	        )

	    except gspread.exceptions.SpreadsheetNotFound:
	        print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
	        print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
	        business_info_loaded = False
	    except Exception as sheet_error:
	        print(f"An error occurred while accessing the Google Sheet: {sheet_error}")
	        print(traceback.format_exc())
	        business_info_loaded = False

	# Business-info retrieval: embedding similarity search plus optional re-ranking.
	def retrieve_business_info(query: str, top_n: int = 3) -> list:
	    """Return the sheet rows most similar to ``query``.

	    The query is embedded and compared by cosine similarity against the
	    precomputed ``embeddings``; the best ``min(top_n, len(data))`` rows are
	    selected. When a reranker is loaded those rows are re-ordered by its scores
	    (highest first).

	    Returns:
	        A list of row dicts, or an empty list when retrieval is unavailable or
	        an error occurs (the error is logged).
	    """
	    retrieval_ready = (
	        business_info_loaded
	        and embedder is not None
	        and descriptions_for_embedding
	        and data
	    )
	    if not retrieval_ready:
	        print("Business information retrieval is not available or data is empty.")
	        return []

	    try:
	        query_vector = embedder.encode(query, convert_to_tensor=True)
	        similarities = util.cos_sim(query_vector, embeddings)[0]
	        limit = min(top_n, len(data))
	        best_indices = torch.topk(similarities, k=limit).indices.tolist()
	        candidates = [data[idx] for idx in best_indices]

	        if reranker is None or not candidates:
	            return candidates

	        print("Re-ranking top results...")
	        pairs = [(query, descriptions_for_embedding[idx]) for idx in best_indices]
	        scores = reranker.predict(pairs)
	        # Stable sort on the score alone, so ties keep their similarity order.
	        ordered = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
	        reranked = [row for _, row in ordered]
	        print("Re-ranking complete.")
	        return reranked

	    except Exception as retrieval_error:
	        print(f"Error during business information retrieval: {retrieval_error}")
	        print(traceback.format_exc())
	        return []

	# Web-search tool: DuckDuckGo text search returning result dicts.
	def perform_duckduckgo_search(query: str, max_results: int = 5):
	    """Run a DuckDuckGo text search and return the results as a list of dicts.

	    Queries that are empty or contain fewer than two whitespace-separated words
	    are rejected up front, before the rate-limit delay and before a DDGS session
	    is opened, so skipped queries cost nothing.

	    Args:
	        query: The search text; surrounding whitespace is ignored.
	        max_results: Upper bound on the number of results requested.

	    Returns:
	        The list of result dicts as produced by ``DDGS.text``. An empty list is
	        returned for skipped queries, for searches without hits, and when the
	        search raises (the error and traceback are printed).
	    """
	    print(f"Executing Tool: perform_duckduckgo_search with query='{query}')")

	    search_query = (query or "").strip()
	    if len(search_query.split()) < 2:
	        print(f"Skipping search for short query: '{search_query}'")
	        return []

	    try:
	        time.sleep(1)  # Fixed pause before every real search to stay under rate limits

	        print(f"Sending search query to DuckDuckGo: '{search_query}'")
	        with DDGS() as ddgs:
	            # list() accepts both a generator and a list; `or []` guards against None.
	            search_results_list = list(ddgs.text(search_query, max_results=max_results) or [])
	    except Exception as e:
	        print(f"Error during Duckduckgo search for '{search_query}': {e}")
	        print(traceback.format_exc())
	        return []

	    print(f"Raw results from DuckDuckGo: {search_results_list}")
	    if search_results_list:
	        print(f"DuckDuckGo search for '{search_query}' completed. Found {len(search_results_list)} results.")
	    elif max_results > 0:
	        print(f"DuckDuckGo search for '{search_query}' returned no results.")

	    return search_results_list

	# Date/time tool: detects a date expression in a query with dateparser.

	# Swahili date/time vocabulary, matched as whole words so that English text such as
	# "Leonardo" ('leo') or "organic" ('gani') is not mistaken for Swahili.
	_SWAHILI_DATE_WORDS_RE = re.compile(
	    r"\b(?:tarehe|siku|saa|muda|leo|kesho|jana|ngapi|gani|mwezi|mwaka)\b",
	    re.IGNORECASE,
	)


	def perform_date_calculation(query: str) -> "str | None":
	    """Find a date/time expression in ``query`` and describe it in a sentence.

	    Uses ``dateparser.search.search_dates`` with Swahili and English enabled,
	    future dates preferred, and "now" in the Africa/Dar_es_Salaam timezone as the
	    relative base. Only the first match is used. The answer is written in Swahili
	    when the query contains one of the known Swahili date words, otherwise in
	    English.

	    Args:
	        query: Free-text user question.

	    Returns:
	        A human-readable sentence, or None when no date/time was found or the
	        parser raised. Callers treat any non-None value as "this is a date
	        question", so errors must not be reported through the return value.
	    """
	    print(f"Executing Tool: perform_date_calculation with query='{query}') using dateparser.search_dates")

	    try:
	        eafrica_tz = pytz.timezone('Africa/Dar_es_Salaam')
	        now = datetime.now(eafrica_tz)
	    except pytz.UnknownTimeZoneError:
	        print("Error: Unknown timezone 'Africa/Dar_es_Salaam'. Using default system time.")
	        now = datetime.now()

	    try:
	        found = search_dates(
	            query,
	            settings={
	                "PREFER_DATES_FROM": "future",
	                "RELATIVE_BASE": now
	            },
	            languages=['sw', 'en']
	        )

	        if not found:
	            print("dateparser.search_dates could not parse any date/time.")
	            return None

	        text_snippet, parsed = found[0]
	        print(f"dateparser.search_dates found: text='{text_snippet}', parsed='{parsed}')")

	        is_swahili = _SWAHILI_DATE_WORDS_RE.search(query) is not None

	        # Make `parsed` and `now` agree on awareness so they can be subtracted.
	        if now.tzinfo is not None and parsed.tzinfo is None:
	            parsed = now.tzinfo.localize(parsed)
	        elif now.tzinfo is None and parsed.tzinfo is not None:
	            parsed = parsed.replace(tzinfo=None)

	        if parsed.date() == now.date():
	            # "Today": a result within a minute of now, or exactly midnight, is
	            # treated as a request for the current date and time.
	            if abs((parsed - now).total_seconds()) < 60 or parsed.time() == datetime.min.time():
	                print("Query parsed to today's date and time is close to 'now' or midnight, returning current time/date.")
	                if is_swahili:
	                    return f"Kwa saa za Afrika Mashariki (Tanzania), tarehe ya leo ni {now.strftime('%A, %d %B %Y')} na saa ni {now.strftime('%H:%M:%S')}."
	                return f"In East Africa (Tanzania), the current date is {now.strftime('%A, %d %B %Y')} and the time is {now.strftime('%H:%M:%S')}."

	            print(f"Query parsed to a specific time today: {parsed.strftime('%H:%M:%S')}")
	            if is_swahili:
	                return f"Hiyo inafanyika leo, {parsed.strftime('%A, %d %B %Y')}, saa {parsed.strftime('%H:%M:%S')} saa za Afrika Mashariki."
	            return f"That falls on today, {parsed.strftime('%A, %d %B %Y')}, at {parsed.strftime('%H:%M:%S')} East Africa Time."

	        print(f"Query parsed to a specific date: {parsed.strftime('%A, %d %B %Y')} at {parsed.strftime('%H:%M:%S')}")
	        time_str = parsed.strftime('%H:%M:%S')
	        date_str = parsed.strftime('%A, %d %B %Y')

	        if parsed.tzinfo:
	            tz_name = parsed.tzinfo.tzname(parsed) or 'UTC'
	            if is_swahili:
	                return f"Hiyo inafanyika tarehe {date_str} saa {time_str} {tz_name}."
	            return f"That falls on {date_str} at {time_str} {tz_name}."

	        if is_swahili:
	            return f"Hiyo inafanyika tarehe {date_str} saa {time_str}."
	        return f"That falls on {date_str} at {time_str}."

	    except Exception as e:
	        # A parser failure means "no date detected"; returning an error string here
	        # would make determine_tool_usage classify the query as a date question.
	        print(f"Error during dateparser.search_dates execution: {e}")
	        print(traceback.format_exc())
	        return None

	# Tool routing: decides which tool (if any) should answer a single question.
	def determine_tool_usage(query: str) -> str:
	    """Choose the tool for ``query``.

	    Checks are made in priority order:

	    1. If business info is loaded, the LLM is asked whether the query concerns
	       business-internal knowledge; an answer beginning with "yes" selects
	       'business_info_retrieval'.
	    2. If ``perform_date_calculation`` finds a date/time, 'date_calculation'.
	    3. The LLM is asked whether a web search is needed; a reply containing
	       'duckduckgo_search' selects that tool.

	    A failing LLM call is logged and that check is skipped.

	    Returns:
	        'business_info_retrieval', 'date_calculation', 'duckduckgo_search' or
	        'none'.
	    """
	    if business_info_loaded:
	        messages_business_check = [{"role": "user", "content": f"Does the following query ask about a specific person, service, offering, or description that is likely to be found only within a specific business's internal knowledge base, and not general knowledge? For example, questions about 'Salum' or 'Jackson Kisanga' are likely business-related, while questions about 'the current president of the USA' or 'who won the Ballon d'Or' are general knowledge. Answer only 'yes' or 'no'. Query: {query}"}]
	        try:
	            business_check_response = client.chat_completion(
	                messages=messages_business_check,
	                max_tokens=10,
	                temperature=0.1
	            ).choices[0].message.content or ""
	            # Accept "yes", "Yes.", "**yes**", "'yes'" ... but not e.g. "yesterday".
	            if re.match(r"\W*yes\b", business_check_response, re.IGNORECASE):
	                print(f"Detected as specific business info query based on LLM check: '{query}'")
	                return "business_info_retrieval"
	            print(f"LLM check indicates not a specific business info query: '{query}'")
	        except Exception as e:
	            print(f"Error during LLM call for business info check for query '{query}': {e}")
	            print(traceback.format_exc())
	            print(f"Proceeding without business info check for query '{query}' due to error.")
	    else:
	        print("Skipping LLM business info check: Business information not loaded.")

	    # Any non-None result means dateparser found a date/time in the query.
	    if perform_date_calculation(query) is not None:
	        print(f"Detected as date/time calculation query based on dateparser result for: '{query}'")
	        return "date_calculation"

	    messages_tool_determination_search = [{"role": "user", "content": f"Does the following query require searching the web for current or general knowledge information (e.g., news, facts, definitions, current events)? Respond ONLY with 'duckduckgo_search' or 'none'. Query: {query}"}]
	    try:
	        search_determination_response = client.chat_completion(
	            messages=messages_tool_determination_search,
	            max_tokens=20,
	            temperature=0.1,
	            top_p=0.9
	        ).choices[0].message.content or ""

	        if "duckduckgo_search" in search_determination_response.strip().lower():
	            print(f"Model-determined tool for '{query}': 'duckduckgo_search'")
	            return "duckduckgo_search"
	        print(f"Model-determined tool for '{query}': 'none' (for search)")

	    except Exception as e:
	        print(f"Error during LLM call for search tool determination for query '{query}': {e}")
	        print(traceback.format_exc())
	        print(f"Proceeding without search tool check for query '{query}' due to error.")

	    print(f"No specific tool determined for '{query}'. Defaulting to 'none'.")
	    return "none"


	# Final answer generation: prompt assembly plus one chat-completion call.
	def _format_tool_result_item(position: int, result) -> str:
	    """Render one list element of a tool result as a prompt section."""
	    if isinstance(result, dict) and 'Service' in result and 'Description' in result:
	        return (
	            f"Business Info {position}:\n"
	            f"Service: {result.get('Service', 'N/A')}\n"
	            f"Description: {result.get('Description', 'N/A')}\n\n"
	        )
	    # Search hits: the link may be stored under 'href' or 'url' depending on the
	    # search backend; checking only 'url' would dump 'href'-keyed hits as raw dicts.
	    if isinstance(result, dict) and ('href' in result or 'url' in result):
	        link = result.get('href') or result.get('url') or 'N/A'
	        return (
	            f"Search Result {position}:\n"
	            f"Title: {result.get('title', 'N/A')}\n"
	            f"URL: {link}\n"
	            f"Snippet: {result.get('body', 'N/A')}\n\n"
	        )
	    return f"{result}\n\n"


	def generate_text(prompt: str, tool_results: dict = None) -> str:
	    """Ask the LLM to answer ``prompt``, optionally grounded in tool results.

	    Args:
	        prompt: The user's original query.
	        tool_results: Optional mapping of question -> tool output. Outputs may be
	            lists (business rows, search hits or other items), dicts or plain
	            values; falsy outputs are skipped.

	    Returns:
	        The model's reply text ('' when the model returns no content), or a fixed
	        error message when the client is unavailable or the request fails.
	    """
	    if not llm_client_initialized or client is None:
	        print("LLM client is not initialized. Cannot generate text.")
	        return "Error: The language model is not available at this time."

	    full_prompt_builder = [prompt]

	    if tool_results and any(tool_results.values()):
	        full_prompt_builder.append("\n\nTool Results:\n")
	        for question, results in tool_results.items():
	            if not results:
	                continue
	            full_prompt_builder.append(f"--- Results for: {question} ---\n")
	            if isinstance(results, list):
	                full_prompt_builder.extend(
	                    _format_tool_result_item(position, item)
	                    for position, item in enumerate(results, start=1)
	                )
	            elif isinstance(results, dict):
	                full_prompt_builder.extend(f"{key}: {value}\n" for key, value in results.items())
	                full_prompt_builder.append("\n")
	            else:
	                full_prompt_builder.append(f"{results}\n\n")

	        full_prompt_builder.append("Based on the provided tool results, answer the user's original query. If a question was answered by a tool, use the tool's result directly in your response.")
	        print("Added tool results and instruction to final prompt.")
	    else:
	        print("No tool results to add to final prompt.")

	    full_prompt = "".join(full_prompt_builder)

	    print(f"Sending prompt to LLM:\n---\n{full_prompt}\n---")

	    try:
	        response = client.chat_completion(
	            messages=[{"role": "user", "content": full_prompt}],
	            max_tokens=500,
	            temperature=0.7,
	            top_p=0.95
	        ).choices[0].message.content or ""

	        print("LLM generation successful using chat_completion.")
	        return response
	    except Exception as e:
	        print(f"Error during final LLM generation: {e}")
	        print(traceback.format_exc())
	        return "An error occurred while generating the final response."

	# Core chat pipeline used by the API endpoint.
	def process_query_with_tools(query: str):
	    """Answer ``query`` by splitting it into questions, running tools, then the LLM.

	    Steps:
	        1. Ask the LLM to list the distinct questions contained in the query.
	        2. Pick a tool per question with ``determine_tool_usage``.
	        3. Run each selected tool and collect non-None results.
	        4. Call ``generate_text`` with the original query and the results.

	    If the breakdown call fails or yields no questions, the whole query is
	    treated as a single question so tool selection still runs.

	    Returns:
	        The final response text, or an error message when the LLM client is not
	        initialized.
	    """
	    print(f"Processing query with tools: {query}")

	    if not llm_client_initialized or client is None:
	        print("LLM client not initialized. Cannot process query.")
	        return "Error: The language model is not available. Please try again later."

	    print("\n--- Breaking down query ---")
	    prompt_for_question_breakdown = (
	        "\nAnalyze the following query and list each distinct question found within it.\n"
	        "Present each question on a new line, starting with a hyphen.\n"
	        "\n"
	        f"Query: {query}\n"
	    )
	    # Leading list marker: '-', '*', a bullet character, or a number like '1.' / '1)'.
	    bullet_pattern = re.compile(r"^\s*(?:[-*\u2022]+|\d+[.)])\s*")
	    try:
	        question_breakdown_response = client.chat_completion(
	            messages=[{"role": "user", "content": prompt_for_question_breakdown}],
	            max_tokens=100,
	            temperature=0.1,
	            top_p=0.9
	        ).choices[0].message.content or ""
	        lines = [line.strip() for line in question_breakdown_response.split('\n') if line.strip()]
	        # When the model used bullets, keep only bulleted lines so that preamble
	        # text such as "Here are the questions:" is not treated as a question.
	        bulleted = [line for line in lines if bullet_pattern.match(line)]
	        stripped = [bullet_pattern.sub('', line).strip() for line in (bulleted or lines)]
	        cleaned_questions = [question for question in stripped if question]
	        if not cleaned_questions:
	            # An empty breakdown must not silently disable every tool.
	            print("Question breakdown returned nothing; using the full query as one question.")
	            cleaned_questions = [query]
	        print("Individual questions identified:")
	        for q in cleaned_questions:
	            print(f"- {q}")
	    except Exception as e:
	        print(f"Error during LLM call for question breakdown: {e}")
	        print(traceback.format_exc())
	        cleaned_questions = [query]

	    print("\n--- Determining tools per question ---")
	    determined_tools = {}
	    for question in cleaned_questions:
	        print(f"\nAnalyzing question for tool determination: '{question}'")
	        determined_tools[question] = determine_tool_usage(question)
	        print(f"Determined tool for '{question}': '{determined_tools[question]}'")

	    print("\nSummary of determined tools per question:")
	    for question, tool in determined_tools.items():
	        print(f"'{question}': '{tool}'")

	    print("\n--- Executing tools and collecting results ---")
	    tool_results = {}
	    for question, tool in determined_tools.items():
	        print(f"\nExecuting tool '{tool}' for question: '{question}'")
	        result = None

	        if tool == "date_calculation":
	            result = perform_date_calculation(question)
	        elif tool == "duckduckgo_search":
	            result = perform_duckduckgo_search(question)
	        elif tool == "business_info_retrieval":
	            result = retrieve_business_info(question)
	        elif tool == "none":
	            print(f"Skipping tool execution for question: '{question}' as tool is 'none'. LLM will handle.")

	        if result is not None:
	            tool_results[question] = result

	    print("\n--- Collected Tool Results ---")
	    if tool_results:
	        for question, result in tool_results.items():
	            print(f"\nQuestion: {question}")
	            print(f"Result: {result}")
	    else:
	        print("No tool results were collected.")
	    print("\n--------------------------")

	    print("\n--- Generating final response ---")
	    final_response = generate_text(query, tool_results)

	    print("\n--- Final Response from LLM ---")
	    print(final_response)
	    print("\n----------------------------")

	    return final_response

	# --- FastAPI Application Setup ---

	app = FastAPI()

	# Security scheme that reads the API key from the 'x-api-key' request header.
	# With auto_error=True FastAPI itself rejects requests that lack the header.
	api_key_header = APIKeyHeader(name="x-api-key", auto_error=True)


	def get_api_key(api_key_header_value: str = Security(api_key_header)):
	    """FastAPI dependency that validates the 'x-api-key' header against ``API_KEY``.

	    Returns:
	        The header value when it is accepted.

	    Raises:
	        HTTPException: 403 when ``API_KEY`` is configured and the header differs.
	    """
	    import secrets  # Local import: only this function needs it

	    # NOTE(review): when API_KEY is not configured every key is accepted (fail-open).
	    # This is kept for backward compatibility -- confirm it is intended outside
	    # development, otherwise reject requests here instead.
	    if API_KEY is None:
	        return api_key_header_value

	    # Constant-time comparison so response timing does not leak how much of the key
	    # matched; bytes are compared because compare_digest rejects non-ASCII str.
	    if api_key_header_value is not None and secrets.compare_digest(
	        api_key_header_value.encode("utf-8"), API_KEY.encode("utf-8")
	    ):
	        return api_key_header_value

	    raise HTTPException(status_code=403, detail="Could not validate credentials")


	# API Endpoint
	@app.post("/chat/")
	async def chat_endpoint(request: Request, api_key: str = Depends(get_api_key)):
	    """Process a chat query with the LLM and tools.

	    Expects a JSON object body with a non-empty "query" field and a valid
	    'x-api-key' header (enforced by the ``get_api_key`` dependency).

	    Returns:
	        ``{"response": <answer text>}``.

	    Raises:
	        HTTPException: 400 for an invalid/non-object JSON body or a missing
	            query, 503 when the LLM client is not initialized, 500 for any
	            unexpected error.
	    """
	    try:
	        try:
	            body = await request.json()
	        except ValueError:
	            # json.JSONDecodeError is a ValueError: the client sent malformed JSON.
	            raise HTTPException(status_code=400, detail="Request body must be valid JSON.")

	        if not isinstance(body, dict):
	            raise HTTPException(status_code=400, detail="Request body must be a JSON object.")

	        query = body.get("query")
	        if not query:
	            raise HTTPException(status_code=400, detail="Query parameter is required.")

	        if not llm_client_initialized or client is None:
	            raise HTTPException(status_code=503, detail="LLM client not initialized. Please wait or check logs.")

	        response = process_query_with_tools(query)
	        return {"response": response}

	    except HTTPException:
	        # Deliberate 4xx/503 responses raised above must reach the client unchanged;
	        # without this clause the generic handler below rewrote them all to 500.
	        raise
	    except Exception as e:
	        print(f"Error in chat_endpoint: {e}")
	        print(traceback.format_exc())
	        raise HTTPException(status_code=500, detail=f"Internal server error: {e}")

	# Health Check Endpoint
	@app.get("/health")
	async def health_check():
	    """Report component readiness.

	    Responds 200 with status "ok" when the LLM client, SpaCy model, embedder and
	    reranker are all ready, and business info is loaded whenever both SHEET_ID and
	    GOOGLE_BASE64_CREDENTIALS are set. Otherwise responds 503 and lists every
	    False flag (plus a note when some secrets are missing) under
	    "unhealthy_components".
	    """
	    secret_presence = {
	        "HF_TOKEN": HF_TOKEN is not None,
	        "SHEET_ID": SHEET_ID is not None,
	        "GOOGLE_BASE64_CREDENTIALS": GOOGLE_BASE64_CREDENTIALS is not None,
	        "API_KEY": API_KEY is not None,
	    }
	    status = {
	        "status": "unhealthy",
	        "llm_client_initialized": llm_client_initialized,
	        "business_info_loaded": business_info_loaded,
	        "spacy_loaded": spacy_loaded,
	        "embedder_loaded": embedder_loaded,
	        "reranker_loaded": reranker_loaded,
	        "secrets_loaded": secret_presence,
	    }

	    # Business info only counts as critical when the sheet secrets are configured.
	    rag_configured = SHEET_ID and GOOGLE_BASE64_CREDENTIALS
	    critical_checks = (
	        llm_client_initialized,
	        spacy_loaded,
	        embedder_loaded,
	        reranker_loaded,
	        business_info_loaded if rag_configured else True,
	    )

	    if all(critical_checks):
	        status["status"] = "ok"
	        return JSONResponse(status_code=200, content=status)

	    # Every top-level flag that is exactly False is reported by name.
	    failing = [name for name, value in status.items() if value is False]
	    if secret_presence and not all(secret_presence.values()):
	        failing.append("secrets_loaded (partial)")
	    status["unhealthy_components"] = failing
	    return JSONResponse(status_code=503, content=status)


	# Optional: Root endpoint for basic info
	@app.get("/")
	async def read_root():
	    """Return a status summary with a "warning" string for anything not ready.

	    The warning is the concatenation of one sentence per problem (each prefixed
	    with a space); the key is absent when nothing is wrong.
	    """
	    status = {
	        "message": "LLM with Tools API is running",
	        "llm_client_initialized": llm_client_initialized,
	        "business_info_loaded": business_info_loaded,
	        "spacy_loaded": spacy_loaded,
	        "embedder_loaded": embedder_loaded,
	        "reranker_loaded": reranker_loaded,
	        "secrets_loaded": {
	            "HF_TOKEN": HF_TOKEN is not None,
	            "SHEET_ID": SHEET_ID is not None,
	            "GOOGLE_BASE64_CREDENTIALS": GOOGLE_BASE64_CREDENTIALS is not None,
	            "API_KEY": API_KEY is not None,
	        },
	    }

	    # (problem detected?, sentence) pairs, in the order they appear in the warning.
	    # Missing business info is only a problem when the sheet secrets are configured.
	    checks = (
	        (not all(status["secrets_loaded"].values()), " Not all secrets are loaded."),
	        (not status["llm_client_initialized"], " LLM client not initialized."),
	        (
	            not status["business_info_loaded"] and (SHEET_ID and GOOGLE_BASE64_CREDENTIALS),
	            " Business info (RAG) not loaded.",
	        ),
	        (not status["spacy_loaded"], " SpaCy model not loaded."),
	        (not status["embedder_loaded"], " Embedder not loaded."),
	        (not status["reranker_loaded"], " Reranker not loaded."),
	    )
	    warning_parts = [sentence for triggered, sentence in checks if triggered]
	    if warning_parts:
	        status["warning"] = "".join(warning_parts)

	    return status


	# Component initialization runs at import time (and when executed directly).
	# Order matters: load_business_info() reads the gspread client created by
	# authenticate_google_sheets() and the embedder created by load_embedder_model().
	print("Starting component initialization...")
	for _startup_step in (
	    authenticate_google_sheets,
	    load_spacy_model,
	    load_embedder_model,
	    load_reranker_model,
	    load_business_info,
	    initialize_llm_client,
	):
	    _startup_step()
	del _startup_step  # Do not leave the loop variable behind as a module global

	print("Component initialization sequence complete.")

	# To run this FastAPI application in Colab for testing purposes,
	# you can use uvicorn.run() in a separate cell or a script.
	# For production deployment, you would typically use a proper ASGI server setup.

	# Example of how to run in Colab (requires a separate cell or script):
	# import uvicorn
	# from api import app # Assuming this code is saved as api.py
	# uvicorn.run(app, host="0.0.0.0", port=8000) # Or use a more secure host/port for production