Futuresony commited on
Commit
c632e1e
·
verified ·
1 Parent(s): 6758464

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +160 -76
api.py CHANGED
@@ -52,56 +52,109 @@ print(f"SHEET_ID loaded: {'*' * len(SHEET_ID) if SHEET_ID else 'None'}")
52
  print(f"GOOGLE_BASE64_CREDENTIALS loaded: {'*' * len(GOOGLE_BASE64_CREDENTIALS) if GOOGLE_BASE64_CREDENTIALS else 'None'}")
53
  print(f"API_KEY loaded: {'*' * len(API_KEY) if API_KEY else 'None'}")
54
 
 
 
 
 
 
 
55
 
56
  # Initialize InferenceClient (already present in LOR3w0_wiYL)
57
- # Ensure HF_TOKEN is available before initializing
58
- if not HF_TOKEN:
59
- print("Error: HF_TOKEN not loaded. InferenceClient cannot be initialized.")
60
- client = None # Set client to None if token is missing
61
- else:
 
 
 
 
 
62
  try:
63
  client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
64
- print("InferenceClient initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  except Exception as e:
66
  print(f"Error initializing InferenceClient: {e}")
67
  print(traceback.format_exc())
68
  client = None # Set client to None if initialization fails
69
-
70
 
71
  # Load spacy model for sentence splitting (already present in LOR3w0_wiYL)
72
  nlp = None
73
- try:
74
- # Load the model directly, assuming it's installed during Docker build
75
- nlp = spacy.load("en_core_web_sm")
76
- print("SpaCy model 'en_core_web_sm' loaded.")
77
- except OSError:
78
- print("SpaCy model 'en_core_web_sm' not found. Please ensure it is installed.")
79
- print(traceback.format_exc()) # Print traceback for debugging
80
- nlp = None # Set nlp to None if loading fails
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
  # Load SentenceTransformer for RAG/business info retrieval and semantic detection (already present in LOR3w0_wiYL)
84
  embedder = None
85
- try:
 
 
 
86
  print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
87
- embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
88
- print("Sentence Transformer loaded.")
89
- except Exception as e:
 
 
90
  print(f"Error loading Sentence Transformer: {e}")
91
  print(traceback.format_exc()) # Print traceback for debugging
 
 
92
 
93
 
94
  # Load a Cross-Encoder model for re-ranking retrieved documents (already present in LOR3w0_wiYL)
95
  reranker = None
96
- try:
 
 
 
97
  print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
98
- reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
99
- print("Cross-Encoder Reranker loaded.")
100
- except Exception as e:
101
- print(f"Error loading Cross-Encoder Reranker: {e}")
102
- print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
103
- print(traceback.format_exc())
104
- reranker = None
 
 
 
105
 
106
 
107
  # Google Sheets Authentication (already present in LOR3w0_wiYL)
@@ -129,12 +182,13 @@ def authenticate_google_sheets():
129
  data = [] # Global variable to store loaded data
130
  descriptions_for_embedding = []
131
  embeddings = torch.tensor([])
132
- business_info_available = False # Flag to indicate if business info was loaded successfully
133
 
134
  def load_business_info():
135
  """Loads business information from Google Sheet and creates embeddings."""
136
- global data, descriptions_for_embedding, embeddings, business_info_available
137
- business_info_available = False # Reset flag
 
138
 
139
  if gc is None:
140
  print("Skipping Google Sheet loading: Google Sheets client not authenticated.")
@@ -168,28 +222,31 @@ def load_business_info():
168
  try:
169
  embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)
170
  print("Encoding complete.")
171
- business_info_available = True
172
  except Exception as e:
173
  print(f"Error during description encoding: {e}")
174
  embeddings = torch.tensor([])
175
- business_info_available = False
176
  else:
177
  print("Skipping encoding descriptions: No descriptions found or embedder not available.")
178
  embeddings = torch.tensor([])
179
- business_info_available = False
180
 
181
  print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
182
- if not business_info_available:
183
  print("Business information retrieval (RAG) is NOT available.")
 
 
 
184
 
185
  except gspread.exceptions.SpreadsheetNotFound:
186
  print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
187
  print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
188
- business_info_available = False
189
  except Exception as e:
190
  print(f"An error occurred while accessing the Google Sheet: {e}")
191
  print(traceback.format_exc())
192
- business_info_available = False
193
 
194
  # Business Info Retrieval (RAG) (already present in LOR3w0_wiYL)
195
  def retrieve_business_info(query: str, top_n: int = 3) -> list:
@@ -197,7 +254,7 @@ def retrieve_business_info(query: str, top_n: int = 3) -> list:
197
  Retrieves relevant business information from loaded data based on a query.
198
  """
199
  global data
200
- if not business_info_available or embedder is None or not descriptions_for_embedding or not data:
201
  print("Business information retrieval is not available or data is empty.")
202
  return []
203
 
@@ -349,7 +406,7 @@ def determine_tool_usage(query: str) -> str:
349
  """
350
  query_lower = query.lower()
351
 
352
- if business_info_available:
353
  messages_business_check = [{"role": "user", "content": f"Does the following query ask about a specific person, service, offering, or description that is likely to be found *only* within a specific business's internal knowledge base, and not general knowledge? For example, questions about 'Salum' or 'Jackson Kisanga' are likely business-related, while questions about 'the current president of the USA' or 'who won the Ballon d'Or' are general knowledge. Answer only 'yes' or 'no'. Query: {query}"}]
354
  try:
355
  business_check_response = client.chat_completion(
@@ -366,6 +423,9 @@ def determine_tool_usage(query: str) -> str:
366
  print(f"Error during LLM call for business info check for query '{query}': {e}")
367
  print(traceback.format_exc())
368
  print(f"Proceeding without business info check for query '{query}' due to error.")
 
 
 
369
 
370
  date_time_check_result = perform_date_calculation(query)
371
  if date_time_check_result is not None:
@@ -402,6 +462,10 @@ def generate_text(prompt: str, tool_results: dict = None) -> str:
402
  """
403
  Generates text using the configured LLM, optionally incorporating tool results.
404
  """
 
 
 
 
405
  full_prompt_builder = [prompt]
406
 
407
  if tool_results and any(tool_results.values()):
@@ -468,6 +532,12 @@ def process_query_with_tools(query: str):
468
  """
469
  print(f"Processing query with tools: {query}")
470
 
 
 
 
 
 
 
471
  print("\n--- Breaking down query ---")
472
  prompt_for_question_breakdown = f"""
473
  Analyze the following query and list each distinct question found within it.
@@ -572,8 +642,8 @@ async def chat_endpoint(request: Request, api_key: str = Depends(get_api_key)):
572
  raise HTTPException(status_code=400, detail="Query parameter is required.")
573
 
574
  # Ensure client is initialized before processing query
575
- if client is None:
576
- raise HTTPException(status_code=503, detail="LLM client not initialized. Please check HF_TOKEN.")
577
 
578
  response = process_query_with_tools(query)
579
  return {"response": response}
@@ -588,14 +658,15 @@ async def chat_endpoint(request: Request, api_key: str = Depends(get_api_key)):
588
  async def health_check():
589
  """
590
  Health check endpoint to verify the application is running and essential components are loaded.
 
591
  """
592
  status = {
593
- "status": "ok",
594
- "llm_client_initialized": client is not None,
595
- "business_info_loaded": business_info_available,
596
- "spacy_loaded": nlp is not None,
597
- "embedder_loaded": embedder is not None,
598
- "reranker_loaded": reranker is not None,
599
  "secrets_loaded": {
600
  "HF_TOKEN": HF_TOKEN is not None,
601
  "SHEET_ID": SHEET_ID is not None,
@@ -603,16 +674,25 @@ async def health_check():
603
  "API_KEY": API_KEY is not None,
604
  }
605
  }
606
- unhealthy_components = [key for key, value in status.items() if isinstance(value, bool) and not value]
607
- if status["secrets_loaded"] and not all(status["secrets_loaded"].values()):
608
- unhealthy_components.append("secrets_loaded (partial)")
609
-
610
- if unhealthy_components:
611
- status["status"] = "unhealthy"
612
- status["unhealthy_components"] = unhealthy_components
613
- return JSONResponse(status=503, content=status) # Return 503 Service Unavailable if unhealthy
614
 
615
- return status # Return 200 OK if healthy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
 
617
 
618
  # Optional: Root endpoint for basic info
@@ -623,11 +703,11 @@ async def read_root():
623
  """
624
  status = {
625
  "message": "LLM with Tools API is running",
626
- "llm_client_initialized": client is not None,
627
- "business_info_loaded": business_info_available,
628
- "spacy_loaded": nlp is not None,
629
- "embedder_loaded": embedder is not None,
630
- "reranker_loaded": reranker is not None,
631
  "secrets_loaded": {
632
  "HF_TOKEN": HF_TOKEN is not None,
633
  "SHEET_ID": SHEET_ID is not None,
@@ -636,29 +716,33 @@ async def read_root():
636
  }
637
  }
638
  if not all(status["secrets_loaded"].values()):
639
- status["warning"] = "Not all secrets are loaded. RAG and LLM may not function correctly."
640
  if not status["llm_client_initialized"]:
641
  status["warning"] = status.get("warning", "") + " LLM client not initialized."
642
- if not status["business_info_loaded"]:
643
  status["warning"] = status.get("warning", "") + " Business info (RAG) not loaded."
 
 
 
 
 
 
 
644
 
645
  return status
646
 
647
 
648
- # Initialize Google Sheets authentication and load business info on startup
649
  # This will run when the script is imported or executed directly
650
- authenticate_google_sheets()
651
- load_business_info()
652
-
653
- # Check if spacy model, embedder, and reranker loaded correctly on startup
654
- if nlp is None:
655
- print("Warning: SpaCy model not loaded. Sentence splitting may not work correctly.")
656
- if embedder is None:
657
- print("Warning: Sentence Transformer (embedder) not loaded. RAG will not be available.")
658
- if reranker is None:
659
- print("Warning: Cross-Encoder Reranker not loaded. Re-ranking of RAG results will not be performed.")
660
- if not business_info_available:
661
- print("Warning: Business information (Google Sheet data) not loaded successfully. RAG will not be available. Please ensure the GOOGLE_BASE64_CREDENTIALS secret is set correctly.")
662
 
663
  # To run this FastAPI application in Colab for testing purposes,
664
  # you can use uvicorn.run() in a separate cell or a script.
@@ -667,4 +751,4 @@ if not business_info_available:
667
  # Example of how to run in Colab (requires a separate cell or script):
668
  # import uvicorn
669
  # from api import app # Assuming this code is saved as api.py
670
- # uvicorn.run(app, host="0.0.0.0", port=8000) # Or use a more secure host/port for production
 
52
  print(f"GOOGLE_BASE64_CREDENTIALS loaded: {'*' * len(GOOGLE_BASE64_CREDENTIALS) if GOOGLE_BASE64_CREDENTIALS else 'None'}")
53
  print(f"API_KEY loaded: {'*' * len(API_KEY) if API_KEY else 'None'}")
54
 
55
+ # Global variables for component initialization status
56
+ llm_client_initialized = False
57
+ spacy_loaded = False
58
+ embedder_loaded = False
59
+ reranker_loaded = False
60
+ business_info_loaded = False
61
 
62
  # Initialize InferenceClient (already present in LOR3w0_wiYL)
63
+ client = None
64
def initialize_llm_client():
    """Create the Hugging Face InferenceClient and verify it with a probe call.

    Reads the module-level ``HF_TOKEN`` and writes the module-level ``client``
    and ``llm_client_initialized``. The two globals are always left in
    agreement: either ``client`` holds a client whose probe call returned a
    truthy response and ``llm_client_initialized`` is True, or ``client`` is
    None and ``llm_client_initialized`` is False.

    Returns:
        None. The outcome is reported through the two globals and via printed
        log lines; no exception from client construction or the probe call
        propagates to the caller.
    """
    global client, llm_client_initialized

    # Start from a known "unavailable" state so a repeated call can never
    # leave a stale client behind while the flag says "not initialized".
    client = None
    llm_client_initialized = False
    print("Attempting to initialize InferenceClient...")

    if not HF_TOKEN:
        print("Error: HF_TOKEN not loaded. InferenceClient cannot be initialized.")
        return

    try:
        # Keep the new client local until it has been validated; other code
        # reads the global `client` directly.
        candidate = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
    except Exception as e:
        print(f"Error initializing InferenceClient: {e}")
        print(traceback.format_exc())
        return

    # Small probe request to confirm the client can actually complete a chat.
    try:
        test_response = candidate.chat_completion(
            messages=[{"role": "user", "content": "hello"}],
            max_tokens=10,
        )
    except Exception as test_e:
        print(f"InferenceClient test call failed: {test_e}")
        print(traceback.format_exc())
        test_response = None
    else:
        if test_response:
            print("InferenceClient test call successful.")
        else:
            print("InferenceClient test call failed.")

    if test_response:
        # Publish the client only after the probe succeeded.
        client = candidate
        llm_client_initialized = True
        print("InferenceClient initialized.")
    else:
        print("InferenceClient initialization failed.")
98
 
99
  # Load spacy model for sentence splitting (already present in LOR3w0_wiYL)
100
  nlp = None
101
def load_spacy_model():
    """Load the SpaCy pipeline 'en_core_web_sm' into the module-level ``nlp``.

    Sets the module-level ``spacy_loaded`` to True when loading succeeds.
    On any exception the traceback is printed, ``nlp`` is set to None and
    ``spacy_loaded`` is left False; nothing is raised to the caller.
    """
    global nlp, spacy_loaded

    spacy_loaded = False
    print("Attempting to load SpaCy model 'en_core_web_sm'...")
    try:
        # Load the model directly, assuming it's installed during Docker build
        nlp = spacy.load("en_core_web_sm")
        print("SpaCy model 'en_core_web_sm' loaded.")
        spacy_loaded = True
    except Exception as load_error:
        # OSError gets a dedicated "not found" message; every other failure
        # is reported with the exception text.
        if isinstance(load_error, OSError):
            print("SpaCy model 'en_core_web_sm' not found. Please ensure it is installed.")
        else:
            print(f"Error loading SpaCy model: {load_error}")
        print(traceback.format_exc())
        nlp = None
        spacy_loaded = False
121
 
122
 
123
  # Load SentenceTransformer for RAG/business info retrieval and semantic detection (already present in LOR3w0_wiYL)
124
  embedder = None
125
def load_embedder_model():
    """Load the Sentence Transformer into the module-level ``embedder``.

    Sets the module-level ``embedder_loaded`` to True when the model is
    constructed without error. On failure the traceback is printed,
    ``embedder`` is set to None and ``embedder_loaded`` stays False; nothing
    is raised to the caller.
    """
    global embedder, embedder_loaded

    embedder_loaded = False
    print("Attempting to load Sentence Transformer (sentence-transformers/paraphrase-MiniLM-L6-v2)...")
    try:
        embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
        print("Sentence Transformer loaded.")
    except Exception as load_error:
        print(f"Error loading Sentence Transformer: {load_error}")
        print(traceback.format_exc())
        embedder = None
    else:
        # Reached only when construction and the success log both completed.
        embedder_loaded = True
139
 
140
 
141
  # Load a Cross-Encoder model for re-ranking retrieved documents (already present in LOR3w0_wiYL)
142
  reranker = None
143
def load_reranker_model():
    """Load the Cross-Encoder into the module-level ``reranker``.

    Sets the module-level ``reranker_loaded`` to True when the model is
    constructed without error. On failure a hint about the model identifier
    and the traceback are printed, ``reranker`` is set to None and
    ``reranker_loaded`` stays False; nothing is raised to the caller.
    """
    global reranker, reranker_loaded

    reranker_loaded = False
    print("Attempting to load Cross-Encoder Reranker (cross-encoder/ms-marco-MiniLM-L6-v2)...")
    try:
        reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
        print("Cross-Encoder Reranker loaded.")
    except Exception as load_error:
        print(f"Error loading Cross-Encoder Reranker: {load_error}")
        print("Please ensure the model identifier 'cross-encoder/ms-marco-MiniLM-L6-v2' is correct and accessible on Hugging Face Hub.")
        print(traceback.format_exc())
        reranker = None
    else:
        # Reached only when construction and the success log both completed.
        reranker_loaded = True
158
 
159
 
160
  # Google Sheets Authentication (already present in LOR3w0_wiYL)
 
182
  data = [] # Global variable to store loaded data
183
  descriptions_for_embedding = []
184
  embeddings = torch.tensor([])
185
+ # business_info_loaded (declared with the other status flags) is set by load_business_info()
186
 
187
  def load_business_info():
188
  """Loads business information from Google Sheet and creates embeddings."""
189
+ global data, descriptions_for_embedding, embeddings, business_info_loaded
190
+ business_info_loaded = False # Reset flag
191
+ print("Attempting to load business information from Google Sheet...")
192
 
193
  if gc is None:
194
  print("Skipping Google Sheet loading: Google Sheets client not authenticated.")
 
222
  try:
223
  embeddings = embedder.encode(descriptions_for_embedding, convert_to_tensor=True)
224
  print("Encoding complete.")
225
+ business_info_loaded = True
226
  except Exception as e:
227
  print(f"Error during description encoding: {e}")
228
  embeddings = torch.tensor([])
229
+ business_info_loaded = False
230
  else:
231
  print("Skipping encoding descriptions: No descriptions found or embedder not available.")
232
  embeddings = torch.tensor([])
233
+ business_info_loaded = False
234
 
235
  print(f"Loaded {len(descriptions_for_embedding)} entries from Google Sheet for embedding/RAG.")
236
+ if not business_info_loaded:
237
  print("Business information retrieval (RAG) is NOT available.")
238
+ else:
239
+ print("Business information retrieval (RAG) is available.")
240
+
241
 
242
  except gspread.exceptions.SpreadsheetNotFound:
243
  print(f"Error: Google Sheet with ID '{SHEET_ID}' not found.")
244
  print("Please check the SHEET_ID and ensure your authenticated Google Account has access to this sheet.")
245
+ business_info_loaded = False
246
  except Exception as e:
247
  print(f"An error occurred while accessing the Google Sheet: {e}")
248
  print(traceback.format_exc())
249
+ business_info_loaded = False
250
 
251
  # Business Info Retrieval (RAG) (already present in LOR3w0_wiYL)
252
  def retrieve_business_info(query: str, top_n: int = 3) -> list:
 
254
  Retrieves relevant business information from loaded data based on a query.
255
  """
256
  global data
257
+ if not business_info_loaded or embedder is None or not descriptions_for_embedding or not data:
258
  print("Business information retrieval is not available or data is empty.")
259
  return []
260
 
 
406
  """
407
  query_lower = query.lower()
408
 
409
+ if business_info_loaded: # Check if business info is loaded before attempting LLM check
410
  messages_business_check = [{"role": "user", "content": f"Does the following query ask about a specific person, service, offering, or description that is likely to be found *only* within a specific business's internal knowledge base, and not general knowledge? For example, questions about 'Salum' or 'Jackson Kisanga' are likely business-related, while questions about 'the current president of the USA' or 'who won the Ballon d'Or' are general knowledge. Answer only 'yes' or 'no'. Query: {query}"}]
411
  try:
412
  business_check_response = client.chat_completion(
 
423
  print(f"Error during LLM call for business info check for query '{query}': {e}")
424
  print(traceback.format_exc())
425
  print(f"Proceeding without business info check for query '{query}' due to error.")
426
+ else:
427
+ print("Skipping LLM business info check: Business information not loaded.")
428
+
429
 
430
  date_time_check_result = perform_date_calculation(query)
431
  if date_time_check_result is not None:
 
462
  """
463
  Generates text using the configured LLM, optionally incorporating tool results.
464
  """
465
+ if not llm_client_initialized or client is None:
466
+ print("LLM client is not initialized. Cannot generate text.")
467
+ return "Error: The language model is not available at this time."
468
+
469
  full_prompt_builder = [prompt]
470
 
471
  if tool_results and any(tool_results.values()):
 
532
  """
533
  print(f"Processing query with tools: {query}")
534
 
535
+ # Ensure LLM client is initialized before proceeding with any LLM calls
536
+ if not llm_client_initialized or client is None:
537
+ print("LLM client not initialized. Cannot process query.")
538
+ return "Error: The language model is not available. Please try again later."
539
+
540
+
541
  print("\n--- Breaking down query ---")
542
  prompt_for_question_breakdown = f"""
543
  Analyze the following query and list each distinct question found within it.
 
642
  raise HTTPException(status_code=400, detail="Query parameter is required.")
643
 
644
  # Ensure client is initialized before processing query
645
+ if not llm_client_initialized or client is None:
646
+ raise HTTPException(status_code=503, detail="LLM client not initialized. Please wait or check logs.")
647
 
648
  response = process_query_with_tools(query)
649
  return {"response": response}
 
658
  async def health_check():
659
  """
660
  Health check endpoint to verify the application is running and essential components are loaded.
661
+ Returns 200 OK if all critical components are loaded, 503 Service Unavailable otherwise.
662
  """
663
  status = {
664
+ "status": "unhealthy",
665
+ "llm_client_initialized": llm_client_initialized,
666
+ "business_info_loaded": business_info_loaded,
667
+ "spacy_loaded": spacy_loaded,
668
+ "embedder_loaded": embedder_loaded,
669
+ "reranker_loaded": reranker_loaded,
670
  "secrets_loaded": {
671
  "HF_TOKEN": HF_TOKEN is not None,
672
  "SHEET_ID": SHEET_ID is not None,
 
674
  "API_KEY": API_KEY is not None,
675
  }
676
  }
 
 
 
 
 
 
 
 
677
 
678
+ # Check if all critical components are loaded
679
+ all_critical_loaded = (
680
+ llm_client_initialized and
681
+ spacy_loaded and
682
+ embedder_loaded and
683
+ reranker_loaded and
684
+ (business_info_loaded if (SHEET_ID and GOOGLE_BASE64_CREDENTIALS) else True) # Business info is critical only if secrets are set
685
+ )
686
+
687
+ if all_critical_loaded:
688
+ status["status"] = "ok"
689
+ return JSONResponse(status_code=200, content=status)
690
+ else:
691
+ unhealthy_components = [key for key, value in status.items() if isinstance(value, bool) and not value]
692
+ if status["secrets_loaded"] and not all(status["secrets_loaded"].values()):
693
+ unhealthy_components.append("secrets_loaded (partial)")
694
+ status["unhealthy_components"] = unhealthy_components
695
+ return JSONResponse(status_code=503, content=status)
696
 
697
 
698
  # Optional: Root endpoint for basic info
 
703
  """
704
  status = {
705
  "message": "LLM with Tools API is running",
706
+ "llm_client_initialized": llm_client_initialized,
707
+ "business_info_loaded": business_info_loaded,
708
+ "spacy_loaded": spacy_loaded,
709
+ "embedder_loaded": embedder_loaded,
710
+ "reranker_loaded": reranker_loaded,
711
  "secrets_loaded": {
712
  "HF_TOKEN": HF_TOKEN is not None,
713
  "SHEET_ID": SHEET_ID is not None,
 
716
  }
717
  }
718
  if not all(status["secrets_loaded"].values()):
719
+ status["warning"] = status.get("warning", "") + " Not all secrets are loaded."
720
  if not status["llm_client_initialized"]:
721
  status["warning"] = status.get("warning", "") + " LLM client not initialized."
722
+ if not status["business_info_loaded"] and (SHEET_ID and GOOGLE_BASE64_CREDENTIALS):
723
  status["warning"] = status.get("warning", "") + " Business info (RAG) not loaded."
724
+ if not status["spacy_loaded"]:
725
+ status["warning"] = status.get("warning", "") + " SpaCy model not loaded."
726
+ if not status["embedder_loaded"]:
727
+ status["warning"] = status.get("warning", "") + " Embedder not loaded."
728
+ if not status["reranker_loaded"]:
729
+ status["warning"] = status.get("warning", "") + " Reranker not loaded."
730
+
731
 
732
  return status
733
 
734
 
735
+ # Initialize components on startup
736
  # This will run when the script is imported or executed directly
737
+ print("Starting component initialization...")
738
+ authenticate_google_sheets() # Authenticate first as it's needed for load_business_info
739
+ load_spacy_model()
740
+ load_embedder_model()
741
+ load_reranker_model()
742
+ load_business_info() # Load business info after authentication and embedder are ready
743
+ initialize_llm_client() # Initialize LLM client last as it might be the largest model
744
+
745
+ print("Component initialization sequence complete.")
 
 
 
746
 
747
  # To run this FastAPI application in Colab for testing purposes,
748
  # you can use uvicorn.run() in a separate cell or a script.
 
751
  # Example of how to run in Colab (requires a separate cell or script):
752
  # import uvicorn
753
  # from api import app # Assuming this code is saved as api.py
754
+ # uvicorn.run(app, host="0.0.0.0", port=8000) # Or use a more secure host/port for production