Futuresony committed (verified)
Commit 029dfaa · Parent: 9a6cded

Upload supabase.py

Files changed (1)
  1. supabase.py (+591, -0)
supabase.py ADDED
@@ -0,0 +1,591 @@
import psycopg2
import os
import pickle  # Still needed for general cache
import traceback
import numpy as np
import json
import base64  # Still needed for Google Sheets auth if that part of the code is kept elsewhere
import time  # Still needed for general cache

# Assuming gspread, SentenceTransformer, and faiss are installed
try:
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials
    from sentence_transformers import SentenceTransformer
    import faiss  # Required for the in-memory index built in load_business_info()
    print("gspread, SentenceTransformer, and faiss imported successfully.")
except ImportError:
    print("Error: Required libraries (gspread, oauth2client, sentence_transformers, faiss) not found.")
    print("Please install them: pip install psycopg2-binary gspread oauth2client sentence-transformers numpy faiss-cpu")
    pass  # Allow execution to continue with a warning

# Define environment variables for the Supabase database connection.
# These should be set in the environment where you run this script.
# Replace the defaults with your actual Supabase database credentials.
SUPABASE_DB_HOST = os.getenv("SUPABASE_DB_HOST", "wziqfkzaqorzthpoxhjh.supabase.co")
SUPABASE_DB_NAME = os.getenv("SUPABASE_DB_NAME", "postgres")
SUPABASE_DB_USER = os.getenv("SUPABASE_DB_USER", "postgres")
SUPABASE_DB_PASSWORD = os.getenv("SUPABASE_DB_PASSWORD", "Me21322972..........")  # Replace with your actual password
SUPABASE_DB_PORT = os.getenv("SUPABASE_DB_PORT", "5432")

# Define environment variables for Google Sheets authentication (kept for reference if needed elsewhere)
GOOGLE_BASE64_CREDENTIALS = os.getenv("GOOGLE_BASE64_CREDENTIALS")
SHEET_ID = "19ipxC2vHYhpXCefpxpIkpeYdI43a1Ku2kYwecgUULIw"  # Replace with your actual Sheet ID

# Define table names - updated to use the user's specified table name 'manual' for business data
BUSINESS_DATA_TABLE = "manual"  # Updated table name
CONVERSATION_HISTORY_TABLE = "conversation_history"  # Assuming this table name remains the same

# Define the embedding dimension (must match your chosen Sentence Transformer model)
EMBEDDING_DIM = 384  # Dimension for paraphrase-MiniLM-L6-v2 or all-MiniLM-L6-v2

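# --- Example: supplying credentials via the environment instead of the
# in-file defaults above (a minimal sketch; the values are placeholders) ---
# In a shell, before running the script:
#   export SUPABASE_DB_HOST="<project-ref>.supabase.co"
#   export SUPABASE_DB_PASSWORD="<your-password>"
# or from Python, before this module is imported:
#   os.environ["SUPABASE_DB_PASSWORD"] = "<your-password>"
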
# --- Database Functions ---
def connect_to_supabase():
    conn = None
    print("Attempting to connect to Supabase database...")
    # Check that the credentials are set
    if not all([SUPABASE_DB_HOST, SUPABASE_DB_NAME, SUPABASE_DB_USER, SUPABASE_DB_PASSWORD]):
        print("Error: Supabase database credentials (SUPABASE_DB_HOST, SUPABASE_DB_NAME, SUPABASE_DB_USER, SUPABASE_DB_PASSWORD) are not fully set as environment variables or defined in the script.")
        return None
    try:
        conn = psycopg2.connect(
            host=SUPABASE_DB_HOST,
            database=SUPABASE_DB_NAME,
            user=SUPABASE_DB_USER,
            password=SUPABASE_DB_PASSWORD,
            port=SUPABASE_DB_PORT,
        )
        print("Connected to Supabase database successfully!")
    except psycopg2.OperationalError as e:
        print(f"Database connection failed: {e}")
        print(traceback.format_exc())
    return conn

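# --- Example: quick connectivity check (a minimal sketch) ---
# conn = connect_to_supabase()
# if conn is not None:
#     with conn.cursor() as cur:
#         cur.execute("SELECT version();")
#         print(cur.fetchone()[0])
#     conn.close()
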
def setup_db_schema(conn):
    """Sets up the necessary tables and the pgvector extension."""
    print("Setting up database schema...")
    try:
        with conn.cursor() as cur:
            # Enable pgvector extension
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            print("pgvector extension enabled (if not already).")

            # Create the 'manual' table if it doesn't exist, matching the user's specified schema.
            # Note: the embedding column is added here for RAG purposes, assuming it belongs in the 'manual' table.
            # If embeddings should live in a separate table, this schema needs adjustment.
            # "Service" is UNIQUE so that ON CONFLICT ("Service") in the insert function below works.
            cur.execute(f"""
                CREATE TABLE IF NOT EXISTS {BUSINESS_DATA_TABLE} (
                    id SERIAL PRIMARY KEY,
                    "Service" TEXT NOT NULL UNIQUE,  -- double quotes preserve capitalized column names
                    "Description" TEXT NOT NULL,
                    "Price" TEXT,
                    "Available" TEXT,
                    embedding vector({EMBEDDING_DIM})  -- embedding column for RAG
                );
            """)
            print(f"Table '{BUSINESS_DATA_TABLE}' created (if not already) with columns: id, Service, Description, Price, Available, embedding.")

            # Create the conversation_history table (assuming this is still needed)
            cur.execute(f"""
                CREATE TABLE IF NOT EXISTS {CONVERSATION_HISTORY_TABLE} (
                    id SERIAL PRIMARY KEY,
                    timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
                    user_id TEXT,
                    user_query TEXT,
                    model_response TEXT,
                    tool_details JSONB,
                    model_used TEXT
                );
            """)
            print(f"Table '{CONVERSATION_HISTORY_TABLE}' created (if not already).")

        conn.commit()
        print("Database schema setup complete.")
        return True
    except Exception as e:
        print(f"Error setting up database schema: {e}")
        print(traceback.format_exc())
        conn.rollback()
        return False

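# --- Aside: pgvector can also run the similarity search server-side, so the
# in-memory FAISS index used below is one option, not a requirement. A minimal
# sketch (not part of the app's current flow); assumes a query embedding from
# the same SentenceTransformer model used elsewhere in this file. ---
# def pgvector_search(conn, query_embedding, top_n=3):
#     """Nearest-neighbour search in Postgres using pgvector's <-> (L2) operator."""
#     vec_literal = "[" + ",".join(str(x) for x in query_embedding) + "]"
#     with conn.cursor() as cur:
#         cur.execute(f"""
#             SELECT "Service", "Description", "Price", "Available"
#             FROM {BUSINESS_DATA_TABLE}
#             ORDER BY embedding <-> %s::vector
#             LIMIT %s;
#         """, (vec_literal, top_n))
#         return cur.fetchall()
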
# --- Manual Data Definition (kept for the migration script, but not used by the main app load) ---
# Define the business data manually based on the user's example
business_data_manual = [
    {"Service": "Savings Account", "Price": "Free", "Description": "A basic savings account with interest", "Available": "Yes"},
    # Add more data rows here in the same dictionary format
]

# --- Data Insertion Function (using manual data) ---
def insert_manual_data_to_supabase(conn, embedder_model):
    """Inserts manual business data into the Supabase database."""
    print("Inserting manual business data into database...")
    if embedder_model is None:
        print("Skipping data insertion: Embedder not available.")
        return False
    if EMBEDDING_DIM is None:
        print("Skipping data insertion: EMBEDDING_DIM not defined.")
        return False
    if not business_data_manual:
        print("No manual data defined for insertion.")
        return False

    try:
        # Check whether the business data table is already populated
        with conn.cursor() as cur:
            cur.execute(f"SELECT COUNT(*) FROM {BUSINESS_DATA_TABLE};")
            count = cur.fetchone()[0]
            if count > 0:
                print(f"Table '{BUSINESS_DATA_TABLE}' already contains {count} records. Skipping insertion of manual data.")
                return True  # Indicate success because the data is already there

        print(f"Processing {len(business_data_manual)} manual records for insertion.")

        insert_count = 0
        with conn.cursor() as cur:
            for row in business_data_manual:
                service = row.get('Service', '').strip()
                description = row.get('Description', '').strip()
                price = row.get('Price', '').strip()  # Get Price
                available = row.get('Available', '').strip()  # Get Available

                # The text used for embedding can include other fields if desired for RAG context
                description_for_embedding = f"Service: {service}. Description: {description}. Price: {price}. Available: {available}."

                if not service or not description:
                    print(f"Skipping row due to missing Service or Description: {row}")
                    continue

                # Generate an embedding for the description
                try:
                    # Assuming embedder_model is a SentenceTransformer instance
                    embedding = embedder_model.encode(description_for_embedding, convert_to_tensor=False)  # Encode single sentence
                    if embedding is not None:
                        embedding_list = embedding.tolist()  # Convert numpy array to list

                        # SQL query to insert data into the 'manual' table with all columns.
                        # Use double quotes for capitalized column names.
                        sql = f"""
                            INSERT INTO {BUSINESS_DATA_TABLE} ("Service", "Description", "Price", "Available", embedding)
                            VALUES (%s, %s, %s, %s, %s::vector)
                            ON CONFLICT ("Service") DO NOTHING;  -- Prevent duplicate inserts based on Service name
                        """
                        # Note: ON CONFLICT ("Service") relies on the UNIQUE constraint on "Service"
                        # defined in setup_db_schema(). If Service names are not unique or you need
                        # different conflict resolution, adjust the ON CONFLICT clause.
                        cur.execute(sql, (service, description, price, available, embedding_list))
                        insert_count += 1
                        # print(f"Processed Service: {service[:50]}...")  # Keep for debugging
                    else:
                        print(f"Skipping insertion for Service '{service[:50]}...' due to embedding generation failure.")
                except Exception as embed_e:
                    print(f"Error generating embedding for Service '{service[:50]}...': {embed_e}")
                    print(traceback.format_exc())
                    print("Skipping insertion for this row.")

        conn.commit()
        print(f"Data insertion process completed. Inserted {insert_count} records.")
        return True

    except Exception as e:
        conn.rollback()
        print(f"Error during data insertion: {e}")
        print(traceback.format_exc())
        return False
    # The `with` blocks above close their cursors automatically, so no
    # explicit finally/cur.close() is needed here.

# --- Main Execution Flow for Migration Script ---
# This block is intended to be run separately to perform the initial data migration.
# The main application startup logic lives in a different __main__ block.

# if __name__ == "__main__":
#     print("Starting RAG data insertion script from manual data...")
#
#     # 1. Initialize Embedder Model
#     try:
#         print(f"Loading Sentence Transformer model for embeddings (dimension: {EMBEDDING_DIM})...")
#         embedder = SentenceTransformer("paraphrase-MiniLM-L6-v2")
#         if embedder.get_sentence_embedding_dimension() != EMBEDDING_DIM:
#             print(f"Error: Loaded embedder dimension ({embedder.get_sentence_embedding_dimension()}) does not match expected EMBEDDING_DIM ({EMBEDDING_DIM}).")
#             print("Please check the model or update EMBEDDING_DIM.")
#             embedder = None
#         else:
#             print("Embedder model loaded successfully.")
#     except Exception as e:
#         print(f"Error loading Sentence Transformer model: {e}")
#         print(traceback.format_exc())
#         embedder = None
#
#     if embedder is None:
#         print("Embedder model not available. Cannot generate embeddings for data insertion.")
#
#     # 2. Connect to Database and Set Up Schema
#     db_conn = connect_to_supabase()
#     if db_conn is None:
#         print("Database connection failed. Cannot set up schema or insert data.")
#     else:
#         try:
#             if setup_db_schema(db_conn):
#                 print("\nDatabase schema setup successful.")
#
#                 # 3. Insert Manual Data
#                 if embedder is not None:
#                     if insert_manual_data_to_supabase(db_conn, embedder):
#                         print("\nManual RAG Data Insertion to PostgreSQL completed.")
#                     else:
#                         print("\nManual RAG Data Insertion to PostgreSQL failed.")
#                 else:
#                     print("\nEmbedder not available. Skipping manual data insertion.")
#             else:
#                 print("\nDatabase schema setup failed.")
#         finally:
#             # 4. Close Database Connection
#             if db_conn:
#                 db_conn.close()
#                 print("Database connection closed.")
#
#     print("Manual data insertion script finished.")

# --- Update load_business_info to load from the PostgreSQL 'manual' table ---
def load_business_info():
    """Loads business information from the PostgreSQL 'manual' table and creates embeddings and a FAISS index in memory."""
    global data, descriptions_for_embedding, business_info_available
    global rag_faiss_index, rag_metadata
    # Assuming embedder and EMBEDDING_DIM are defined globally and initialized on app startup

    business_info_available = False
    rag_faiss_index = None
    rag_metadata = []
    data = []
    descriptions_for_embedding = []

    print("Attempting to load RAG data from PostgreSQL 'manual' table...")
    db_conn = connect_to_supabase()
    if db_conn is None:
        print("Failed to connect to database. RAG will be unavailable.")
        return

    # Ensure the embedder is initialized before proceeding.
    # Assuming embedder is initialized globally in the main application startup.
    if 'embedder' not in globals() or embedder is None:
        print("Embedder not initialized. Cannot load RAG data embeddings.")
        if db_conn:
            db_conn.close()
        return

    try:
        with db_conn.cursor() as cur:
            # Ensure the pgvector extension is enabled (important if not done manually
            # during setup), so the session can use vector types.
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            db_conn.commit()  # Commit the extension command

            # Retrieve data from the 'manual' table, including the embedding.
            # Use double quotes for capitalized column names.
            cur.execute(f"""
                SELECT "Service", "Description", "Price", "Available", embedding
                FROM {BUSINESS_DATA_TABLE};
            """)
            db_records = cur.fetchall()

        if not db_records:
            print(f"Warning: No data found in table '{BUSINESS_DATA_TABLE}'. RAG will be unavailable.")
            business_info_available = False
        else:
            print(f"Loaded {len(db_records)} records from '{BUSINESS_DATA_TABLE}'.")
            # Process the retrieved data
            data = []
            descriptions_for_embedding = []
            embeddings_list = []

            # The columns are returned in the order of the SELECT statement
            for service, description, price, available, embedding in db_records:
                # Store the original data row as a dictionary
                data.append({'Service': service, 'Description': description, 'Price': price, 'Available': available})
                # Store a combined description for potential re-ranking or context
                descriptions_for_embedding.append(f"Service: {service.strip()}. Description: {description.strip()}. Price: {price.strip() if price else ''}. Available: {available.strip() if available else ''}.")
                # Without a registered pgvector type adapter, psycopg2 returns the
                # vector column as a string like "[0.1,0.2,...]", so parse it here.
                if isinstance(embedding, str):
                    embedding = json.loads(embedding)
                embeddings_list.append(embedding)

            if data and embeddings_list:
                print("Building in-memory FAISS index...")
                try:
                    # Convert the list of lists to a numpy array for FAISS
                    embeddings_np = np.array(embeddings_list).astype('float32')

                    # Ensure EMBEDDING_DIM is correct
                    if embeddings_np.shape[1] != EMBEDDING_DIM:
                        print(f"Error: Embedding dimension mismatch. Expected {EMBEDDING_DIM}, got {embeddings_np.shape[1]}.")
                        print("This might happen if the embeddings in the database were generated with a different model or dimension.")
                        print("RAG will be unavailable.")
                        business_info_available = False
                        rag_faiss_index = None
                        rag_metadata = []
                    else:
                        # Use L2 (Euclidean) distance for the FAISS flat index
                        rag_faiss_index = faiss.IndexFlatL2(EMBEDDING_DIM)
                        rag_faiss_index.add(embeddings_np)

                        # rag_metadata maps a FAISS index back to an index in our 'data' list
                        rag_metadata = list(range(len(data)))

                        print(f"In-memory FAISS index built. Index size: {rag_faiss_index.ntotal}")
                        business_info_available = True
                except Exception as e:
                    print(f"Error during FAISS index building: {e}")
                    print(traceback.format_exc())
                    rag_faiss_index = None
                    rag_metadata = []
                    business_info_available = False
            else:
                print("No valid data or embeddings to build FAISS index. RAG will be unavailable.")
                business_info_available = False

        if not business_info_available:
            print("Business information retrieval (RAG) is NOT available.")
        else:
            print("Business information retrieval (RAG) is available using the in-memory FAISS index built from DB data.")

    except Exception as e:
        print(f"An error occurred while accessing the database for RAG data: {e}")
        print(traceback.format_exc())
        business_info_available = False
        rag_faiss_index = None
        rag_metadata = []
    finally:
        if db_conn:
            db_conn.close()

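# --- Optional: have psycopg2 return vector columns as numpy arrays directly.
# A minimal sketch assuming the separate `pgvector` Python package is installed
# (pip install pgvector); with the adapter registered, the manual string
# parsing in load_business_info() becomes unnecessary. ---
# from pgvector.psycopg2 import register_vector
# conn = connect_to_supabase()
# if conn is not None:
#     register_vector(conn)  # adapts the Postgres 'vector' type to numpy arrays
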
# --- Update retrieve_business_info to use the data structure from the 'manual' table ---
# The core logic of retrieve_business_info (FAISS search on in-memory data) remains the same.
# However, the structure of the 'data' list it accesses now comes from the 'manual' table columns.
# The retrieval function already handles accessing 'Service' and 'Description' from the dictionary.
# If you need to return Price or Available, you can adjust the return format.
# For now, it returns the dictionaries as loaded into the 'data' list.

def retrieve_business_info(query: str, top_n: int = 3) -> list:
    """
    Retrieves relevant business information from loaded data (from the 'manual' table)
    based on a query, using the in-memory FAISS index.
    """
    global data, rag_faiss_index, rag_metadata, descriptions_for_embedding
    # Assuming embedder and reranker are defined globally and initialized on app startup

    if not business_info_available or embedder is None or rag_faiss_index is None or rag_faiss_index.ntotal == 0 or not data or not rag_metadata or len(rag_metadata) != len(data):
        print("Business information retrieval is not available, RAG index is empty, or data/metadata mismatch.")
        return []

    try:
        # Use the global embedder initialized on startup
        query_embedding = embedder.encode(query, convert_to_tensor=False)

        # Perform FAISS search on the in-memory index
        D, I = rag_faiss_index.search(np.array([query_embedding]).astype('float32'), min(top_n, rag_faiss_index.ntotal))

        # Map FAISS results back to the original data using rag_metadata,
        # making sure the indices are valid.
        original_indices = [rag_metadata[i] for i in I[0] if i != -1 and i < len(rag_metadata)]

        # Get the actual data records based on the indices
        top_results = [data[i] for i in original_indices]

        # Get the corresponding descriptions for re-ranking
        descriptions_for_reranking = [descriptions_for_embedding[i] for i in original_indices]

        # Re-rank results using the global reranker,
        # assuming it was initialized on app startup.
        if 'reranker' in globals() and reranker is not None and top_results:
            print("Re-ranking top results...")
            rerank_pairs = [(query, descriptions_for_reranking[i]) for i in range(len(top_results))]
            rerank_scores = reranker.predict(rerank_pairs)

            # Sort results based on the re-ranker scores
            reranked_indices = sorted(range(len(rerank_scores)), key=lambda i: rerank_scores[i], reverse=True)
            reranked_results = [top_results[i] for i in reranked_indices]
            print("Re-ranking complete.")
            return reranked_results
        else:
            # If there is no reranker or there are no results, return the raw FAISS results (mapped to data)
            print("Skipping re-ranking: Reranker not available or no results.")
            return top_results

    except Exception as e:
        print(f"Error during business information retrieval (FAISS search/re-ranking): {e}")
        print(traceback.format_exc())
        return []

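# --- Example: end-to-end retrieval (a minimal sketch; assumes the globals
# below are initialized as described in the commented __main__ guide) ---
# embedder = SentenceTransformer("paraphrase-MiniLM-L6-v2")
# load_business_info()
# for hit in retrieve_business_info("Do you offer a free savings account?", top_n=3):
#     print(hit["Service"], "->", hit["Description"])
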
# --- Update log_conversation to log to the PostgreSQL conversation_history table ---
# This function was already updated in a previous step to log to the DB.
# Ensure the table name used here matches CONVERSATION_HISTORY_TABLE, which is
# assumed to be defined globally; `from datetime import datetime` is also
# needed if this function is enabled.

# def log_conversation(user_query: str, model_response: str, tool_details: dict = None, user_id: str = None, model_used: str = None):
#     """
#     Logs conversation data (query, response, timestamp, optional details) to the PostgreSQL database.
#     """
#     print("\n--- Attempting to log conversation to PostgreSQL Database ---")
#     db_conn = connect_to_supabase()  # Use the Supabase connection function
#     if db_conn is None:
#         print("Warning: Failed to connect to database. Skipping conversation logging.")
#         return
#
#     try:
#         timestamp = datetime.now().astimezone().isoformat()  # astimezone() gives a timezone-aware timestamp
#         tool_details_json = json.dumps(tool_details) if tool_details is not None else None
#         user_id_val = user_id if user_id is not None else "anonymous"
#         model_used_val = model_used if model_used is not None else "unknown"
#
#         with db_conn.cursor() as cur:
#             cur.execute(f"""
#                 INSERT INTO {CONVERSATION_HISTORY_TABLE} (timestamp, user_id, user_query, model_response, tool_details, model_used)
#                 VALUES (%s, %s, %s, %s, %s, %s);
#             """, (timestamp, user_id_val, user_query, model_response, tool_details_json, model_used_val))
#         db_conn.commit()
#         print("Conversation data successfully logged to PostgreSQL.")
#
#     except Exception as e:
#         print(f"An unexpected error occurred during database conversation logging: {e}")
#         print(traceback.format_exc())
#         if db_conn:
#             db_conn.rollback()
#     finally:
#         if db_conn:
#             db_conn.close()

# --- Update load_conversation_history to load from the PostgreSQL conversation_history table ---
# This function was already updated in a previous step to load from the DB.
# Ensure the table name used here matches CONVERSATION_HISTORY_TABLE,
# which is assumed to be defined globally.

# def load_conversation_history(api_key: str) -> list[dict]:
#     """Loads conversation history for a given API key from the PostgreSQL database."""
#     user_id_to_load = api_key if api_key is not None else "anonymous"
#     print(f"Attempting to load conversation history for user '{user_id_to_load}' from PostgreSQL...")
#
#     history = []
#     db_conn = connect_to_supabase()  # Use the Supabase connection function
#     if db_conn is None:
#         print("Warning: Failed to connect to database. Cannot load conversation history.")
#         return history  # Return empty history on failure
#
#     try:
#         with db_conn.cursor() as cur:
#             # Retrieve history ordered by timestamp for a specific user
#             cur.execute(f"""
#                 SELECT user_query, model_response
#                 FROM {CONVERSATION_HISTORY_TABLE}
#                 WHERE user_id = %s
#                 ORDER BY timestamp;
#             """, (user_id_to_load,))
#             db_records = cur.fetchall()
#
#         # Format the history as a list of dictionaries for compatibility with the chat function
#         for user_query, model_response in db_records:
#             if user_query:
#                 history.append({"role": "user", "content": user_query})
#             if model_response:
#                 history.append({"role": "assistant", "content": model_response})
#
#         print(f"Loaded {len(history)} turns of conversation history for user '{user_id_to_load}' from PostgreSQL.")
#
#     except Exception as e:
#         print(f"Error loading conversation history from database: {e}")
#         print(traceback.format_exc())
#         history = []  # Ensure empty history is returned on error
#     finally:
#         if db_conn:
#             db_conn.close()
#
#     return history

# --- Main Application Startup Block (__main__) ---
# This block assumes it is part of the larger application script in the Hugging Face Space.
# It needs to initialize global resources and then potentially launch a Gradio interface.

# The separate data insertion script is not run from this block;
# data insertion is a one-time or separate process.

# if __name__ == "__main__":
#     print("Starting main application startup...")
#
#     # 1. Load/Create Hugging Face Dataset (still used for other logging if needed)
#     # ... (existing code for HF dataset loading remains)
#
#     # 2. Authenticate and load business info from PostgreSQL (updated function).
#     # This function now handles connecting to the DB and loading data/embeddings into memory.
#     load_business_info()
#
#     # 3. Initialize other necessary global variables/clients
#     # (e.g., nlp, embedder, reranker, primary_client, fallback_client).
#     # Note: these must be initialized BEFORE load_business_info() is called,
#     # since that function uses the embedder.
#     # Assuming embedder and reranker are initialized here or earlier in the full script:
#     # try:
#     #     embedder = SentenceTransformer("paraphrase-MiniLM-L6-v2")
#     #     print("Sentence Transformer (embedder) initialized.")
#     # except Exception as e:
#     #     print(f"Error initializing embedder: {e}")
#     #     embedder = None
#
#     # try:
#     #     reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
#     #     print("Cross-Encoder (reranker) initialized.")
#     # except Exception as e:
#     #     print(f"Error initializing reranker: {e}")
#     #     reranker = None
#
#     # try:
#     #     nlp = spacy.load("en_core_web_sm")  # Assuming spacy is imported
#     #     print("SpaCy model initialized.")
#     # except Exception as e:
#     #     print(f"Error initializing SpaCy model: {e}")
#     #     nlp = None
#
#     # try:
#     #     primary_client = InferenceClient("meta-llama/Llama-3.3-70B-Instruct", token=HF_TOKEN)  # Assuming InferenceClient and HF_TOKEN
#     #     print("Primary LLM client initialized.")
#     # except Exception as e:
#     #     print(f"Error initializing primary client: {e}")
#     #     primary_client = None
#
#     # try:
#     #     fallback_client = InferenceClient("meta-llama/Llama-3.3-70B-Instruct", token=HF_TOKEN)  # Assuming InferenceClient and HF_TOKEN
#     #     print("Fallback LLM client initialized.")
#     # except Exception as e:
#     #     print(f"Error initializing fallback client: {e}")
#     #     fallback_client = None
#
#     # 4. Check RAG availability: business_info_available and rag_faiss_index
#     # are set by load_business_info.
#     if not business_info_available or rag_faiss_index is None:
#         print("Warning: Business information (PostgreSQL data) not loaded successfully or RAG index not built. RAG will not be available.")
#
#     # 5. Initialize the general query cache (still uses local files),
#     # assuming initialize_general_cache is defined globally.
#     # initialize_general_cache()
#
#     # 6. Launch Gradio Interface (assuming gr and chat are defined globally)
#     # ... (Gradio interface setup and launch code)

# Note: this file contains the updated function definitions. They need to be
# integrated into the complete application script in your Hugging Face Space.
# The __main__ block structure above is commented out as a guide for integration.