christian committed on
Commit
62944f8
·
1 Parent(s): 3e0aec0

testing gradio for interface

Browse files
Files changed (2) hide show
  1. app.py +541 -132
  2. requirements.txt +7 -0
app.py CHANGED
@@ -176,151 +176,560 @@
176
  # start_server()
177
 
178
 
179
- #!/usr/bin/env python3
180
- """
181
- HF Spaces deployment launcher for RAG Chatbot
182
- Repository structure: rag_app/ is the git root
183
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  import os
186
  import sys
187
- import time
188
- from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- import os
191
 
192
- # HF Spaces only allows writing to /tmp
193
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
194
- os.environ["HF_HOME"] = "/tmp/hf_cache"
195
- os.makedirs("/tmp/hf_cache", exist_ok=True)
196
-
197
-
198
- print("🚀 Starting HF Spaces deployment setup...")
199
- print(f"📁 Current directory: {os.getcwd()}")
200
- print(f"📂 Contents: {os.listdir('.')}")
201
-
202
- # Ensure current directory is in Python path
203
- sys.path.insert(0, os.getcwd())
204
-
205
- # HF Spaces writable path for ephemeral storage
206
- TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"
207
-
208
-
209
- def setup_for_spaces():
210
- """Setup vector stores and environment for HF Spaces"""
211
- print("🔧 Setting up vector stores for HF Spaces...")
212
-
213
- # Ensure docs folders exist in repo
214
- required_dirs = [
215
- "./docs",
216
- "./docs/mes",
217
- "./docs/technical",
218
- "./docs/general"
219
- ]
220
- for directory in required_dirs:
221
- os.makedirs(directory, exist_ok=True)
222
- exists = "✅" if os.path.exists(directory) else "❌"
223
- print(f"{exists} Directory: {directory}")
224
-
225
- # Map of vector stores (persist dirs now point to /tmp)
226
- store_configs = [
227
- ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
228
- ("Technical Docs", "docs/technical",
229
- os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
230
- ("General Docs", "docs/general",
231
- os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
232
- ]
233
-
234
- stores_to_build = []
235
- for name, doc_path, persist_dir in store_configs:
236
- # Check if store already exists in repo or in /tmp
237
- if os.path.exists(persist_dir) and os.listdir(persist_dir):
238
- print(f"✅ {name} vector store already exists in {persist_dir}")
239
- else:
240
- stores_to_build.append((name, doc_path, persist_dir))
241
- print(f"🔧 {name} vector store needs building in {persist_dir}")
242
-
243
- # Build missing vector stores
244
- if stores_to_build:
245
- print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
246
- start_time = time.time()
247
- MAX_BUILD_TIME = 600 # seconds
248
-
249
- try:
250
- from utils.vector_store import build_vector_store
251
- print("✅ Vector store utilities imported successfully")
252
-
253
- for name, doc_path, persist_dir in stores_to_build:
254
- elapsed = time.time() - start_time
255
- if elapsed > MAX_BUILD_TIME:
256
- print(
257
- f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
258
- os.makedirs(persist_dir, exist_ok=True)
259
- continue
260
-
261
- if os.path.exists(doc_path):
262
- doc_files = [f for f in Path(doc_path).rglob(
263
- "*") if f.is_file() and not f.name.startswith('.')]
264
- if doc_files:
265
- print(
266
- f"📄 Found {len(doc_files)} document(s) for {name}")
267
- try:
268
- build_vector_store(
269
- doc_path=doc_path, persist_directory=persist_dir)
270
- print(f"✅ {name} built successfully")
271
- except Exception as e:
272
- print(f"❌ Error building {name}: {str(e)}")
273
- os.makedirs(persist_dir, exist_ok=True)
274
- else:
275
- print(f"⚠️ No documents found in {doc_path}")
276
- os.makedirs(persist_dir, exist_ok=True)
277
- else:
278
- print(f"⚠️ Document path not found: {doc_path}")
279
- os.makedirs(persist_dir, exist_ok=True)
280
-
281
- except ImportError as e:
282
- print(f"❌ Could not import vector store utilities: {e}")
283
- for _, _, persist_dir in stores_to_build:
284
- os.makedirs(persist_dir, exist_ok=True)
285
-
286
- else:
287
- print("✅ All vector stores already exist!")
288
-
289
- print("🎉 Vector store setup completed!")
290
-
291
-
292
- def start_server():
293
- """Start the FastAPI server"""
294
- print("🌐 Starting FastAPI server...")
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  try:
297
- from api.main import app
298
- print(" Successfully imported FastAPI app from api.main")
299
 
300
- import uvicorn
301
 
302
- port = int(os.environ.get("PORT", 7860))
303
- host = "0.0.0.0"
 
304
 
305
- print(f"🚀 Starting server on {host}:{port}")
306
- uvicorn.run(app, host=host, port=port,
307
- log_level="info", access_log=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- except ImportError as e:
310
- print(f"❌ Could not import FastAPI app: {e}")
311
- sys.exit(1)
312
  except Exception as e:
313
- print(f"Error starting server: {e}")
314
- sys.exit(1)
315
 
 
 
 
 
 
 
 
316
 
317
- if __name__ == "__main__":
318
- print("=" * 60)
319
- print("🎯 RAG Chatbot - HF Spaces Deployment")
320
- print("=" * 60)
321
 
322
- # Setup phase
323
- setup_for_spaces()
 
 
 
324
 
325
- # Server start phase
326
- start_server()
 
 
 
 
 
 
 
 
 
 
 
176
  # start_server()
177
 
178
 
179
+ # #!/usr/bin/env python3
180
+ # """
181
+ # HF Spaces deployment launcher for RAG Chatbot
182
+ # Repository structure: rag_app/ is the git root
183
+ # """
184
+
185
+ # import os
186
+ # import sys
187
+ # import time
188
+ # from pathlib import Path
189
+
190
+ # import os
191
+
192
+ # # HF Spaces only allows writing to /tmp
193
+ # os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
194
+ # os.environ["HF_HOME"] = "/tmp/hf_cache"
195
+ # os.makedirs("/tmp/hf_cache", exist_ok=True)
196
+
197
+
198
+ # print("🚀 Starting HF Spaces deployment setup...")
199
+ # print(f"📁 Current directory: {os.getcwd()}")
200
+ # print(f"📂 Contents: {os.listdir('.')}")
201
+
202
+ # # Ensure current directory is in Python path
203
+ # sys.path.insert(0, os.getcwd())
204
+
205
+ # # HF Spaces writable path for ephemeral storage
206
+ # TMP_VECTOR_STORE_ROOT = "/tmp/vector_stores"
207
+
208
+
209
+ # def setup_for_spaces():
210
+ # """Setup vector stores and environment for HF Spaces"""
211
+ # print("🔧 Setting up vector stores for HF Spaces...")
212
+
213
+ # # Ensure docs folders exist in repo
214
+ # required_dirs = [
215
+ # "./docs",
216
+ # "./docs/mes",
217
+ # "./docs/technical",
218
+ # "./docs/general"
219
+ # ]
220
+ # for directory in required_dirs:
221
+ # os.makedirs(directory, exist_ok=True)
222
+ # exists = "✅" if os.path.exists(directory) else "❌"
223
+ # print(f"{exists} Directory: {directory}")
224
+
225
+ # # Map of vector stores (persist dirs now point to /tmp)
226
+ # store_configs = [
227
+ # ("MES Manual", "docs/mes", os.path.join(TMP_VECTOR_STORE_ROOT, "mes_db")),
228
+ # ("Technical Docs", "docs/technical",
229
+ # os.path.join(TMP_VECTOR_STORE_ROOT, "tech_db")),
230
+ # ("General Docs", "docs/general",
231
+ # os.path.join(TMP_VECTOR_STORE_ROOT, "general_db")),
232
+ # ]
233
+
234
+ # stores_to_build = []
235
+ # for name, doc_path, persist_dir in store_configs:
236
+ # # Check if store already exists in repo or in /tmp
237
+ # if os.path.exists(persist_dir) and os.listdir(persist_dir):
238
+ # print(f"✅ {name} vector store already exists in {persist_dir}")
239
+ # else:
240
+ # stores_to_build.append((name, doc_path, persist_dir))
241
+ # print(f"🔧 {name} vector store needs building in {persist_dir}")
242
+
243
+ # # Build missing vector stores
244
+ # if stores_to_build:
245
+ # print(f"🏗️ Building {len(stores_to_build)} vector store(s)...")
246
+ # start_time = time.time()
247
+ # MAX_BUILD_TIME = 600 # seconds
248
+
249
+ # try:
250
+ # from utils.vector_store import build_vector_store
251
+ # print("✅ Vector store utilities imported successfully")
252
+
253
+ # for name, doc_path, persist_dir in stores_to_build:
254
+ # elapsed = time.time() - start_time
255
+ # if elapsed > MAX_BUILD_TIME:
256
+ # print(
257
+ # f"⏰ Build time limit reached ({elapsed:.1f}s), creating empty store at {persist_dir}")
258
+ # os.makedirs(persist_dir, exist_ok=True)
259
+ # continue
260
+
261
+ # if os.path.exists(doc_path):
262
+ # doc_files = [f for f in Path(doc_path).rglob(
263
+ # "*") if f.is_file() and not f.name.startswith('.')]
264
+ # if doc_files:
265
+ # print(
266
+ # f"📄 Found {len(doc_files)} document(s) for {name}")
267
+ # try:
268
+ # build_vector_store(
269
+ # doc_path=doc_path, persist_directory=persist_dir)
270
+ # print(f"✅ {name} built successfully")
271
+ # except Exception as e:
272
+ # print(f"❌ Error building {name}: {str(e)}")
273
+ # os.makedirs(persist_dir, exist_ok=True)
274
+ # else:
275
+ # print(f"⚠️ No documents found in {doc_path}")
276
+ # os.makedirs(persist_dir, exist_ok=True)
277
+ # else:
278
+ # print(f"⚠️ Document path not found: {doc_path}")
279
+ # os.makedirs(persist_dir, exist_ok=True)
280
+
281
+ # except ImportError as e:
282
+ # print(f"❌ Could not import vector store utilities: {e}")
283
+ # for _, _, persist_dir in stores_to_build:
284
+ # os.makedirs(persist_dir, exist_ok=True)
285
+
286
+ # else:
287
+ # print("✅ All vector stores already exist!")
288
+
289
+ # print("🎉 Vector store setup completed!")
290
 
291
+
292
+ # def start_server():
293
+ # """Start the FastAPI server"""
294
+ # print("🌐 Starting FastAPI server...")
295
+
296
+ # try:
297
+ # from api.main import app
298
+ # print("✅ Successfully imported FastAPI app from api.main")
299
+
300
+ # import uvicorn
301
+
302
+ # port = int(os.environ.get("PORT", 7860))
303
+ # host = "0.0.0.0"
304
+
305
+ # print(f"🚀 Starting server on {host}:{port}")
306
+ # uvicorn.run(app, host=host, port=port,
307
+ # log_level="info", access_log=True)
308
+
309
+ # except ImportError as e:
310
+ # print(f"❌ Could not import FastAPI app: {e}")
311
+ # sys.exit(1)
312
+ # except Exception as e:
313
+ # print(f"❌ Error starting server: {e}")
314
+ # sys.exit(1)
315
+
316
+
317
+ # if __name__ == "__main__":
318
+ # print("=" * 60)
319
+ # print("🎯 RAG Chatbot - HF Spaces Deployment")
320
+ # print("=" * 60)
321
+
322
+ # # Setup phase
323
+ # setup_for_spaces()
324
+
325
+ # # Server start phase
326
+ # start_server()
327
+
328
+
329
# app.py - Pure Gradio approach (for Gradio template)
#
# Module-level setup: imports, environment, FastAPI app + CORS, and the
# prefix -> vector-store mapping used by the query routing below.
import os
import sys

import gradio as gr
import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from utils.helpers.chat_mapper import map_answer_to_chat_response
from utils.vector_store import get_vector_store

# Make the parent of this file's directory importable (repo-root imports).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

load_dotenv()

app = FastAPI()

# Simplified CORS for debugging — fully open. Tighten allow_origins before
# exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Vector store mapping for different domains: query prefix -> persist dir.
# "default" is used when no recognized prefix is present.
VECTOR_STORE_PATHS = {
    "mes": "./vector_stores/mes_db",
    "technical": "./vector_stores/tech_db",
    "general": "./vector_stores/general_db",
    "default": "./vector_stores/general_db",
}


class QueryRequest(BaseModel):
    # Raw user question, optionally prefixed with "mes:"/"technical:"/"general:".
    query: str


# Gemini API setup — fail fast at import time if the key is missing.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable required")

GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# Vector store loader
382
def load_vector_store_by_prefix(query: str):
    """Route *query* to a vector store based on an optional domain prefix.

    A query like ``"mes: how does X work"`` selects the ``mes`` store and has
    its prefix stripped; anything else falls through to the ``default`` store.

    Returns a ``(vector_store, cleaned_query, store_key)`` tuple.
    """
    stripped = query.strip()
    # Match the prefix case-insensitively, but strip it from the ORIGINAL
    # text so the retrieval/LLM query keeps the user's casing (the previous
    # version lowercased the whole query whenever a prefix was used).
    lower_q = stripped.lower()
    for prefix, path in VECTOR_STORE_PATHS.items():
        if prefix != "default" and lower_q.startswith(f"{prefix}:"):
            cleaned_query = stripped[len(prefix) + 1:].strip()
            return get_vector_store(persist_directory=path), cleaned_query, prefix
    return get_vector_store(persist_directory=VECTOR_STORE_PATHS["default"]), query, "default"
+
390
+
391
def generate_answer_with_gemini(query: str, context_docs: list):
    """Ask Gemini to answer *query* grounded in *context_docs*.

    Each document's ``page_content`` is folded into a numbered knowledge base
    inside the prompt. Returns the model's answer text, or a human-readable
    error string on any API/network failure (never raises).
    """
    # Build context string: one "Data Source N" paragraph per document.
    knowledge_base = "\n\n".join(
        f"Data Source {idx}: {doc.page_content.strip()}"
        for idx, doc in enumerate(context_docs, 1)
    )

    # The updated prompt is more direct and forceful
    prompt = (
        "You are an expert AI assistant that uses a provided knowledge base to answer questions. "
        "Your responses must always be based on this knowledge base, which is the ultimate source of truth. "
        "You will only use your internal knowledge to supplement the answer, never to contradict it. "
        "If and only if the knowledge base contains absolutely nothing relevant to the user's question, "
        "you will respond with a polite and concise statement saying you cannot answer the question from the information you have. "
        "You must never answer 'I don't know' if there is any information in the knowledge base that is even tangentially related to the question. "
        "Always try your best to construct a useful answer by synthesizing the provided information. "
        "Do not refer to the 'knowledge base' or 'sources' directly; instead, use phrases like 'based on the information I have'.\n\n"

        f"My knowledge base:\n{knowledge_base}\n\n"
        f"User's Question: {query}\n\nAnswer:"
    )

    # print the prompt for debugging
    print("Prompt sent to Gemini API:", prompt)

    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [{"text": prompt}],
            }
        ],
        "generationConfig": {
            "temperature": 0.7,
            "maxOutputTokens": 300,
        },
    }

    try:
        response = requests.post(
            f"{GEMINI_API_URL}?key={GEMINI_API_KEY}",
            json=payload,
            timeout=300,
        )

        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"

        data = response.json()

        # Extract answer text from the first candidate, defensively walking
        # the nested response structure with empty-dict fallbacks.
        candidate = data.get("candidates", [{}])[0]
        text = (
            candidate.get("content", {})
            .get("parts", [{}])[0]
            .get("text", "")
            .strip()
        )
        return text or "I couldn't generate an answer."

    except Exception as e:
        # Swallow network/parse errors and surface them as a chat message.
        return f"Error: {str(e)}"
+
454
# Middleware for logging requests


@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log method, URL, headers, origin and UA for every request, plus the
    response status code — debugging aid for CORS/client issues."""
    print(f"Request: {request.method} {request.url}")
    print(f"Headers: {dict(request.headers)}")
    print(f"Origin: {request.headers.get('origin', 'No Origin')}")
    print(f"User-Agent: {request.headers.get('user-agent', 'No User-Agent')}")

    response = await call_next(request)
    print(f"Response Status: {response.status_code}")
    return response
+
468
# NEW: Gradio interface function


def gradio_chat_interface(query: str) -> str:
    """
    Gradio interface function that uses your existing FastAPI logic
    """
    try:
        if not query.strip():
            return "Please enter a question."

        print(f"Gradio query: {query}")

        # Use your existing logic
        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            query)
        if not vector_store:
            return "Vector store not ready. Please try again later."

        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 6, "fetch_k": 20, "lambda_mult": 0.5},
        )
        docs = retriever.get_relevant_documents(cleaned_query)

        # Deduplicate by exact snippet text, keeping first occurrence,
        # then cap at 5 documents.
        seen_snippets = set()
        deduped = []
        for candidate in docs:
            text = candidate.page_content.strip()
            if text in seen_snippets:
                continue
            seen_snippets.add(text)
            deduped.append(candidate)
        docs = deduped[:5]

        if not docs:
            return "I couldn't find any relevant information in the knowledge base to answer your question."

        answer = generate_answer_with_gemini(cleaned_query, docs)

        # Assemble the markdown response: answer first, then sources.
        parts = [f"## Answer\n\n{answer}\n\n"]
        if docs:
            parts.append("## Sources\n\n")
            for i, doc in enumerate(docs, 1):
                source_name = doc.metadata.get('source', 'Unknown Source')
                page = doc.metadata.get('page', '')
                page_info = f" (Page {page})" if page else ""

                # Truncate long excerpts to a 400-char preview.
                content = doc.page_content
                preview = content[:400] + "..." if len(content) > 400 else content
                parts.append(
                    f"### {i}. {source_name}{page_info}\n\n{preview}\n\n---\n\n")

        return "".join(parts)

    except Exception as e:
        error_msg = f"**Error occurred:**\n\n```\n{str(e)}\n```"
        print(f"Gradio error: {e}")
        return error_msg
+
535
# Create Gradio interface


def create_gradio_interface():
    """Create and configure the Gradio interface.

    Builds a Blocks UI with a question box, submit/clear buttons, a markdown
    answer pane, and example questions wired to gradio_chat_interface.
    """

    # NOTE: gr.Blocks does not accept a `description` kwarg (that belongs to
    # gr.Interface); passing it raised TypeError at startup. The descriptive
    # text lives in the Markdown header below instead.
    with gr.Blocks(
        title="RAG Chatbot",
        theme='soft',
    ) as interface:

        gr.Markdown("""
        # RAG Chatbot

        Ask questions about your knowledge base and get detailed answers with sources.

        **Available Knowledge:**
        - MES Manual documentation (prefix with "mes:")
        - Technical documentation (prefix with "technical:")
        - General documentation (prefix with "general:" or no prefix)
        """)

        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question here... (e.g., 'What is machine learning?' or 'mes: How does the system work?')",
                    lines=3,
                    max_lines=10
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button(
                    "Ask Question", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", variant="secondary")

        answer_output = gr.Markdown(
            label="Answer & Sources",
            value="Welcome! Ask a question above to get started."
        )

        # Event handlers
        submit_btn.click(
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )

        query_input.submit(  # Allow Enter key to submit
            gradio_chat_interface,
            inputs=[query_input],
            outputs=[answer_output]
        )

        clear_btn.click(
            lambda: ("", "Welcome! Ask a question above to get started."),
            outputs=[query_input, answer_output]
        )

        # Example questions
        gr.Examples(
            examples=[
                ["What is machine learning and how does it work?"],
                ["mes: How does the MES system handle production data?"],
                ["technical: Explain the database architecture"],
                ["What are the main components of the system?"],
                ["How do I configure the application settings?"]
            ],
            inputs=[query_input],
            label="Example Questions"
        )

        gr.Markdown("""
        ---

        **Tips:**
        - Use prefixes (mes:, technical:, general:) to search specific knowledge bases
        - Be specific with your questions for better results
        - Sources are provided with each answer for verification

        **Technical Info:**
        - Powered by FastAPI backend
        - Vector search with MMR retrieval
        - Gemini 2.0 Flash for answer generation
        """)

    return interface
+
624
# API Endpoints


@app.get("/")
def root():
    """Status endpoint: reports service/model info and where the UI lives."""
    status_info = {
        "status": "running",
        "model": "gemini-2.0-flash",
        "using_direct_api": True,
        "client_ready": True,
        "gradio_interface": "/gradio",
    }
    return status_info
+
637
+
638
@app.post("/")
async def ask_question(request: Request):
    """JSON API: answer a question via RAG.

    Accepts a body matching QueryRequest, routes it to the right vector store
    by prefix, retrieves + deduplicates documents, asks Gemini, and returns
    the answer mapped through map_answer_to_chat_response.

    Raises HTTPException(500) on vector-store unavailability or any error.
    """
    try:
        # Print raw incoming request body (debugging aid).
        raw_body = await request.body()
        print("Incoming POST request body:")
        print(raw_body.decode("utf-8"))

        # Parse into your Pydantic model
        parsed_request = QueryRequest.model_validate_json(raw_body)
        print("Parsed request object:", parsed_request)

        vector_store, cleaned_query, store_key = load_vector_store_by_prefix(
            parsed_request.query
        )

        if not vector_store:
            raise HTTPException(
                status_code=500, detail="Vector store not ready"
            )

        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 6,
                "fetch_k": 20,
                "lambda_mult": 0.5
            }
        )

        docs = retriever.get_relevant_documents(cleaned_query)

        # Deduplicate by exact snippet text, then cap at 5 documents.
        seen = set()
        unique_docs = []
        for doc in docs:
            snippet = doc.page_content.strip()
            if snippet not in seen:
                seen.add(snippet)
                unique_docs.append(doc)
        docs = unique_docs[:5]

        if not docs:
            return {
                "answer": "I couldn't find any relevant information in the knowledge base to answer your question.",
                "model_used": "gemini-2.0-flash",
                "vector_store_used": VECTOR_STORE_PATHS[store_key],
                "sources": []
            }

        answer = generate_answer_with_gemini(cleaned_query, docs)

        answer_obj = {
            "answer": answer,
            "model_used": "gemini-2.0-flash",
            "vector_store_used": VECTOR_STORE_PATHS[store_key],
            "sources": [
                {
                    # Only mark the excerpt as truncated when it actually is;
                    # the old code appended "...\n" unconditionally, falsely
                    # flagging complete excerpts as cut off.
                    "content": (
                        doc.page_content[:500] + "...\n"
                        if len(doc.page_content) > 500
                        else doc.page_content
                    ),
                    "metadata": doc.metadata
                }
                for doc in docs
            ]
        }

        return map_answer_to_chat_response(answer_obj)

    except Exception as e:
        print(f"Error in ask_question: {e}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 
709
# Create the Gradio interface and mount it on the FastAPI app at /gradio.
demo = create_gradio_interface()
app = gr.mount_gradio_app(app, demo, path="/gradio")


# Add a redirect for convenience
@app.get("/ui")
async def redirect_to_gradio():
    """Redirect /ui to /gradio for easier access"""
    from fastapi.responses import RedirectResponse
    return RedirectResponse(url="/gradio")


# Health check endpoint
@app.get("/health")
def health_check():
    """Liveness probe: the app is up and the Gradio UI is mounted."""
    return {"status": "healthy", "gradio_mounted": True}


if __name__ == "__main__":
    import uvicorn

    # PORT is set by the hosting platform; default to 8000 locally.
    server_port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=server_port)
requirements.txt CHANGED
@@ -46,6 +46,13 @@ pathlib2
46
  gunicorn
47
 
48
 
 
 
 
 
 
 
 
49
  # fastapi==0.104.1
50
  # uvicorn[standard]==0.24.0
51
  # requests==2.31.0
 
46
  gunicorn
47
 
48
 
49
+ # NEW: Add Gradio for the web interface
50
+ gradio>=4.0.0
51
+
52
+ # Optional: For better Gradio themes
53
+ gradio-client
54
+
55
+
56
  # fastapi==0.104.1
57
  # uvicorn[standard]==0.24.0
58
  # requests==2.31.0