# precise_chat / app.py
# Hugging Face file-view metadata: commit db6743d ("Update app.py") by yogies, verified, 6.49 kB.
# --------------------------------------------------------------
# app.py
# --------------------------------------------------------------
import os, time, importlib.util
import gradio as gr
from huggingface_hub import hf_hub_download
from datetime import datetime
from datetime import date
from upstash_redis import Redis
# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: "str | None" = None) -> str:
    """Return the value of the environment variable ``key``.

    Args:
        key: Name of the environment variable to read.
        fallback: Value to return when the variable is not set. ``None``
            (the default) means there is no fallback.

    Returns:
        The variable's value when it is set (an empty string counts as set),
        otherwise ``fallback``.

    Raises:
        RuntimeError: If the variable is not set and no fallback was given.
    """
    # One lookup covers both the "set" and the "use fallback" paths; only
    # an unset variable with no fallback can produce None here.
    value = os.environ.get(key, fallback)
    if value is None:
        raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")
    return value
# ----------------------------------------------------------------------
# 1. Configuration & Constants
# ----------------------------------------------------------------------
# Repository the assets are downloaded from; supplied via the environment.
REPO_ID = _secret("REPO_ID")

# Files fetched from REPO_ID during the bootstrap phase.
FILES_TO_DOWNLOAD = [
    "index.faiss",
    "index.pkl",
    "agent_logic.py",
    "prec_hyde_agent.txt",
    "prec_rag_agent.txt",
]

# Local directory the downloaded files are written to.
LOCAL_DOWNLOAD_DIR = "downloaded_assets"

# Embedding model name handed to the RAG engine constructor.
EMBEDDING_MODEL_NAME = "google/embeddinggemma-300m"

# Upstash Redis client; the chat handler uses it for the "request_limit" counter.
redis = Redis(
    url=_secret("UPSTASH_REDIS_URL"),
    token=_secret("UPSTASH_TOKEN"),
)
# ----------------------------------------------------------------------
# 2. Bootstrap Phase – download assets and import the RAG engine
# ----------------------------------------------------------------------
print("--- [UI App] Starting bootstrap process ---")
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)

# The same token is reused for every download below.
hf_token = _secret("HF_TOKEN")
for filename in FILES_TO_DOWNLOAD:
    print(f"--- [UI App] Downloading '{filename}'... ---")
    hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        repo_type="dataset",
        local_dir=LOCAL_DOWNLOAD_DIR,
        token=hf_token,
    )

# Dynamically import the RAG_Engine class from the downloaded script.
# NOTE(review): exec_module runs code fetched from REPO_ID at startup, so the
# app is only as trustworthy as that repository and the token guarding it.
logic_script_path = os.path.join(LOCAL_DOWNLOAD_DIR, "agent_logic.py")
spec = importlib.util.spec_from_file_location("agent_logic", logic_script_path)
if spec is None or spec.loader is None:
    # spec_from_file_location may return None; fail with a clear message
    # instead of an AttributeError on `spec.loader`.
    raise ImportError(f"Cannot load RAG engine module from '{logic_script_path}'.")
agent_logic_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(agent_logic_module)

# Single engine instance shared by all chat requests.
engine = agent_logic_module.RAG_Engine(
    local_download_dir=LOCAL_DOWNLOAD_DIR,
    embedding_model_name=EMBEDDING_MODEL_NAME,
)
print("--- [UI App] Bootstrap complete. Gradio UI is starting. ---")
# ----------------------------------------------------------------------
# 3. Core Gradio Chat Logic
# ----------------------------------------------------------------------
def respond(message: str, history: list[dict[str, str]]):
    """Stream the engine's answer to one chat message.

    Called by Gradio for each user message. The body contains ``yield``, so
    this is a generator function: a value passed to ``return`` is discarded
    by the consumer and never shown. Every user-visible message, including
    the refusal and error notices, is therefore yielded.

    Args:
        message: The latest user message.
        history: Conversation so far, forwarded unchanged to the engine.

    Yields:
        Either one notice string (expired, quota exhausted, or error), or
        progressively longer prefixes of the engine's answer so the UI shows
        a typing effect.
    """
    try:
        # Check expiration: END_DATE is read on every call, format YYYY-MM-DD.
        end_date = datetime.strptime(_secret("END_DATE"), "%Y-%m-%d").date()
        if date.today() > end_date:
            yield "Chatbot sudah expired."
            return

        # Check request limit: each call decrements the counter, and a
        # negative result means the quota was already used up.
        remaining_requests = redis.decr("request_limit")
        if remaining_requests < 0:
            yield "Kuota chat sudah habis."
            return

        # All checks passed: get the full answer, then replay it piece by piece.
        final_response = engine.get_response(message, history)
        response = ""
        for char in final_response:
            response += char
            time.sleep(0.01)
            yield response
    except Exception as e:
        # Top-level boundary for the chat handler: log the cause and show a
        # generic notice instead of letting the exception reach the UI.
        print(f"Error in respond function: {e}")
        yield "Terjadi error saat memproses permintaan. Silakan coba lagi."
# ----------------------------------------------------------------------
# 4. UI Layout – Tips + Chat + Footer
# ----------------------------------------------------------------------
# 4.1 Tips – Markdown text passed to gr.Markdown in the layout section
# (you can edit this markdown as you wish).
# The user-facing text is in Indonesian and is kept verbatim; the raw-string
# prefix keeps any backslashes in the text literal.
tips_md = r"""
## 📋 Tips menggunakan **PRECISE RAG Agent**
- **Apa itu PRECISE RAG Agent?**
AI Agent yang menggunakan Retrieval‑Augmented Generation (RAG) untuk menjawab pertanyaan dari dokumentasi PRECISE (disimpan dalam FAISS storage).
- **Perbedaan dengan chatbot sebelumnya :**
• Dengan menggunakan agentic RAG, agent hanya mengambil dokumentasi yang dibutuhkan.
• Karena efisiensi konteks dan efisiensi design, dapat menggunakan model kecerdasan rendah, sehingga cost turun sekitar 95% dibanding versi non RAG yang sebelumnya.
- **Tips untuk menggunakan**
• Usahakan pertanyaan Anda spesifik agar jawaban lebih akurat.
• Jika jawaban kurang mengena, coba reset chat atau tanyakan ulang dengan bahasa berbeda.
• Dokumentasi chatbot mungkin berbeda penomoran-nya dengan dokumentasi yang Anda punya karena diefisiensi/diorganisasi untuk vector storage, sehingga jangan mereferensi nomor atau butir, tapi langsung sebutkan konsepnya.
• Jika konsep sangat luas atau terlalu kompleks, coba breakdown konsep ke bagian-bagiannya. Usahakan satu pertanyaan mendalam yang dilengkapi konteks daripada banyak pertanyaan yang menyangkut banyak aspek berbeda.
"""
# 4.2 Footer – the old description / notes: component list, model names,
# and a last-updated stamp, also passed to gr.Markdown in the layout section.
footer_md = r"""
---
**Komponen**: LangChain + FAISS + Redis
**Models**: Qwen3-4B-Thinking-2507, Qwen3-4B-Instruct-2507
*Updated 25 Sep 2025 – YOI*
"""
# 4.3 Chat component (no description here)
# Chat interface driven by the `respond` generator, using "messages"-style history.
# NOTE(review): this interface is later rendered inside another gr.Blocks;
# confirm the `theme` given here still takes effect in that setup.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="PRECISE RAG Agent (Expired 1 April 2026)",
    theme=gr.themes.Soft(),
    # Example prompts offered in the UI; not cached.
    cache_examples=False,
    examples=[
        ["Jelaskan konsep PRECISE secara sederhana."],
        ["Berapa keuntungan finansial yang didapat menggunakan PRECISE?"],
        ["Tuliskan implementasi logika LOESS menggunakan JAVA"],
    ],
)
# 4.4 Assemble everything inside a Blocks container
# Page container: chat on top, then the tips, then the footer.
demo = gr.Blocks()
with demo:
    chatbot.render()        # main chat UI
    gr.Markdown(tips_md)    # tips section
    gr.Markdown(footer_md)  # footer (old description)
# ----------------------------------------------------------------------
# 5. Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Serve on all interfaces, port 7860, behind a single username/password
    # pair taken from the environment (user first, then password).
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        auth=(_secret("CHAT_USER"), _secret("CHAT_PASS")),
    )