# --------------------------------------------------------------
# app.py
# --------------------------------------------------------------
"""Gradio front end for the PRECISE RAG Agent (Hugging Face Space entry point).

At import time this script downloads the FAISS index files and the
``agent_logic.py`` module from a Hugging Face dataset repo, instantiates
``RAG_Engine`` from that module, and builds the Gradio UI. When run as a
script it launches the UI behind basic authentication.
"""
import importlib.util
import os
import time
from typing import Optional

import gradio as gr
from huggingface_hub import hf_hub_download


# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: Optional[str] = None) -> str:
    """Return the value of environment variable *key*.

    Args:
        key: Name of the environment variable to read.
        fallback: Value returned when the variable is not set.

    Returns:
        The variable's value, or *fallback* if the variable is unset.

    Raises:
        RuntimeError: If the variable is unset and no fallback was given.
    """
    val = os.getenv(key)
    if val is not None:
        return val
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")


# ----------------------------------------------------------------------
# 1. Configuration & Constants
# ----------------------------------------------------------------------
REPO_ID = _secret("REPO_ID")
FILES_TO_DOWNLOAD = ["index.faiss", "index.pkl", "agent_logic.py"]
LOCAL_DOWNLOAD_DIR = "downloaded_assets"
EMBEDDING_MODEL_NAME = "google/embeddinggemma-300m"
# Pause between streamed characters in respond(); tweak speed if you like.
TYPING_DELAY_SECONDS = 0.01

# ----------------------------------------------------------------------
# 2. Bootstrap Phase – download assets and import the RAG engine
# ----------------------------------------------------------------------
print("--- [UI App] Starting bootstrap process ---")
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
hf_token = _secret("HF_TOKEN")

for filename in FILES_TO_DOWNLOAD:
    print(f"--- [UI App] Downloading '{filename}'... ---")
    hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        repo_type="dataset",
        local_dir=LOCAL_DOWNLOAD_DIR,
        token=hf_token,
    )

# Dynamically import the RAG_Engine class.
# NOTE(security): this executes Python code downloaded from REPO_ID (and the
# engine presumably unpickles index.pkl — confirm in agent_logic.py). Only
# point REPO_ID at a repository you fully control.
logic_script_path = os.path.join(LOCAL_DOWNLOAD_DIR, "agent_logic.py")
spec = importlib.util.spec_from_file_location("agent_logic", logic_script_path)
if spec is None or spec.loader is None:
    # spec_from_file_location returns None when no loader can handle the path;
    # fail with a clear message instead of an AttributeError further down.
    raise ImportError(f"Cannot load RAG engine module from '{logic_script_path}'.")
agent_logic_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(agent_logic_module)

engine = agent_logic_module.RAG_Engine(
    local_download_dir=LOCAL_DOWNLOAD_DIR,
    embedding_model_name=EMBEDDING_MODEL_NAME,
)
print("--- [UI App] Bootstrap complete. Gradio UI is starting. ---")


# ----------------------------------------------------------------------
# 3. Core Gradio Chat Logic
# ----------------------------------------------------------------------
def respond(message: str, history: list[dict[str, str]]):
    """Answer one user message and stream the reply back to the UI.

    Called by Gradio for each user message. The full answer is obtained
    from ``engine.get_response`` first; it is then re-emitted one character
    at a time to produce a simple "typing" effect.

    Args:
        message: The user's latest message.
        history: Prior conversation, passed through to the engine unchanged.

    Yields:
        Progressively longer prefixes of the final response.
    """
    # Presumably a str; iterated character by character below.
    final_response = engine.get_response(message, history)

    response = ""
    for char in final_response:
        response += char
        time.sleep(TYPING_DELAY_SECONDS)
        yield response


# ----------------------------------------------------------------------
# 4. UI Layout – Tips + Chat + Footer
# ----------------------------------------------------------------------
# 4.1 Tips (you can edit this markdown as you wish)
tips_md = r"""
## 📋 Tips menggunakan **PRECISE RAG Agent**

- **Apa itu PRECISE RAG Agent?**

  AI Agent yang menggunakan Retrieval‑Augmented Generation (RAG) untuk menjawab pertanyaan dari dokumentasi PRECISE (disimpan dalam FAISS storage).

- **Perbedaan dengan chatbot sebelumnya :**

  • Dengan menggunakan agentic RAG, agent hanya mengambil dokumentasi yang dibutuhkan.

  • Karena efisiensi konteks dan efisiensi design, dapat menggunakan model kecerdasan rendah, sehingga cost turun sekitar 75% dibanding versi non RAG yang sebelumnya.

- **Tips untuk menggunakan**

  • Usahakan pertanyaan Anda spesifik agar jawaban lebih akurat.

  • Jika jawaban kurang mengena, coba reset chat atau tanyakan ulang dengan bahasa berbeda.

  • Dokumentasi chatbot mungkin berbeda penomoran-nya dengan dokumentasi yang Anda punya karena diefisiensi/diorganisasi untuk vector storage, sehingga jangan mereferensi nomor atau butir, tapi langsung sebutkan konsepnya.

  • Jika konsep sangat luas atau terlalu kompleks, coba breakdown konsep ke bagian-bagiannya. Usahakan satu pertanyaan mendalam yang dilengkapi konteks daripada banyak pertanyaan yang menyangkut banyak aspek berbeda.
"""

# 4.2 Footer – the old description / notes
footer_md = r"""
---

**Frameworks**: LangChain + FAISS

**Model**: gpt‑oss‑20b, gemma‑3‑3n4B

*Updated 07 Sep 2025 – YOI*
"""

# 4.3 Chat component (no description here)
# The theme is set on the outer Blocks below: a theme passed to a
# ChatInterface that is rendered inside another Blocks is not applied.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    title="PRECISE RAG Agent",
    examples=[
        ["Jelaskan konsep PRECISE secara sederhana."],
        ["Berapa keuntungan finansial yang didapat menggunakan PRECISE?"],
        ["Tuliskan implementasi logika LOESS menggunakan JAVA"],
    ],
    cache_examples=False,
)

# 4.4 Assemble everything inside a Blocks container
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    chatbot.render()          # <-- Main chat UI
    gr.Markdown(tips_md)      # <-- Tips section
    gr.Markdown(footer_md)    # <-- Footer (old description)

# ----------------------------------------------------------------------
# 5. Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Credentials for the login prompt come from the Space secrets.
    allowed_user = _secret("CHAT_USER")
    allowed_pass = _secret("CHAT_PASS")

    demo.launch(
        auth=(allowed_user, allowed_pass),
        server_name="0.0.0.0",
        ssr_mode=False,
        server_port=7860,
    )