Spaces:

baidu
/

knowledge_retrieval_demo

Running

App Files Community

maxiaolong03 committed on Jul 15

Commit

ed56ef4

1 Parent(s): 810a793

add files

Browse files

Files changed (1) hide show

app.py +175 -227

app.py CHANGED Viewed

@@ -18,21 +18,20 @@ This script provides a Gradio interface for interacting with a chatbot based on
 import argparse
 import base64
-from collections import namedtuple
-from datetime import datetime
-from functools import partial
 import hashlib
 import json
 import logging
-import faiss
 import os
-from argparse import ArgumentParser
 import textwrap
-import copy
 import gradio as gr
 import numpy as np
 from bot_requests import BotClient
 os.environ["NO_PROXY"] = "localhost,127.0.0.1"  # Disable proxy
@@ -40,13 +39,15 @@ os.environ["NO_PROXY"] = "localhost,127.0.0.1"  # Disable proxy
 logging.root.setLevel(logging.INFO)
 FILE_URL_DEFAULT = "data/coffee.txt"
-RELEVANT_PASSAGE_DEFAULT = textwrap.dedent("""\
     1675年时，英格兰就有3000多家咖啡馆；启蒙运动时期，咖啡馆成为民众深入讨论宗教和政治的聚集地，
     1670年代的英国国王查理二世就曾试图取缔咖啡馆。这一时期的英国人认为咖啡具有药用价值，
     甚至名医也会推荐将咖啡用于医疗。"""
 )
-QUERY_REWRITE_PROMPT = textwrap.dedent("""\
     【当前时间】
     {TIMESTAMP}
@@ -108,21 +109,13 @@ def get_args() -> argparse.Namespace:
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
     parser.add_argument(
-        "--server-port", type=int, default=7860, help="Demo server port."
-    )
-    parser.add_argument(
-        "--server-name", type=str, default="0.0.0.0", help="Demo server name."
-    )
-    parser.add_argument(
-        "--max_char", type=int, default=20000, help="Maximum character limit for messages."
-    )
-    parser.add_argument(
-        "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
-    )
-    parser.add_argument(
-        "--model_map",
-        type=str,
         default="{\"ernie-4.5-turbo-128k-preview\": \"https://qianfan.baidubce.com/v2\"}",
         help="""JSON string defining model name to endpoint mappings.
             Required Format:
@@ -133,56 +126,18 @@ def get_args() -> argparse.Namespace:
             - Specify ONE model endpoint in JSON format.
             - Prefix determines model capabilities:
             * ERNIE-4.5: Text-only model
-            """
-    )
-    parser.add_argument(
-        "--embedding_service_url",
-        type=str,
-        default="https://qianfan.baidubce.com/v2",
-        help="Embedding service url."
     )
     parser.add_argument(
-        "--qianfan_api_key",
-        type=str,
-        default=os.environ.get("API_KEY"),
-        help="Qianfan API key.",
-    )
-    parser.add_argument(
-        "--embedding_model",
-        type=str,
-        default="embedding-v1",
-        help="Embedding model name."
-    )
-    parser.add_argument(
-        "--embedding_dim",
-        type=int,
-        default=384,
-        help="Dimension of the embedding vector."
-    )
-    parser.add_argument(
-        "--chunk_size",
-        type=int,
-        default=512,
-        help="Chunk size for splitting long documents."
-    )
-    parser.add_argument(
-        "--top_k",
-        type=int,
-        default=3,
-        help="Top k results to retrieve."
-    )
-    parser.add_argument(
-        "--faiss_index_path",
-        type=str,
-        default="data/faiss_index",
-        help="Faiss index path."
-    )
-    parser.add_argument(
-        "--text_db_path",
-        type=str,
-        default="data/text_db.jsonl",
-        help="Text database path."
     )
     args = parser.parse_args()
     try:
@@ -192,7 +147,7 @@ def get_args() -> argparse.Namespace:
         if len(args.model_map) < 1:
             raise ValueError("model_map must contain at least one model configuration")
     except json.JSONDecodeError as e:
-        raise ValueError("Invalid JSON format for --model-map") from e
     return args
@@ -202,6 +157,7 @@ class FaissTextDatabase:
     A vector database for text retrieval using FAISS.
     Provides efficient similarity search and document management capabilities.
     """
     def __init__(self, args, bot_client: BotClient):
         """
         Initialize the FaissTextDatabase.
@@ -212,14 +168,14 @@ class FaissTextDatabase:
             embedding_dim: dimension of the embedding vector
         """
         self.logger = logging.getLogger(__name__)
         self.bot_client = bot_client
         self.embedding_dim = getattr(args, "embedding_dim", 384)
         self.top_k = getattr(args, "top_k", 3)
         self.context_size = getattr(args, "context_size", 2)
         self.faiss_index_path = getattr(args, "faiss_index_path", "data/faiss_index")
         self.text_db_path = getattr(args, "text_db_path", "data/text_db.jsonl")
         # If faiss_index_path exists, load it and text_db_path
         if os.path.exists(self.faiss_index_path) and os.path.exists(self.text_db_path):
             self.index = faiss.read_index(self.faiss_index_path)
@@ -227,11 +183,8 @@ class FaissTextDatabase:
                 self.text_db = json.load(f)
         else:
             self.index = faiss.IndexFlatIP(self.embedding_dim)
-            self.text_db = {
-                "file_md5s": [],  # Save file_md5s to avoid duplicates
-                "chunks": []      # Save chunks
-            }
     def calculate_md5(self, file_path: str) -> str:
         """
         Calculate the MD5 hash of a file
@@ -257,51 +210,51 @@ class FaissTextDatabase:
         """
         file_md5 = self.calculate_md5(file_path)
         return file_md5 in self.text_db["file_md5s"]
-    def add_embeddings(self, file_path: str, segments: list[str], progress_bar: gr.Progress=None, \
-            save_file: bool=False) -> bool:
         """
         Stores document embeddings in FAISS database after checking for duplicates.
         Generates embeddings for each text segment, updates the FAISS index and metadata database,
         and persists changes to disk. Includes optional progress tracking for Gradio interfaces.
         Args:
             file_path: the path of the source file
             segments: the list of segments
             progress_bar: the progress bar object
         Returns:
             bool: whether the operation was successful
         """
         file_md5 = self.calculate_md5(file_path)
         if file_md5 in self.text_db["file_md5s"]:
-            self.logger.info("File already processed: {file_path} (MD5: {file_md5})".format(
-                file_path=file_path,
-                file_md5=file_md5
-            ))
             return False
         # Generate embeddings
         vectors = []
         file_name = os.path.basename(file_path)
         file_txt = "".join(file_name.split(".")[:-1])[:30]
-        for i, segment in  enumerate(segments):
             vectors.append(self.bot_client.embed_fn(file_txt + "\n" + segment))
             if progress_bar is not None:
                 progress_bar((i + 1) / len(segments), desc=file_name + " Processing...")
         vectors = np.array(vectors)
         self.index.add(vectors.astype('float32'))
         start_id = len(self.text_db["chunks"])
         for i, text in enumerate(segments):
-            self.text_db["chunks"].append({
-                "file_md5": file_md5,
-                "file_name": file_name,
-                "file_txt": file_txt,
-                "text": text,
-                "vector_id": start_id + i
-            })
         self.text_db["file_md5s"].append(file_md5)
         if save_file:
             self.save()
@@ -312,10 +265,10 @@ class FaissTextDatabase:
         Finds the most relevant text chunks for multiple queries and includes surrounding context.
         Uses FAISS to find the closest matching embeddings, then retrieves adjacent chunks
         from the same source document to provide better context understanding.
         Args:
             query_list: list of input query strings
         Returns:
             str: the concatenated output string
         """
@@ -325,51 +278,55 @@ class FaissTextDatabase:
             query_vector = np.array([self.bot_client.embed_fn(query)]).astype('float32')
             _, indices = self.index.search(query_vector, self.top_k)
             all_indices.extend(indices[0].tolist())
         # Step 2: Remove duplicate indices
-        unique_indices = sorted(list(set(all_indices)))
         self.logger.info(f"Retrieved indices: {all_indices}")
         self.logger.info(f"Unique indices after deduplication: {unique_indices}")
         # Step 3: Expand each index with context (within same file boundaries)
         expanded_indices = set()
         file_boundaries = {}  # {file_md5: (start_idx, end_idx)}
         for target_idx in unique_indices:
             target_chunk = self.text_db["chunks"][target_idx]
             target_file_md5 = target_chunk["file_md5"]
             if target_file_md5 not in file_boundaries:
                 file_start = target_idx
                 while file_start > 0 and self.text_db["chunks"][file_start - 1]["file_md5"] == target_file_md5:
                     file_start -= 1
                 file_end = target_idx
-                while (file_end < len(self.text_db["chunks"]) - 1 and
-                    self.text_db["chunks"][file_end + 1]["file_md5"] == target_file_md5):
                     file_end += 1
             else:
                 file_start, file_end = file_boundaries[target_file_md5]
             # Calculate context range within file boundaries
             start = max(file_start, target_idx - self.context_size)
             end = min(file_end, target_idx + self.context_size)
             for pos in range(start, end + 1):
                 expanded_indices.add(pos)
         # Step 4: Sort and merge continuous chunks
-        sorted_indices = sorted(list(expanded_indices))
         groups = []
         current_group = [sorted_indices[0]]
         for i in range(1, len(sorted_indices)):
-            if (sorted_indices[i] == sorted_indices[i - 1] + 1 and
-                self.text_db["chunks"][sorted_indices[i]]["file_md5"] ==
-                self.text_db["chunks"][sorted_indices[i - 1]]["file_md5"]):
                 current_group.append(sorted_indices[i])
             else:
                 groups.append(current_group)
                 current_group = [sorted_indices[i]]
         groups.append(current_group)
         # Step 5: Create merged text for each group
         result = ""
         for idx, group in enumerate(groups):
@@ -377,22 +334,23 @@ class FaissTextDatabase:
             for idx in group:
                 result += self.text_db["chunks"][idx]["text"] + "\n"
             self.logger.info(f"Merged chunk range: {group[0]}-{group[-1]}")
         return result
     def save(self) -> None:
         """Save the database to disk."""
         faiss.write_index(self.index, self.faiss_index_path)
         with open(self.text_db_path, 'w', encoding='utf-8') as f:
             json.dump(self.text_db, f, ensure_ascii=False, indent=2)
-class GradioEvents(object):
     """
     Manages event handling and UI interactions for Gradio applications.
     Provides methods to process user inputs, trigger callbacks, and update interface components.
     """
     @staticmethod
     def get_history_conversation(task_history: list) -> tuple:
         """
@@ -412,16 +370,16 @@ class GradioEvents(object):
         for query_h, response_h in task_history:
             conversation.append({"role": "user", "content": query_h})
             conversation.append({"role": "assistant", "content": response_h})
-            conversation_str += "user:\n{query}\n assistant:\n{response}\n ".format(query=query_h, response=response_h)
         return conversation, conversation_str
     @staticmethod
     def chat_stream(
-        query: str,
-        task_history: list,
-        model: str,
         faiss_db: FaissTextDatabase,
-        bot_client: BotClient,
     ) -> dict:
         """
         Streams chatbot responses by processing queries with context from history and FAISS database.
@@ -439,11 +397,10 @@ class GradioEvents(object):
             dict: A dictionary containing the event type and its corresponding content.
         """
         conversation, conversation_str = GradioEvents.get_history_conversation(task_history)
-        conversation_str += "user:\n{query}\n".format(query=query)
         search_info_message = QUERY_REWRITE_PROMPT.format(
-            TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            CONVERSATION=conversation_str
         )
         search_conversation = [{"role": "user", "content": search_info_message}]
         search_info_result = GradioEvents.get_sub_query(search_conversation, model, bot_client)
@@ -453,11 +410,11 @@ class GradioEvents(object):
         if search_info_result.get("query", []):
             relevant_passages = faiss_db.search_with_context(search_info_result["query"])
             yield {"type": "relevant_passage", "content": relevant_passages}
             query = ANSWER_PROMPT.format(
-                DOC_CONTENT=relevant_passages,
-                TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                CONVERSATION=conversation_str
             )
         conversation.append({"role": "user", "content": query})
@@ -466,22 +423,22 @@ class GradioEvents(object):
             for chunk in bot_client.process_stream(model, req_data):
                 if "error" in chunk:
                     raise Exception(chunk["error"])
                 message = chunk.get("choices", [{}])[0].get("delta", {})
                 content = message.get("content", "")
                 if content:
                     yield {"type": "answer", "content": content}
         except Exception as e:
             raise gr.Error("Exception: " + repr(e))
     @staticmethod
     def predict_stream(
-        query: str,
-        chatbot: list,
         task_history: list,
-        model: str,
         faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
@@ -493,7 +450,7 @@ class GradioEvents(object):
         Args:
             query (str): The content of the user's input query.
             chatbot (list): The chatbot's historical message list.
-            task_history (list): The task history record list.
             model (Model): The model used to generate responses.
             bot_client (object): The chatbot client object.
             faiss_db (FaissTextDatabase): The FAISS database instance.
@@ -503,16 +460,16 @@ class GradioEvents(object):
         """
         query = query if query else QUERY_DEFAULT
-        logging.info("User: {}".format(query))
-        chatbot.append({"role": "user", "content": query})
         # First yield the chatbot with user message
         yield chatbot, None
         new_texts = GradioEvents.chat_stream(
-            query,
-            task_history,
-            model,
             faiss_db,
             bot_client,
         )
@@ -522,7 +479,7 @@ class GradioEvents(object):
         for new_text in new_texts:
             if not isinstance(new_text, dict):
                 continue
             if new_text.get("type") == "embedding":
                 current_relevant_passage = new_text["content"]
                 yield chatbot, current_relevant_passage
@@ -533,24 +490,24 @@ class GradioEvents(object):
                 continue
             elif new_text.get("type") == "answer":
                 response += new_text["content"]
             # Remove previous message if exists
             if chatbot[-1].get("role") == "assistant":
                 chatbot.pop(-1)
             if response:
                 chatbot.append({"role": "assistant", "content": response})
                 yield chatbot, current_relevant_passage
-        logging.info("History: {}".format(task_history))
-        task_history.append((query, response))
-        logging.info("ERNIE models: {}".format(response))
     @staticmethod
     def regenerate(
-        chatbot: list,
-        task_history: list,
-        model: str,
         faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
@@ -576,15 +533,14 @@ class GradioEvents(object):
             chatbot.pop(-1)
         chatbot.pop(-1)
-        for chunk, relevant_passage in GradioEvents.predict_stream(
-            item[0],
-            chatbot,
-            task_history,
-            model,
             faiss_db,
             bot_client,
-        ):
-            yield chunk, relevant_passage
     @staticmethod
     def reset_user_input() -> gr.update:
@@ -605,19 +561,15 @@ class GradioEvents(object):
             tuple: A named tuple containing the updated values for chatbot, task_history, file_btn, and relevant_passage
         """
         GradioEvents.gc()
-        reset_result = namedtuple("reset_result",
-                           ["chatbot",
-                            "task_history",
-                            "file_btn",
-                            "relevant_passage"])
         return reset_result(
             [],  # clear chatbot
             [],  # clear task_history
             gr.update(value=None),  # clear file_btn
-            gr.update(value=None)  # reset relevant_passage
         )
     @staticmethod
     def gc():
         """
@@ -644,7 +596,7 @@ class GradioEvents(object):
         extension = image_path.split(".")[-1]
         with open(image_path, "rb") as image_file:
             base64_image = base64.b64encode(image_file.read()).decode("utf-8")
-        url = "data:image/{ext};base64,{img}".format(ext=extension, img=base64_image)
         return url
     @staticmethod
@@ -696,20 +648,20 @@ class GradioEvents(object):
         if len(line) <= chunk_size:
             return line, ""
         # Search from chunk_size position backwards
         split_pos = chunk_size
         for i in range(chunk_size, 0, -1):
             if line[i] in PUNCTUATIONS:
                 split_pos = i + 1  # Include punctuation
                 break
         # Fallback to whitespace if no punctuation found
         if split_pos == chunk_size:
             split_pos = line.rfind(" ", 0, chunk_size)
             if split_pos == -1:
                 split_pos = chunk_size  # Hard split
         return line[:split_pos], line[split_pos:]
     @staticmethod
@@ -735,7 +687,7 @@ class GradioEvents(object):
         chunks = []
         current_chunk = []
         current_length = 0
         for line in lines:
             # If adding this line would exceed chunk size (and we have content)
             if current_length + len(line) > chunk_size and current_chunk:
@@ -747,22 +699,19 @@ class GradioEvents(object):
             while len(line) > chunk_size:
                 head, line = GradioEvents.split_oversized_line(line, chunk_size)
                 chunks.append(head)
             # Add remaining line content
             if line:
                 current_chunk.append(line)
                 current_length += len(line) + 1
         if current_chunk:
             chunks.append("\n".join(current_chunk))
         return chunks
     @staticmethod
     def file_upload(
-        files_url: list,
-        chunk_size: int,
-        faiss_db: FaissTextDatabase,
-        progress_bar: gr.Progress = gr.Progress()
     ) -> str:
         """
         Uploads and processes multiple files by splitting them into semantically meaningful chunks,
@@ -783,13 +732,18 @@ class GradioEvents(object):
         for file_url in files_url:
             if not GradioEvents.save_file_to_db(file_url, chunk_size, faiss_db, progress_bar):
                 file_name = os.path.basename(file_url)
-                gr.Info("{} already processed.".format(file_name))
         yield gr.update(visible=False)
     @staticmethod
-    def save_file_to_db(file_url: str, chunk_size: int, faiss_db: FaissTextDatabase, \
-                                    progress_bar: gr.Progress=None, save_file: bool=False):
         """
         Processes and indexes document content into FAISS database with semantic-aware chunking.
         Handles file validation, text segmentation, embedding generation and storage operations.
@@ -804,31 +758,31 @@ class GradioEvents(object):
             bool: True if the file was saved successfully, otherwise False.
         """
         if not os.path.exists(file_url):
-            logging.error("File not found: {}".format(file_url))
             return False
         file_name = os.path.basename(file_url)
         if not faiss_db.is_file_processed(file_url):
-            logging.info("{} not processed yet, processing now...".format(file_url))
             try:
                 segments = GradioEvents.split_text_into_chunks(file_url, chunk_size)
                 faiss_db.add_embeddings(file_url, segments, progress_bar, save_file)
-                logging.info("{} processed successfully.".format(file_url))
                 return True
             except Exception as e:
-                logging.error("Error processing {}: {}".format(file_url, str(e)))
-                gr.Error("Error processing file: {}".format(file_name))
                 raise
         else:
-            logging.info("{} already processed.".format(file_url))
             return False
 def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_template: FaissTextDatabase):
     """
     Launch demo program
     Args:
         args (argparse.Namespace): argparse Namespace object containing parsed command line arguments
         bot_client (BotClient): Bot client instance
@@ -855,38 +809,41 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
     }
     """
     with gr.Blocks(css=css) as demo:
-        model_name = gr.State(list(args.model_map.keys())[0])
         faiss_db = gr.State(copy.deepcopy(faiss_db_template))
         logo_url = GradioEvents.get_image_url("assets/logo.png")
-        gr.Markdown("""\
-                <p align="center"><img src="{}" \
-                style="height: 60px"/><p>""".format(logo_url))
         gr.Markdown(
             """\
 <center><font size=3>This demo is based on ERNIE models. \
 (本演示基于文心大模型实现。)</center>"""
         )
-        chatbot = gr.Chatbot(
-            label="ERNIE",
-            type="messages"
         )
         with gr.Row(equal_height=True):
             file_btn = gr.File(
-                label="Knowledge Base Upload (System default will be used if none provided. Accepted formats: TXT, MD)",
-                height="150px",
                 file_types=[".txt", ".md"],
                 elem_id="file-upload",
-                file_count="multiple"
             )
             relevant_passage = gr.Textbox(
-                label="Relevant Passage",
-                lines=5,
-                max_lines=5,
-                placeholder=RELEVANT_PASSAGE_DEFAULT,
-                interactive=False
             )
         with gr.Row():
             progress_bar = gr.Textbox(label="Progress", visible=False)
@@ -897,21 +854,15 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
             empty_btn = gr.Button("🧹 Clear History(清除历史)")
             submit_btn = gr.Button("🚀 Submit(发送)", elem_id="submit-button")
             regen_btn = gr.Button("🤔️ Regenerate(重试)")
         task_history = gr.State([])
-        predict_with_clients = partial(
-            GradioEvents.predict_stream,
-            bot_client=bot_client
-        )
-        regenerate_with_clients = partial(
-            GradioEvents.regenerate,
-            bot_client=bot_client
-        )
         file_upload_with_clients = partial(
             GradioEvents.file_upload,
         )
         chunk_size = gr.State(args.chunk_size)
         file_btn.change(
             fn=file_upload_with_clients,
@@ -919,34 +870,30 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_templa
             outputs=[progress_bar],
         )
         query.submit(
-            predict_with_clients,
-            inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
-            show_progress=True
         )
         query.submit(GradioEvents.reset_user_input, [], [query])
         submit_btn.click(
-            predict_with_clients,
             inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
             show_progress=True,
         )
         submit_btn.click(GradioEvents.reset_user_input, [], [query])
         empty_btn.click(
-            GradioEvents.reset_state,
-            outputs=[chatbot, task_history, file_btn, relevant_passage], show_progress=True
         )
         regen_btn.click(
-            regenerate_with_clients,
             inputs=[chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
-            show_progress=True
         )
-    demo.queue().launch(
-        server_port=args.server_port,
-        server_name=args.server_name
-    )
 def main():
@@ -960,5 +907,6 @@ def main():
     launch_demo(args, bot_client, faiss_db)
 if __name__ == "__main__":
     main()

 import argparse
 import base64
+import copy
 import hashlib
 import json
 import logging
 import os
 import textwrap
+from argparse import ArgumentParser
+from collections import namedtuple
+from datetime import datetime
+from functools import partial
+import faiss
 import gradio as gr
 import numpy as np
 from bot_requests import BotClient
 os.environ["NO_PROXY"] = "localhost,127.0.0.1"  # Disable proxy
 logging.root.setLevel(logging.INFO)
 FILE_URL_DEFAULT = "data/coffee.txt"
+RELEVANT_PASSAGE_DEFAULT = textwrap.dedent(
+    """\
     1675年时，英格兰就有3000多家咖啡馆；启蒙运动时期，咖啡馆成为民众深入讨论宗教和政治的聚集地，
     1670年代的英国国王查理二世就曾试图取缔咖啡馆。这一时期的英国人认为咖啡具有药用价值，
     甚至名医也会推荐将咖啡用于医疗。"""
 )
+QUERY_REWRITE_PROMPT = textwrap.dedent(
+    """\
     【当前时间】
     {TIMESTAMP}
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
+    parser.add_argument("--server-port", type=int, default=7860, help="Demo server port.")
+    parser.add_argument("--server-name", type=str, default="0.0.0.0", help="Demo server name.")
+    parser.add_argument("--max_char", type=int, default=20000, help="Maximum character limit for messages.")
+    parser.add_argument("--max_retry_num", type=int, default=3, help="Maximum retry number for request.")
     parser.add_argument(
+        "--model_map",
+        type=str,
         default="{\"ernie-4.5-turbo-128k-preview\": \"https://qianfan.baidubce.com/v2\"}",
         help="""JSON string defining model name to endpoint mappings.
             Required Format:
             - Specify ONE model endpoint in JSON format.
             - Prefix determines model capabilities:
             * ERNIE-4.5: Text-only model
+            """,
     )
     parser.add_argument(
+        "--embedding_service_url", type=str, default="https://qianfan.baidubce.com/v2", help="Embedding service url."
     )
+    parser.add_argument("--qianfan_api_key", type=str, default=os.environ.get("API_KEY"), help="Qianfan API key.")
+    parser.add_argument("--embedding_model", type=str, default="embedding-v1", help="Embedding model name.")
+    parser.add_argument("--embedding_dim", type=int, default=384, help="Dimension of the embedding vector.")
+    parser.add_argument("--chunk_size", type=int, default=512, help="Chunk size for splitting long documents.")
+    parser.add_argument("--top_k", type=int, default=3, help="Top k results to retrieve.")
+    parser.add_argument("--faiss_index_path", type=str, default="data/faiss_index", help="Faiss index path.")
+    parser.add_argument("--text_db_path", type=str, default="data/text_db.jsonl", help="Text database path.")
     args = parser.parse_args()
     try:
         if len(args.model_map) < 1:
             raise ValueError("model_map must contain at least one model configuration")
     except json.JSONDecodeError as e:
+        raise ValueError("Invalid JSON format for --model_map") from e
     return args
     A vector database for text retrieval using FAISS.
     Provides efficient similarity search and document management capabilities.
     """
     def __init__(self, args, bot_client: BotClient):
         """
         Initialize the FaissTextDatabase.
             embedding_dim: dimension of the embedding vector
         """
         self.logger = logging.getLogger(__name__)
         self.bot_client = bot_client
         self.embedding_dim = getattr(args, "embedding_dim", 384)
         self.top_k = getattr(args, "top_k", 3)
         self.context_size = getattr(args, "context_size", 2)
         self.faiss_index_path = getattr(args, "faiss_index_path", "data/faiss_index")
         self.text_db_path = getattr(args, "text_db_path", "data/text_db.jsonl")
         # If faiss_index_path exists, load it and text_db_path
         if os.path.exists(self.faiss_index_path) and os.path.exists(self.text_db_path):
             self.index = faiss.read_index(self.faiss_index_path)
                 self.text_db = json.load(f)
         else:
             self.index = faiss.IndexFlatIP(self.embedding_dim)
+            self.text_db = {"file_md5s": [], "chunks": []}  # Save file_md5s to avoid duplicates  # Save chunks
     def calculate_md5(self, file_path: str) -> str:
         """
         Calculate the MD5 hash of a file
         """
         file_md5 = self.calculate_md5(file_path)
         return file_md5 in self.text_db["file_md5s"]
+    def add_embeddings(
+        self, file_path: str, segments: list[str], progress_bar: gr.Progress = None, save_file: bool = False
+    ) -> bool:
         """
         Stores document embeddings in FAISS database after checking for duplicates.
         Generates embeddings for each text segment, updates the FAISS index and metadata database,
         and persists changes to disk. Includes optional progress tracking for Gradio interfaces.
         Args:
             file_path: the path of the source file
             segments: the list of segments
             progress_bar: the progress bar object
         Returns:
             bool: whether the operation was successful
         """
         file_md5 = self.calculate_md5(file_path)
         if file_md5 in self.text_db["file_md5s"]:
+            self.logger.info(f"File already processed: {file_path} (MD5: {file_md5})")
             return False
         # Generate embeddings
         vectors = []
         file_name = os.path.basename(file_path)
         file_txt = "".join(file_name.split(".")[:-1])[:30]
+        for i, segment in enumerate(segments):
             vectors.append(self.bot_client.embed_fn(file_txt + "\n" + segment))
             if progress_bar is not None:
                 progress_bar((i + 1) / len(segments), desc=file_name + " Processing...")
         vectors = np.array(vectors)
         self.index.add(vectors.astype('float32'))
         start_id = len(self.text_db["chunks"])
         for i, text in enumerate(segments):
+            self.text_db["chunks"].append(
+                {
+                    "file_md5": file_md5,
+                    "file_name": file_name,
+                    "file_txt": file_txt,
+                    "text": text,
+                    "vector_id": start_id + i,
+                }
+            )
         self.text_db["file_md5s"].append(file_md5)
         if save_file:
             self.save()
         Finds the most relevant text chunks for multiple queries and includes surrounding context.
         Uses FAISS to find the closest matching embeddings, then retrieves adjacent chunks
         from the same source document to provide better context understanding.
         Args:
             query_list: list of input query strings
         Returns:
             str: the concatenated output string
         """
             query_vector = np.array([self.bot_client.embed_fn(query)]).astype('float32')
             _, indices = self.index.search(query_vector, self.top_k)
             all_indices.extend(indices[0].tolist())
         # Step 2: Remove duplicate indices
+        unique_indices = sorted(set(all_indices))
         self.logger.info(f"Retrieved indices: {all_indices}")
         self.logger.info(f"Unique indices after deduplication: {unique_indices}")
         # Step 3: Expand each index with context (within same file boundaries)
         expanded_indices = set()
         file_boundaries = {}  # {file_md5: (start_idx, end_idx)}
         for target_idx in unique_indices:
             target_chunk = self.text_db["chunks"][target_idx]
             target_file_md5 = target_chunk["file_md5"]
             if target_file_md5 not in file_boundaries:
                 file_start = target_idx
                 while file_start > 0 and self.text_db["chunks"][file_start - 1]["file_md5"] == target_file_md5:
                     file_start -= 1
                 file_end = target_idx
+                while (
+                    file_end < len(self.text_db["chunks"]) - 1
+                    and self.text_db["chunks"][file_end + 1]["file_md5"] == target_file_md5
+                ):
                     file_end += 1
             else:
                 file_start, file_end = file_boundaries[target_file_md5]
             # Calculate context range within file boundaries
             start = max(file_start, target_idx - self.context_size)
             end = min(file_end, target_idx + self.context_size)
             for pos in range(start, end + 1):
                 expanded_indices.add(pos)
         # Step 4: Sort and merge continuous chunks
+        sorted_indices = sorted(expanded_indices)
         groups = []
         current_group = [sorted_indices[0]]
         for i in range(1, len(sorted_indices)):
+            if (
+                sorted_indices[i] == sorted_indices[i - 1] + 1
+                and self.text_db["chunks"][sorted_indices[i]]["file_md5"]
+                == self.text_db["chunks"][sorted_indices[i - 1]]["file_md5"]
+            ):
                 current_group.append(sorted_indices[i])
             else:
                 groups.append(current_group)
                 current_group = [sorted_indices[i]]
         groups.append(current_group)
         # Step 5: Create merged text for each group
         result = ""
         for idx, group in enumerate(groups):
             for idx in group:
                 result += self.text_db["chunks"][idx]["text"] + "\n"
             self.logger.info(f"Merged chunk range: {group[0]}-{group[-1]}")
         return result
     def save(self) -> None:
         """Write the FAISS index and the text metadata to their on-disk paths."""
         # The vector index is serialized by FAISS into its own file.
         faiss.write_index(self.index, self.faiss_index_path)
         # The text metadata is stored as indented, non-ASCII-escaped UTF-8 JSON.
         with open(self.text_db_path, mode="w", encoding="utf-8") as db_file:
             json.dump(self.text_db, fp=db_file, ensure_ascii=False, indent=2)
+class GradioEvents:
     """
     Manages event handling and UI interactions for Gradio applications.
     Provides methods to process user inputs, trigger callbacks, and update interface components.
     """
     @staticmethod
     def get_history_conversation(task_history: list) -> tuple:
         """
         for query_h, response_h in task_history:
             conversation.append({"role": "user", "content": query_h})
             conversation.append({"role": "assistant", "content": response_h})
+            conversation_str += f"user:\n{query_h}\n assistant:\n{response_h}\n "
         return conversation, conversation_str
     @staticmethod
     def chat_stream(
+        query: str,
+        task_history: list,
+        model: str,
         faiss_db: FaissTextDatabase,
+        bot_client: BotClient,
     ) -> dict:
         """
         Streams chatbot responses by processing queries with context from history and FAISS database.
             dict: A dictionary containing the event type and its corresponding content.
         """
         conversation, conversation_str = GradioEvents.get_history_conversation(task_history)
+        conversation_str += f"user:\n{query}\n"
         search_info_message = QUERY_REWRITE_PROMPT.format(
+            TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), CONVERSATION=conversation_str
         )
         search_conversation = [{"role": "user", "content": search_info_message}]
         search_info_result = GradioEvents.get_sub_query(search_conversation, model, bot_client)
         if search_info_result.get("query", []):
             relevant_passages = faiss_db.search_with_context(search_info_result["query"])
             yield {"type": "relevant_passage", "content": relevant_passages}
             query = ANSWER_PROMPT.format(
+                DOC_CONTENT=relevant_passages,
+                TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                CONVERSATION=conversation_str,
             )
         conversation.append({"role": "user", "content": query})
             for chunk in bot_client.process_stream(model, req_data):
                 if "error" in chunk:
                     raise Exception(chunk["error"])
                 message = chunk.get("choices", [{}])[0].get("delta", {})
                 content = message.get("content", "")
                 if content:
                     yield {"type": "answer", "content": content}
         except Exception as e:
             raise gr.Error("Exception: " + repr(e))
     @staticmethod
     def predict_stream(
+        query: str,
+        chatbot: list,
         task_history: list,
+        model: str,
         faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
         Args:
             query (str): The content of the user's input query.
             chatbot (list): The chatbot's historical message list.
+            task_history (list): The task history record list.
             model (Model): The model used to generate responses.
             bot_client (object): The chatbot client object.
             faiss_db (FaissTextDatabase): The FAISS database instance.
         """
         query = query if query else QUERY_DEFAULT
+        logging.info(f"User: {query}")
+        chatbot.append({"role": "user", "content": query})
         # First yield the chatbot with user message
         yield chatbot, None
         new_texts = GradioEvents.chat_stream(
+            query,
+            task_history,
+            model,
             faiss_db,
             bot_client,
         )
         for new_text in new_texts:
             if not isinstance(new_text, dict):
                 continue
             if new_text.get("type") == "embedding":
                 current_relevant_passage = new_text["content"]
                 yield chatbot, current_relevant_passage
                 continue
             elif new_text.get("type") == "answer":
                 response += new_text["content"]
             # Remove previous message if exists
             if chatbot[-1].get("role") == "assistant":
                 chatbot.pop(-1)
             if response:
                 chatbot.append({"role": "assistant", "content": response})
                 yield chatbot, current_relevant_passage
+        logging.info(f"History: {task_history}")
+        task_history.append((query, response))
+        logging.info(f"ERNIE models: {response}")
     @staticmethod
     def regenerate(
+        chatbot: list,
+        task_history: list,
+        model: str,
         faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
             chatbot.pop(-1)
         chatbot.pop(-1)
+        yield from GradioEvents.predict_stream(
+            item[0],
+            chatbot,
+            task_history,
+            model,
             faiss_db,
             bot_client,
+        )
     @staticmethod
     def reset_user_input() -> gr.update:
             tuple: A named tuple containing the updated values for chatbot, task_history, file_btn, and relevant_passage
         """
         GradioEvents.gc()
+        reset_result = namedtuple("reset_result", ["chatbot", "task_history", "file_btn", "relevant_passage"])
         return reset_result(
             [],  # clear chatbot
             [],  # clear task_history
             gr.update(value=None),  # clear file_btn
+            gr.update(value=None),  # reset relevant_passage
         )
     @staticmethod
     def gc():
         """
         extension = image_path.split(".")[-1]
         with open(image_path, "rb") as image_file:
             base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+        url = f"data:image/{extension};base64,{base64_image}"
         return url
     @staticmethod
         if len(line) <= chunk_size:
             return line, ""
         # Search from chunk_size position backwards
         split_pos = chunk_size
         for i in range(chunk_size, 0, -1):
             if line[i] in PUNCTUATIONS:
                 split_pos = i + 1  # Include punctuation
                 break
         # Fallback to whitespace if no punctuation found
         if split_pos == chunk_size:
             split_pos = line.rfind(" ", 0, chunk_size)
             if split_pos == -1:
                 split_pos = chunk_size  # Hard split
         return line[:split_pos], line[split_pos:]
     @staticmethod
         chunks = []
         current_chunk = []
         current_length = 0
         for line in lines:
             # If adding this line would exceed chunk size (and we have content)
             if current_length + len(line) > chunk_size and current_chunk:
             while len(line) > chunk_size:
                 head, line = GradioEvents.split_oversized_line(line, chunk_size)
                 chunks.append(head)
             # Add remaining line content
             if line:
                 current_chunk.append(line)
                 current_length += len(line) + 1
         if current_chunk:
             chunks.append("\n".join(current_chunk))
         return chunks
     @staticmethod
     def file_upload(
+        files_url: list, chunk_size: int, faiss_db: FaissTextDatabase, progress_bar: gr.Progress = gr.Progress()
     ) -> str:
         """
         Uploads and processes multiple files by splitting them into semantically meaningful chunks,
         for file_url in files_url:
             if not GradioEvents.save_file_to_db(file_url, chunk_size, faiss_db, progress_bar):
                 file_name = os.path.basename(file_url)
+                gr.Info(f"{file_name} already processed.")
         yield gr.update(visible=False)
     @staticmethod
     def save_file_to_db(
         file_url: str,
         chunk_size: int,
         faiss_db: FaissTextDatabase,
         progress_bar: gr.Progress = None,
         save_file: bool = False,
     ):
         """
         Split a file into text chunks and index them in the FAISS database.

         Args:
             file_url (str): Path of the file to index.
             chunk_size (int): Chunk size passed to split_text_into_chunks.
             faiss_db (FaissTextDatabase): Database that receives the embeddings.
             progress_bar (gr.Progress): Optional progress callback forwarded to
                 faiss_db.add_embeddings.
             save_file (bool): Forwarded to faiss_db.add_embeddings, which saves
                 the database to disk after indexing when this is true.

         Returns:
             bool: True if the file was saved successfully, otherwise False
             (file missing or already processed).

         Raises:
             gr.Error: If chunking or embedding the file fails. The original
                 exception is chained as the cause.
         """
         if not os.path.exists(file_url):
             logging.error(f"File not found: {file_url}")
             return False

         # Guard clause: a file the database already knows needs no work.
         if faiss_db.is_file_processed(file_url):
             logging.info(f"{file_url} already processed.")
             return False

         logging.info(f"{file_url} not processed yet, processing now...")
         try:
             segments = GradioEvents.split_text_into_chunks(file_url, chunk_size)
             faiss_db.add_embeddings(file_url, segments, progress_bar, save_file)
         except Exception as e:
             logging.error(f"Error processing {file_url}: {e!s}")
             file_name = os.path.basename(file_url)
             # gr.Error is an exception class: merely constructing it has no
             # effect, it must be raised for Gradio to display the message.
             raise gr.Error(f"Error processing file: {file_name}") from e

         logging.info(f"{file_url} processed successfully.")
         return True
 def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_template: FaissTextDatabase):
     """
     Launch demo program
     Args:
         args (argparse.Namespace): argparse Namespace object containing parsed command line arguments
         bot_client (BotClient): Bot client instance
     }
     """
     with gr.Blocks(css=css) as demo:
+        model_name = gr.State(next(iter(args.model_map.keys())))
         faiss_db = gr.State(copy.deepcopy(faiss_db_template))
         logo_url = GradioEvents.get_image_url("assets/logo.png")
+        gr.Markdown(
+            f"""\
+                <p align="center"><img src="{logo_url}" \
+                style="height: 60px"/><p>"""
+        )
         gr.Markdown(
             """\
 <center><font size=3>This demo is based on ERNIE models. \
 (本演示基于文心大模型实现。)</center>"""
         )
+        gr.Markdown(
+            """\
+<center><font size=3>    <a href="https://ernie.baidu.com/">ERNIE Bot</a> | \
+<a href="https://github.com/PaddlePaddle/ERNIE">GitHub</a> | \
+<a href="https://huggingface.co/baidu">Hugging Face</a> | \
+<a href="https://aistudio.baidu.com/modelsoverview">BAIDU AI Studio</a> | \
+<a href="https://yiyan.baidu.com/blog/publication/">Technical Report</a></center>"""
         )
+        chatbot = gr.Chatbot(label="ERNIE", type="messages")
         with gr.Row(equal_height=True):
             file_btn = gr.File(
+                label="Knowledge Base Upload (System default will be used if none provided. Accepted formats: TXT, MD)",
+                height="150px",
                 file_types=[".txt", ".md"],
                 elem_id="file-upload",
+                file_count="multiple",
             )
             relevant_passage = gr.Textbox(
+                label="Relevant Passage", lines=5, max_lines=5, placeholder=RELEVANT_PASSAGE_DEFAULT, interactive=False
             )
         with gr.Row():
             progress_bar = gr.Textbox(label="Progress", visible=False)
             empty_btn = gr.Button("🧹 Clear History(清除历史)")
             submit_btn = gr.Button("🚀 Submit(发送)", elem_id="submit-button")
             regen_btn = gr.Button("🤔️ Regenerate(重试)")
         task_history = gr.State([])
+        predict_with_clients = partial(GradioEvents.predict_stream, bot_client=bot_client)
+        regenerate_with_clients = partial(GradioEvents.regenerate, bot_client=bot_client)
         file_upload_with_clients = partial(
             GradioEvents.file_upload,
         )
         chunk_size = gr.State(args.chunk_size)
         file_btn.change(
             fn=file_upload_with_clients,
             outputs=[progress_bar],
         )
         query.submit(
+            predict_with_clients,
+            inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
+            show_progress=True,
         )
         query.submit(GradioEvents.reset_user_input, [], [query])
         submit_btn.click(
+            predict_with_clients,
             inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
             show_progress=True,
         )
         submit_btn.click(GradioEvents.reset_user_input, [], [query])
         empty_btn.click(
+            GradioEvents.reset_state, outputs=[chatbot, task_history, file_btn, relevant_passage], show_progress=True
         )
         regen_btn.click(
+            regenerate_with_clients,
             inputs=[chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
+            show_progress=True,
         )
+    demo.queue().launch(server_port=args.server_port, server_name=args.server_name)
 def main():
     launch_demo(args, bot_client, faiss_db)
 # Call main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
     main()