Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,10 +10,12 @@
|
|
| 10 |
# =======================================================================
|
| 11 |
import os
|
| 12 |
import re
|
|
|
|
| 13 |
import json
|
| 14 |
import math
|
| 15 |
import jieba
|
| 16 |
import torch
|
|
|
|
| 17 |
import gradio as gr
|
| 18 |
import pandas as pd
|
| 19 |
import google.generativeai as genai
|
|
@@ -37,7 +39,8 @@ from langchain_core.tools import tool
|
|
| 37 |
EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
|
| 38 |
DB_JB_PATH = "yearbook_contents_jb_db_base5"
|
| 39 |
DB_SIM_PATH = "yearbook_contents_simple_db_base5"
|
| 40 |
-
EXCEL_FILE_PATH = "合併檔案.xlsx"
|
|
|
|
| 41 |
_df_cache = None
|
| 42 |
|
| 43 |
# --- Custom Embedding Class ---
|
|
@@ -101,17 +104,53 @@ def extract_project_names_from_rag_manual_mix(query: str, db_jb, db_sim, top_k:
|
|
| 101 |
return list(OrderedDict.fromkeys(combined_names))[:top_k]
|
| 102 |
|
| 103 |
def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
global _df_cache
|
| 105 |
if _df_cache is not None:
|
| 106 |
return _df_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
try:
|
| 108 |
-
print(f"
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
print("✅ Excel 資料載入成功。")
|
| 111 |
return _df_cache
|
|
|
|
| 112 |
except FileNotFoundError:
|
| 113 |
print(f"❌ 錯誤:找不到檔案 {file_path}")
|
| 114 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
|
| 117 |
"""
|
|
|
|
| 10 |
# =======================================================================
|
| 11 |
import os
|
| 12 |
import re
|
| 13 |
+
import io
|
| 14 |
import json
|
| 15 |
import math
|
| 16 |
import jieba
|
| 17 |
import torch
|
| 18 |
+
import msoffcrypto
|
| 19 |
import gradio as gr
|
| 20 |
import pandas as pd
|
| 21 |
import google.generativeai as genai
|
|
|
|
| 39 |
EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
|
| 40 |
DB_JB_PATH = "yearbook_contents_jb_db_base5"
|
| 41 |
DB_SIM_PATH = "yearbook_contents_simple_db_base5"
|
| 42 |
+
EXCEL_FILE_PATH = "合併檔案.xlsx"
|
| 43 |
+
EXCEL_PASSWORD = os.getenv('open_key')
|
| 44 |
_df_cache = None
|
| 45 |
|
| 46 |
# --- Custom Embedding Class ---
|
|
|
|
| 104 |
return list(OrderedDict.fromkeys(combined_names))[:top_k]
|
| 105 |
|
| 106 |
def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame | None:
    """Load the (password-protected) Excel workbook into a cached DataFrame.

    The workbook is encrypted with an Office password taken from the
    ``EXCEL_PASSWORD`` module constant (read from the ``open_key`` env var).
    It is decrypted in memory with ``msoffcrypto`` and parsed by pandas;
    the resulting DataFrame is memoized in the module-level ``_df_cache``.

    Args:
        file_path: Path to the encrypted ``.xlsx`` file.

    Returns:
        The cached DataFrame on success, or ``None`` when the password is
        missing, the file is not found, or decryption/parsing fails.
    """
    global _df_cache
    if _df_cache is not None:
        return _df_cache

    # BUG FIX: the original checked an undefined name `password`, which
    # raised NameError before the guard could ever fire. The password
    # actually used below is the module constant EXCEL_PASSWORD.
    if not EXCEL_PASSWORD:
        print("❌ 錯誤:未提供 Excel 密碼。")
        return None

    try:
        print(f"解密並讀取 Excel 檔案中... ({file_path})")

        # In-memory buffer that receives the decrypted workbook bytes.
        decrypted_buffer = io.BytesIO()

        # Open the encrypted file and decrypt it with msoffcrypto.
        # (`office_file` instead of `file` to avoid shadowing the builtin.)
        with open(file_path, 'rb') as f:
            office_file = msoffcrypto.OfficeFile(f)
            office_file.load_key(password=EXCEL_PASSWORD)
            office_file.decrypt(decrypted_buffer)

        # Rewind before parsing — defensive; pandas expects to read from
        # the start of the buffer.
        decrypted_buffer.seek(0)

        # Let pandas read the decrypted workbook straight from memory.
        _df_cache = pd.read_excel(decrypted_buffer)

        print("✅ Excel 資料載入成功。")
        return _df_cache

    except FileNotFoundError:
        print(f"❌ 錯誤:找不到檔案 {file_path}")
        return None
    except Exception as e:
        # Covers wrong password, corrupt file, or unreadable content.
        print(f"❌ 錯誤:無法讀取檔案,請檢查密碼是否正確或檔案是否損毀。")
        print(f"詳細錯誤訊息: {e}")
        return None
|
| 154 |
|
| 155 |
def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
|
| 156 |
"""
|