Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,10 +10,12 @@
|
|
| 10 |
# =======================================================================
|
| 11 |
import os
|
| 12 |
import re
|
|
|
|
| 13 |
import json
|
| 14 |
import math
|
| 15 |
import jieba
|
| 16 |
import torch
|
|
|
|
| 17 |
import gradio as gr
|
| 18 |
import pandas as pd
|
| 19 |
import google.generativeai as genai
|
|
@@ -37,7 +39,8 @@ from langchain_core.tools import tool
|
|
| 37 |
EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
|
| 38 |
DB_JB_PATH = "yearbook_contents_jb_db_base5"
|
| 39 |
DB_SIM_PATH = "yearbook_contents_simple_db_base5"
|
| 40 |
-
EXCEL_FILE_PATH = "合併檔案.xlsx"
|
|
|
|
| 41 |
_df_cache = None
|
| 42 |
|
| 43 |
# --- Custom Embedding Class ---
|
|
@@ -101,17 +104,53 @@ def extract_project_names_from_rag_manual_mix(query: str, db_jb, db_sim, top_k:
|
|
| 101 |
return list(OrderedDict.fromkeys(combined_names))[:top_k]
|
| 102 |
|
| 103 |
def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
global _df_cache
|
| 105 |
if _df_cache is not None:
|
| 106 |
return _df_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
try:
|
| 108 |
-
print(f"
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
print("✅ Excel 資料載入成功。")
|
| 111 |
return _df_cache
|
|
|
|
| 112 |
except FileNotFoundError:
|
| 113 |
print(f"❌ 錯誤:找不到檔案 {file_path}")
|
| 114 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
|
| 117 |
"""
|
|
|
|
| 10 |
# =======================================================================
|
| 11 |
import os
|
| 12 |
import re
|
| 13 |
+
import io
|
| 14 |
import json
|
| 15 |
import math
|
| 16 |
import jieba
|
| 17 |
import torch
|
| 18 |
+
import msoffcrypto
|
| 19 |
import gradio as gr
|
| 20 |
import pandas as pd
|
| 21 |
import google.generativeai as genai
|
|
|
|
| 39 |
EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
|
| 40 |
DB_JB_PATH = "yearbook_contents_jb_db_base5"
|
| 41 |
DB_SIM_PATH = "yearbook_contents_simple_db_base5"
|
| 42 |
+
EXCEL_FILE_PATH = "合併檔案.xlsx"
|
| 43 |
+
EXCEL_PASSWORD = os.getenv('open_key')
|
| 44 |
_df_cache = None
|
| 45 |
|
| 46 |
# --- Custom Embedding Class ---
|
|
|
|
| 104 |
return list(OrderedDict.fromkeys(combined_names))[:top_k]
|
| 105 |
|
| 106 |
def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame | None:
    """Load the (password-protected) Excel workbook into a cached DataFrame.

    The workbook is encrypted with an Office password taken from the
    ``EXCEL_PASSWORD`` module constant (read from the ``open_key`` env var).
    It is decrypted in memory with ``msoffcrypto`` and parsed by pandas;
    the resulting DataFrame is memoized in the module-level ``_df_cache``.

    Args:
        file_path: Path to the encrypted ``.xlsx`` file.

    Returns:
        The cached DataFrame on success, or ``None`` when the password is
        missing, the file is not found, or decryption/parsing fails.
    """
    global _df_cache
    if _df_cache is not None:
        return _df_cache

    # BUG FIX: the original checked an undefined name `password`, which
    # raised NameError before the guard could ever fire. The password
    # actually used below is the module constant EXCEL_PASSWORD.
    if not EXCEL_PASSWORD:
        print("❌ 錯誤:未提供 Excel 密碼。")
        return None

    try:
        print(f"解密並讀取 Excel 檔案中... ({file_path})")

        # In-memory buffer that receives the decrypted workbook bytes.
        decrypted_buffer = io.BytesIO()

        # Open the encrypted file and decrypt it with msoffcrypto.
        # (`office_file` instead of `file` to avoid shadowing the builtin.)
        with open(file_path, 'rb') as f:
            office_file = msoffcrypto.OfficeFile(f)
            office_file.load_key(password=EXCEL_PASSWORD)
            office_file.decrypt(decrypted_buffer)

        # Rewind before parsing — defensive; pandas expects to read from
        # the start of the buffer.
        decrypted_buffer.seek(0)

        # Let pandas read the decrypted workbook straight from memory.
        _df_cache = pd.read_excel(decrypted_buffer)

        print("✅ Excel 資料載入成功。")
        return _df_cache

    except FileNotFoundError:
        print(f"❌ 錯誤:找不到檔案 {file_path}")
        return None
    except Exception as e:
        # Covers wrong password, corrupt file, or unreadable content.
        print(f"❌ 錯誤:無法讀取檔案,請檢查密碼是否正確或檔案是否損毀。")
        print(f"詳細錯誤訊息: {e}")
        return None
|
| 154 |
|
| 155 |
def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
|
| 156 |
"""
|