SiangKai committed on
Commit
66266ad
·
verified ·
1 Parent(s): c66afae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -3
app.py CHANGED
@@ -10,10 +10,12 @@
10
  # =======================================================================
11
  import os
12
  import re
 
13
  import json
14
  import math
15
  import jieba
16
  import torch
 
17
  import gradio as gr
18
  import pandas as pd
19
  import google.generativeai as genai
@@ -37,7 +39,8 @@ from langchain_core.tools import tool
37
  EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
38
  DB_JB_PATH = "yearbook_contents_jb_db_base5"
39
  DB_SIM_PATH = "yearbook_contents_simple_db_base5"
40
- EXCEL_FILE_PATH = "合併檔案.xlsx"
 
41
  _df_cache = None
42
 
43
  # --- Custom Embedding Class ---
@@ -101,17 +104,53 @@ def extract_project_names_from_rag_manual_mix(query: str, db_jb, db_sim, top_k:
101
  return list(OrderedDict.fromkeys(combined_names))[:top_k]
102
 
103
  def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
104
  global _df_cache
105
  if _df_cache is not None:
106
  return _df_cache
 
 
 
 
 
107
  try:
108
- print(f"讀取Excel檔案中... ({file_path})")
109
- _df_cache = pd.read_excel(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  print("✅ Excel 資料載入成功。")
111
  return _df_cache
 
112
  except FileNotFoundError:
113
  print(f"❌ 錯誤:找不到檔案 {file_path}")
114
  return None
 
 
 
 
 
115
 
116
  def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
117
  """
 
10
  # =======================================================================
11
  import os
12
  import re
13
+ import io
14
  import json
15
  import math
16
  import jieba
17
  import torch
18
+ import msoffcrypto
19
  import gradio as gr
20
  import pandas as pd
21
  import google.generativeai as genai
 
39
  EMBEDDING_MODEL_NAME = 'intfloat/multilingual-e5-base'
40
  DB_JB_PATH = "yearbook_contents_jb_db_base5"
41
  DB_SIM_PATH = "yearbook_contents_simple_db_base5"
42
+ EXCEL_FILE_PATH = "合併檔案.xlsx"
43
+ EXCEL_PASSWORD = os.getenv('open_key')
44
  _df_cache = None
45
 
46
  # --- Custom Embedding Class ---
 
104
  return list(OrderedDict.fromkeys(combined_names))[:top_k]
105
 
106
  def load_data(file_path: str = EXCEL_FILE_PATH) -> pd.DataFrame:
107
+ # global _df_cache
108
+ # if _df_cache is not None:
109
+ # return _df_cache
110
+ # try:
111
+ # print(f"讀取Excel檔案中... ({file_path})")
112
+ # _df_cache = pd.read_excel(file_path)
113
+ # print("✅ Excel 資料載入成功。")
114
+ # return _df_cache
115
+ # except FileNotFoundError:
116
+ # print(f"❌ 錯誤:找不到檔案 {file_path}")
117
+ # return None
118
  global _df_cache
119
  if _df_cache is not None:
120
  return _df_cache
121
+
122
+ if not password:
123
+ print("❌ 錯誤:未提供 Excel 密碼。")
124
+ return None
125
+
126
  try:
127
+ print(f"解密並讀取 Excel 檔案中... ({file_path})")
128
+
129
+ # 建立一個暫存的記憶體空間
130
+ decrypted_buffer = io.BytesIO()
131
+
132
+ # 開啟加密檔案
133
+ with open(file_path, 'rb') as f:
134
+ # 使用 msoffcrypto 進行解密
135
+ file = msoffcrypto.OfficeFile(f)
136
+ file.load_key(password=EXCEL_PASSWORD)
137
+ # 將解密後的內容寫入記憶體空間
138
+ file.decrypt(decrypted_buffer)
139
+
140
+ # Pandas 從記憶體中讀取解密後的資料
141
+ _df_cache = pd.read_excel(decrypted_buffer)
142
+
143
  print("✅ Excel 資料載入成功。")
144
  return _df_cache
145
+
146
  except FileNotFoundError:
147
  print(f"❌ 錯誤:找不到檔案 {file_path}")
148
  return None
149
+ except Exception as e:
150
+ # 捕捉可能的錯誤,例如密碼錯誤
151
+ print(f"❌ 錯誤:無法讀取檔案,請檢查密碼是否正確或檔案是否損毀。")
152
+ print(f"詳細錯誤訊息: {e}")
153
+ return None
154
 
155
  def batch_find_relevant_tables(api_key: str, sub_queries: list[str], top_k: int = 1) -> dict:
156
  """