Spaces:
Running
Running
change db link
Browse files
- db/mongodb.py +6 -6
- excel/excel_translate.py +2 -2
- powerpoint/pptx_object.py +3 -3
- powerpoint/xml_handling.py +3 -3
- test.ipynb +49 -0
- word/word_translate.py +1 -1
db/mongodb.py
CHANGED
@@ -6,7 +6,7 @@ from io import BytesIO
|
|
6 |
import magic
|
7 |
|
8 |
def connect_mongodb(db_name, collection_name):
|
9 |
-
client = MongoClient("mongodb+srv://1highbar456
|
10 |
db = client[db_name]
|
11 |
fs = gridfs.GridFS(db, collection=collection_name)
|
12 |
return fs
|
@@ -24,7 +24,7 @@ def save_file_to_mongodb(uploaded_file, db_name="ppt", collection_name="root_fil
|
|
24 |
:param file_name: Tên file muốn lưu (không cần .pptx). Nếu để None, lấy tên gốc.
|
25 |
:return: file_id nếu lưu thành công, None nếu file đã tồn tại
|
26 |
"""
|
27 |
-
client = MongoClient("mongodb+srv://1highbar456
|
28 |
db = client[db_name]
|
29 |
fs = gridfs.GridFS(db, collection=collection_name)
|
30 |
|
@@ -63,7 +63,7 @@ def delete_pptx_from_mongodb(file_id, db_name="ppt", collection_name="root_file"
|
|
63 |
:param collection_name: Tên collection GridFS
|
64 |
"""
|
65 |
# Kết nối đến MongoDB
|
66 |
-
client = MongoClient("mongodb+srv://1highbar456
|
67 |
db = client[db_name]
|
68 |
fs = gridfs.GridFS(db, collection=collection_name)
|
69 |
|
@@ -100,7 +100,7 @@ def download_pptx_from_mongodb(file_id, save_path, save_name, db_name="ppt", col
|
|
100 |
full_file_path = os.path.join(save_path, save_name)
|
101 |
|
102 |
# Kết nối đến MongoDB
|
103 |
-
client = MongoClient("mongodb+srv://1highbar456
|
104 |
db = client[db_name]
|
105 |
fs = gridfs.GridFS(db, collection=collection_name)
|
106 |
|
@@ -131,7 +131,7 @@ def save_xml_to_gridfs(xml_content, file_name, db_name="ppt", collection_name="o
|
|
131 |
:param db_name: Tên database MongoDB
|
132 |
:param collection_name: Tên collection GridFS
|
133 |
"""
|
134 |
-
client = MongoClient("mongodb+srv://1highbar456
|
135 |
db = client[db_name]
|
136 |
fs = gridfs.GridFS(db, collection=collection_name)
|
137 |
|
@@ -146,7 +146,7 @@ def save_xml_to_gridfs(xml_content, file_name, db_name="ppt", collection_name="o
|
|
146 |
print(f"✅ XML '{file_name}' đã được lưu vào GridFS với ID: {file_id}")
|
147 |
|
148 |
def fetch_file_from_mongodb(db_name, collection_name, file_id):
|
149 |
-
client = MongoClient("mongodb+srv://1highbar456
|
150 |
db = client[db_name]
|
151 |
fs = gridfs.GridFS(db, collection_name)
|
152 |
|
|
|
6 |
import magic
|
7 |
|
8 |
def connect_mongodb(db_name, collection_name):
|
9 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
10 |
db = client[db_name]
|
11 |
fs = gridfs.GridFS(db, collection=collection_name)
|
12 |
return fs
|
|
|
24 |
:param file_name: Tên file muốn lưu (không cần .pptx). Nếu để None, lấy tên gốc.
|
25 |
:return: file_id nếu lưu thành công, None nếu file đã tồn tại
|
26 |
"""
|
27 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
28 |
db = client[db_name]
|
29 |
fs = gridfs.GridFS(db, collection=collection_name)
|
30 |
|
|
|
63 |
:param collection_name: Tên collection GridFS
|
64 |
"""
|
65 |
# Kết nối đến MongoDB
|
66 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
67 |
db = client[db_name]
|
68 |
fs = gridfs.GridFS(db, collection=collection_name)
|
69 |
|
|
|
100 |
full_file_path = os.path.join(save_path, save_name)
|
101 |
|
102 |
# Kết nối đến MongoDB
|
103 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
104 |
db = client[db_name]
|
105 |
fs = gridfs.GridFS(db, collection=collection_name)
|
106 |
|
|
|
131 |
:param db_name: Tên database MongoDB
|
132 |
:param collection_name: Tên collection GridFS
|
133 |
"""
|
134 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
135 |
db = client[db_name]
|
136 |
fs = gridfs.GridFS(db, collection=collection_name)
|
137 |
|
|
|
146 |
print(f"✅ XML '{file_name}' đã được lưu vào GridFS với ID: {file_id}")
|
147 |
|
148 |
def fetch_file_from_mongodb(db_name, collection_name, file_id):
|
149 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0") # Cập nhật nếu cần
|
150 |
db = client[db_name]
|
151 |
fs = gridfs.GridFS(db, collection_name)
|
152 |
|
excel/excel_translate.py
CHANGED
@@ -13,7 +13,7 @@ import os
|
|
13 |
|
14 |
def translate_xlsx(file_id: str, sheet_name: str = None, from_lang: str = 'en', target_lang: str = "fr", gemini_api: str = "", db_name: str = "excel"):
|
15 |
# Kết nối MongoDB
|
16 |
-
client = pymongo.MongoClient("mongodb+srv://1highbar456
|
17 |
db = client[db_name]
|
18 |
fs_input = gridfs.GridFS(db, collection="root_file")
|
19 |
fs_output = gridfs.GridFS(db, collection="final_file")
|
@@ -100,7 +100,7 @@ def read_csv_with_auto_encoding(csv_path):
|
|
100 |
|
101 |
def translate_csv(file_id, source_lang="en", target_lang="vi", gemini_api="", chunk_size=50, text_columns=None, db_name="csv"):
|
102 |
# Kết nối MongoDB
|
103 |
-
client = pymongo.MongoClient("mongodb+srv://1highbar456
|
104 |
db = client[db_name]
|
105 |
fs_input = gridfs.GridFS(db, collection="root_file")
|
106 |
fs_output = gridfs.GridFS(db, collection="final_file")
|
|
|
13 |
|
14 |
def translate_xlsx(file_id: str, sheet_name: str = None, from_lang: str = 'en', target_lang: str = "fr", gemini_api: str = "", db_name: str = "excel"):
|
15 |
# Kết nối MongoDB
|
16 |
+
client = pymongo.MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
17 |
db = client[db_name]
|
18 |
fs_input = gridfs.GridFS(db, collection="root_file")
|
19 |
fs_output = gridfs.GridFS(db, collection="final_file")
|
|
|
100 |
|
101 |
def translate_csv(file_id, source_lang="en", target_lang="vi", gemini_api="", chunk_size=50, text_columns=None, db_name="csv"):
|
102 |
# Kết nối MongoDB
|
103 |
+
client = pymongo.MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
104 |
db = client[db_name]
|
105 |
fs_input = gridfs.GridFS(db, collection="root_file")
|
106 |
fs_output = gridfs.GridFS(db, collection="final_file")
|
powerpoint/pptx_object.py
CHANGED
@@ -279,7 +279,7 @@ def apply_table_properties(table, table_data):
|
|
279 |
|
280 |
def get_file_from_mongodb(db_name, collection_name, file_id):
|
281 |
"""Tải tệp từ MongoDB GridFS"""
|
282 |
-
client = MongoClient("mongodb+srv://1highbar456
|
283 |
db = client[db_name]
|
284 |
fs = GridFS(db, collection_name)
|
285 |
file_data = fs.get(file_id)
|
@@ -288,7 +288,7 @@ def get_file_from_mongodb(db_name, collection_name, file_id):
|
|
288 |
|
289 |
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
|
290 |
"""Lưu tệp vào MongoDB GridFS"""
|
291 |
-
client = MongoClient("mongodb+srv://1highbar456
|
292 |
db = client[db_name]
|
293 |
fs = GridFS(db, collection_name)
|
294 |
file_id = fs.put(file_data, filename=file_name)
|
@@ -350,7 +350,7 @@ def create_translated_ppt(db_name, original_ppt_id, translated_xml_id, output_co
|
|
350 |
|
351 |
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
|
352 |
"""Lưu tệp vào MongoDB GridFS"""
|
353 |
-
client = MongoClient("mongodb+srv://1highbar456
|
354 |
db = client[db_name]
|
355 |
fs = GridFS(db, collection_name)
|
356 |
file_id = fs.put(file_data, filename=file_name)
|
|
|
279 |
|
280 |
def get_file_from_mongodb(db_name, collection_name, file_id):
|
281 |
"""Tải tệp từ MongoDB GridFS"""
|
282 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
283 |
db = client[db_name]
|
284 |
fs = GridFS(db, collection_name)
|
285 |
file_data = fs.get(file_id)
|
|
|
288 |
|
289 |
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
|
290 |
"""Lưu tệp vào MongoDB GridFS"""
|
291 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
292 |
db = client[db_name]
|
293 |
fs = GridFS(db, collection_name)
|
294 |
file_id = fs.put(file_data, filename=file_name)
|
|
|
350 |
|
351 |
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
|
352 |
"""Lưu tệp vào MongoDB GridFS"""
|
353 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
354 |
db = client[db_name]
|
355 |
fs = GridFS(db, collection_name)
|
356 |
file_id = fs.put(file_data, filename=file_name)
|
powerpoint/xml_handling.py
CHANGED
@@ -83,7 +83,7 @@ def ppt_to_xml_mongodb(ppt_file_id: str, db_name="ppt"):
|
|
83 |
:return: ID của file XML trong MongoDB (original_xml)
|
84 |
"""
|
85 |
# Kết nối MongoDB
|
86 |
-
client = MongoClient("mongodb+srv://1highbar456
|
87 |
db = client[db_name]
|
88 |
|
89 |
fs_ppt = gridfs.GridFS(db, collection="root_file") # PPT gốc
|
@@ -143,7 +143,7 @@ def extract_text_from_xml(file_id=None, filename=None, db_name="ppt", collection
|
|
143 |
:return: Dictionary {slide_number: [text1, text2, ...]}
|
144 |
"""
|
145 |
# Kết nối MongoDB
|
146 |
-
client = MongoClient("mongodb+srv://1highbar456
|
147 |
db = client[db_name]
|
148 |
fs = gridfs.GridFS(db, collection=collection_name)
|
149 |
|
@@ -266,7 +266,7 @@ def update_xml_with_translated_text_mongodb(file_id: str, translated_dict: Dict[
|
|
266 |
:param db_name: Tên database MongoDB
|
267 |
"""
|
268 |
# Kết nối MongoDB
|
269 |
-
client = MongoClient("mongodb+srv://1highbar456
|
270 |
db = client[db_name]
|
271 |
|
272 |
fs_original = gridfs.GridFS(db, collection="original_xml") # Lấy file từ original_xml
|
|
|
83 |
:return: ID của file XML trong MongoDB (original_xml)
|
84 |
"""
|
85 |
# Kết nối MongoDB
|
86 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
87 |
db = client[db_name]
|
88 |
|
89 |
fs_ppt = gridfs.GridFS(db, collection="root_file") # PPT gốc
|
|
|
143 |
:return: Dictionary {slide_number: [text1, text2, ...]}
|
144 |
"""
|
145 |
# Kết nối MongoDB
|
146 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
147 |
db = client[db_name]
|
148 |
fs = gridfs.GridFS(db, collection=collection_name)
|
149 |
|
|
|
266 |
:param db_name: Tên database MongoDB
|
267 |
"""
|
268 |
# Kết nối MongoDB
|
269 |
+
client = MongoClient("mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")
|
270 |
db = client[db_name]
|
271 |
|
272 |
fs_original = gridfs.GridFS(db, collection="original_xml") # Lấy file từ original_xml
|
test.ipynb
CHANGED
@@ -21,6 +21,55 @@
|
|
21 |
"# from translator import translate_text_dict"
|
22 |
]
|
23 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
{
|
25 |
"cell_type": "markdown",
|
26 |
"metadata": {},
|
|
|
21 |
"# from translator import translate_text_dict"
|
22 |
]
|
23 |
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": null,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"from pymongo import MongoClient\n",
|
31 |
+
"\n",
|
32 |
+
"uri = \"mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0\"\n",
|
33 |
+
"client = MongoClient(uri, tls = True, tlsAllowInvalidCertificates=True)\n",
|
34 |
+
"\n",
|
35 |
+
"try:\n",
|
36 |
+
" client.server_info() # Kiểm tra kết nối\n",
|
37 |
+
" print(\"Kết nối thành công!\")\n",
|
38 |
+
"except Exception as e:\n",
|
39 |
+
" print(f\"Lỗi kết nối: {e}\")\n"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": 2,
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [
|
47 |
+
{
|
48 |
+
"name": "stdout",
|
49 |
+
"output_type": "stream",
|
50 |
+
"text": [
|
51 |
+
"✅ File requirements.txt đã được tạo mà không có dòng lỗi!\n"
|
52 |
+
]
|
53 |
+
}
|
54 |
+
],
|
55 |
+
"source": [
|
56 |
+
"import subprocess\n",
|
57 |
+
"\n",
|
58 |
+
"# Chạy pip freeze\n",
|
59 |
+
"output = subprocess.run([\"pip\", \"freeze\"], capture_output=True, text=True)\n",
|
60 |
+
"\n",
|
61 |
+
"# Lọc bỏ các thư viện có đường dẫn local\n",
|
62 |
+
"filtered_lines = [\n",
|
63 |
+
" line for line in output.stdout.split(\"\\n\") if \"feedstock_root\" not in line\n",
|
64 |
+
"]\n",
|
65 |
+
"\n",
|
66 |
+
"# Ghi vào requirements.txt\n",
|
67 |
+
"with open(\"requirements.txt\", \"w\") as f:\n",
|
68 |
+
" f.write(\"\\n\".join(filtered_lines))\n",
|
69 |
+
"\n",
|
70 |
+
"print(\"✅ File requirements.txt đã được tạo mà không có dòng lỗi!\")\n"
|
71 |
+
]
|
72 |
+
},
|
73 |
{
|
74 |
"cell_type": "markdown",
|
75 |
"metadata": {},
|
word/word_translate.py
CHANGED
@@ -224,7 +224,7 @@ def translate_header_footer(doc, source_lang, target_lang):
|
|
224 |
i += 1
|
225 |
|
226 |
def translate_docx(file_id, source_lang='en', target_lang='fr', db_name='word'):
|
227 |
-
client = MongoClient('mongodb+srv://1highbar456
|
228 |
db = client[db_name]
|
229 |
fs_input = GridFS(db, collection="root_file")
|
230 |
fs_output = GridFS(db, collection="final_file")
|
|
|
224 |
i += 1
|
225 |
|
226 |
def translate_docx(file_id, source_lang='en', target_lang='fr', db_name='word'):
|
227 |
+
client = MongoClient('mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0')
|
228 |
db = client[db_name]
|
229 |
fs_input = GridFS(db, collection="root_file")
|
230 |
fs_output = GridFS(db, collection="final_file")
|