MT_deploy / db /mongodb.py
mintlee's picture
add time
b14389a
raw
history blame
6.15 kB
from pymongo import MongoClient
import gridfs
from bson import ObjectId
import os
from io import BytesIO
import magic
from datetime import datetime, timedelta
def connect_mongodb(db_name, collection_name):
    """Return a GridFS handle for ``collection_name`` inside database ``db_name``.

    The MongoClient created here is not closed by this function.

    :param db_name: Name of the MongoDB database.
    :param collection_name: Name of the GridFS collection.
    :return: A ``gridfs.GridFS`` instance bound to that database/collection.
    """
    # NOTE(review): the connection URI is hard-coded in source; consider
    # loading it from configuration instead.
    mongo_uri = "mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0"
    database = MongoClient(mongo_uri)[db_name]
    return gridfs.GridFS(database, collection=collection_name)
def save_file_to_mongodb(uploaded_file, db_name="pptx", collection_name="root_file", max_db_size_mb=500):
    """Store an uploaded file in GridFS, purging stored files first when over budget.

    :param uploaded_file: File-like object exposing ``name``, ``seek`` and ``read``.
    :param db_name: Name of the MongoDB database to store the file in.
    :param collection_name: Name of the GridFS collection.
    :param max_db_size_mb: Size limit in MB; when the size reported by
        ``get_total_cluster_size`` exceeds it, ``delete_all()`` runs before
        the new file is written.
    :return: ``(file_id, file_name)`` on success, ``(None, None)`` when any
        MongoDB step fails.
    """
    # Read the upload BEFORE opening a connection, so that a failing
    # seek()/read() cannot leave an unclosed client behind.
    file_name = uploaded_file.name
    uploaded_file.seek(0)
    file_bytes = uploaded_file.read()

    # NOTE(review): the connection URI is hard-coded in source; consider
    # loading it from configuration instead.
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    try:
        # Built inside the try so that a failure here is also reported as
        # (None, None) and the client is still closed.
        fs = gridfs.GridFS(client[db_name], collection=collection_name)

        # Current stored data size in MB. get_total_cluster_size returns -1
        # on failure, which does not exceed a non-negative limit, so the
        # purge is skipped in that case.
        db_size_mb = get_total_cluster_size(client)
        print(f"📦 Database size: {db_size_mb:.2f} MB")
        if db_size_mb > max_db_size_mb:
            delete_all()

        # After the optional cleanup, store the file.
        file_id = fs.put(file_bytes, filename=file_name)

        # NOTE(review): a fixed +5h shift is applied to the local clock for
        # the log timestamp -- presumably a timezone adjustment; confirm.
        now = datetime.now() + timedelta(hours=5)
        formatted_now = now.strftime("%Y-%m-%d %H:%M:%S")
        print(f"{formatted_now} File '{file_name}' đã được lưu vào '{collection_name}' với ID: {file_id} \n")
        return file_id, file_name
    except Exception as e:
        print(f"❌ Lỗi khi lưu file hoặc truy vấn MongoDB: {e} \n")
        return None, None
    finally:
        client.close()
def save_xml_to_gridfs(xml_content, file_name, db_name="ppt", collection_name="original_xml"):
    """
    Save an XML string into MongoDB GridFS, skipping names that already exist.

    :param xml_content: XML string to store (encoded as UTF-8 before writing)
    :param file_name: Name of the XML file
    :param db_name: Name of the MongoDB database
    :param collection_name: Name of the GridFS collection
    :return: None in all cases
    """
    # NOTE(review): the connection URI is hard-coded in source; consider
    # loading it from configuration instead.
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    try:
        fs = gridfs.GridFS(client[db_name], collection=collection_name)

        # Check whether a file with this name is already stored.
        if fs.find_one({"filename": file_name}):
            print(f"⚠️ File '{file_name}' đã tồn tại trong GridFS. Không lưu lại.")
            return

        # Convert the XML string to bytes and store it in GridFS.
        file_id = fs.put(xml_content.encode("utf-8"), filename=file_name)
        print(f"✅ XML '{file_name}' đã được lưu vào GridFS với ID: {file_id}")
    finally:
        # Release the connection on every exit path (early return, success,
        # or an exception propagating to the caller).
        client.close()
def fetch_file_from_mongodb(db_name, collection_name, file_id):
    """Load a GridFS file fully into memory.

    :param db_name: Name of the MongoDB database.
    :param collection_name: Name of the GridFS collection.
    :param file_id: Identifier of the stored file, passed to ``GridFS.get``.
    :return: ``(BytesIO positioned at offset 0, stored filename)`` on success,
        ``(None, None)`` when the file cannot be retrieved.
    """
    # NOTE(review): the connection URI is hard-coded in source; consider
    # loading it from configuration instead.
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    try:
        fs = gridfs.GridFS(client[db_name], collection_name)
        file_data = fs.get(file_id)
        # The payload is copied into memory here, so the returned buffer
        # does not depend on the connection staying open.
        pptx_io = BytesIO(file_data.read())
        pptx_io.seek(0)  # Rewind to the start of the buffer
        return pptx_io, file_data.filename
    except Exception as e:
        print(f"Lỗi khi lấy file từ MongoDB: {e}")
        return None, None
    finally:
        # Previously the client was never closed, leaking one connection
        # pool per call.
        client.close()
def detect_file_type(uploaded_file):
    """Classify an uploaded file as CSV / Word / Excel / PPTX / PDF.

    The file extension is checked first; only when it is not recognised is
    the MIME type sniffed from the first 4096 bytes of the content.

    :param uploaded_file: File-like object exposing ``name`` and ``read``
        (and, ideally, ``tell``/``seek``), or ``None``.
    :return: One of ``"CSV"``, ``"Word"``, ``"Excel"``, ``"PPTX"``, ``"PDF"``;
        ``"Unknown"`` when the type is unrecognised or detection fails;
        ``None`` when ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return None

    try:
        # Prefer the file extension when it is one we recognise.
        ext = os.path.splitext(uploaded_file.name)[1].lower()
        ext_mapping = {
            ".csv": "CSV", ".docx": "Word", ".doc": "Word",
            ".xlsx": "Excel", ".pptx": "PPTX", ".pdf": "PDF"
        }
        detected_type = ext_mapping.get(ext)
        if detected_type:
            return detected_type  # Known extension: answer immediately

        # No usable extension: fall back to MIME sniffing. Sniff from the
        # start of the stream and put the read position back afterwards, so
        # the result does not depend on earlier reads and later readers
        # still see the whole file.
        try:
            origin = uploaded_file.tell()
            uploaded_file.seek(0)
        except (AttributeError, OSError, ValueError):
            # Non-seekable stream: read from wherever it currently is.
            origin = None
        try:
            file_bytes = uploaded_file.read(4096)
        finally:
            if origin is not None:
                uploaded_file.seek(origin)

        mime = magic.Magic(mime=True)
        file_type = mime.from_buffer(file_bytes)
        mime_types = {
            "application/pdf": "PDF",
            "application/vnd.ms-powerpoint": "PPTX",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PPTX",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel",
            "application/vnd.ms-excel": "Excel",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word",
            "application/msword": "Word",
            "text/csv": "CSV",
            "text/plain": "CSV"
        }
        return mime_types.get(file_type, "Unknown")
    except Exception as e:
        print(f"Error detecting file type: {e}")
        return "Unknown"
def delete_all_files_in_collection(collection_name, db_name="ppt"):
    """Delete every GridFS file in ``collection_name`` of database ``db_name``.

    Errors are reported on stdout and swallowed; nothing is raised.

    :param collection_name: Name of the GridFS collection to empty.
    :param db_name: Name of the MongoDB database.
    :return: None
    """
    # Pre-bind so the ``finally`` clause is safe even when the client could
    # not be created; otherwise ``client.close()`` would raise
    # UnboundLocalError and mask the error already handled below.
    client = None
    try:
        # NOTE(review): the connection URI is hard-coded in source; consider
        # loading it from configuration instead.
        client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
        db = client[db_name]
        fs = gridfs.GridFS(db, collection=collection_name)
        # Collect the ids up front from the "<collection>.files" collection,
        # then delete each file through GridFS.
        file_ids = [file["_id"] for file in db[f"{collection_name}.files"].find({})]
        for file_id in file_ids:
            fs.delete(file_id)
        print(f"✅ Đã xóa {len(file_ids)} file trong collection '{collection_name}' của db '{db_name}'")
    except Exception as e:
        print(f"❌ Lỗi khi xóa file: {str(e)}")
    finally:
        if client is not None:
            client.close()
def delete_all():
    """Empty the 'root_file' and 'final_file' GridFS collections in each listed database.

    Calls ``delete_all_files_in_collection`` once per (collection, database)
    pair, iterating all databases for one collection before moving on to the
    next collection.
    """
    collections = ('root_file', 'final_file')
    databases = ('word', 'exce', 'pptx', 'csv')
    targets = [(coll, database) for coll in collections for database in databases]
    for coll, database in targets:
        delete_all_files_in_collection(coll, db_name=database)
def get_total_cluster_size(client):
    """Sum the ``dataSize`` reported by ``dbstats`` for the listed databases.

    :param client: Object indexable by database name whose items expose
        ``command("dbstats")`` returning a mapping.
    :return: Combined size in MB (bytes / 1024**2), or ``-1`` if any lookup
        raises. A missing ``dataSize`` entry counts as 0.
    """
    tracked_databases = ('word', 'exce', 'pptx', 'csv')
    try:
        size_in_bytes = sum(
            client[name].command("dbstats").get("dataSize", 0)
            for name in tracked_databases
        )
    except Exception as e:
        print(f"❌ Lỗi khi tính dung lượng cluster: {e}")
        return -1
    return size_in_bytes / (1024 ** 2)