"""GridFS helpers for storing, fetching and purging uploaded files in MongoDB."""

import os
import time
from io import BytesIO

import gridfs
import magic
from bson import ObjectId
from pymongo import MongoClient

# SECURITY: the fallback below embeds a username and password in source
# control. It is kept only so existing deployments keep working; rotate the
# credential, provide it through the MONGODB_URI environment variable and then
# delete the literal.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
)

# Databases whose size is summed by get_total_cluster_size() and which are
# purged by delete_all(). The names are runtime identifiers; do not "fix" them.
_MANAGED_DATABASES = ("word", "exce", "pptx", "csv")

# GridFS collections emptied in every managed database by delete_all().
_MANAGED_COLLECTIONS = ("root_file", "final_file")

# File-extension -> type label, checked before any content sniffing.
_EXTENSION_TYPES = {
    ".csv": "CSV",
    ".docx": "Word",
    ".doc": "Word",
    ".xlsx": "Excel",
    ".pptx": "PPTX",
    ".pdf": "PDF",
}

# MIME type -> type label, used when the extension is not recognised.
_MIME_TYPES = {
    "application/pdf": "PDF",
    "application/vnd.ms-powerpoint": "PPTX",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PPTX",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel",
    "application/vnd.ms-excel": "Excel",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word",
    "application/msword": "Word",
    "text/csv": "CSV",
    "text/plain": "CSV",
}


def connect_mongodb(db_name, collection_name):
    """Return a GridFS handle for ``collection_name`` in database ``db_name``.

    The underlying MongoClient is intentionally left open: the returned GridFS
    object needs it for every later operation, so it cannot be closed here.
    """
    client = MongoClient(MONGODB_URI)
    return gridfs.GridFS(client[db_name], collection=collection_name)


def save_file_to_mongodb(uploaded_file, db_name="pptx", collection_name="root_file", max_db_size_mb=500):
    """Store an uploaded file in GridFS, purging old files when storage is full.

    :param uploaded_file: File-like object exposing ``name``, ``seek`` and ``read``.
    :param db_name: Target database name.
    :param collection_name: Target GridFS collection name.
    :param max_db_size_mb: When the managed databases together hold more than
        this many megabytes, ``delete_all()`` is called before saving.
    :return: ``(file_id, file_name)`` on success, ``(None, None)`` if checking
        the size or writing the file fails.
    """
    # Read the upload before connecting so a failing read cannot leak a client.
    file_name = uploaded_file.name
    uploaded_file.seek(0)
    file_bytes = uploaded_file.read()

    # The context manager closes the client on every exit path.
    with MongoClient(MONGODB_URI) as client:
        fs = gridfs.GridFS(client[db_name], collection=collection_name)
        try:
            # get_total_cluster_size() returns -1 on failure, which never
            # exceeds the limit, so a failed size check does not trigger a purge.
            db_size_mb = get_total_cluster_size(client)
            print(f"📦 Database size: {db_size_mb:.2f} MB")

            if db_size_mb > max_db_size_mb:
                delete_all()

            # After the optional clean-up, store the file.
            file_id = fs.put(file_bytes, filename=file_name)
            now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"{now:} File '{file_name}' đã được lưu vào '{collection_name}' với ID: {file_id} \n")
            return file_id, file_name
        except Exception as e:
            print(f"❌ Lỗi khi lưu file hoặc truy vấn MongoDB: {e} \n")
            return None, None


def save_xml_to_gridfs(xml_content, file_name, db_name="ppt", collection_name="original_xml"):
    """Save an XML string to GridFS unless a file with that name already exists.

    :param xml_content: XML text to store; it is encoded as UTF-8.
    :param file_name: Name to store the XML under.
    :param db_name: MongoDB database name.
    :param collection_name: GridFS collection name.
    :return: None.
    """
    # NOTE(review): the default database here is "ppt" while
    # save_file_to_mongodb() defaults to "pptx" - confirm this is intended.
    with MongoClient(MONGODB_URI) as client:
        fs = gridfs.GridFS(client[db_name], collection=collection_name)

        # Skip the write when a file with this name is already stored.
        if fs.find_one({"filename": file_name}):
            print(f"⚠️ File '{file_name}' đã tồn tại trong GridFS. Không lưu lại.")
            return

        file_id = fs.put(xml_content.encode("utf-8"), filename=file_name)
        print(f"✅ XML '{file_name}' đã được lưu vào GridFS với ID: {file_id}")


def fetch_file_from_mongodb(db_name, collection_name, file_id):
    """Load a GridFS file into memory.

    :param db_name: MongoDB database name.
    :param collection_name: GridFS collection name.
    :param file_id: ``_id`` of the stored file. If the lookup fails and
        ``file_id`` is a string holding a valid ObjectId, the lookup is retried
        with the converted ObjectId.
    :return: ``(BytesIO positioned at 0, filename)`` on success,
        ``(None, None)`` if the file cannot be retrieved.
    """
    with MongoClient(MONGODB_URI) as client:
        fs = gridfs.GridFS(client[db_name], collection_name)
        try:
            try:
                grid_out = fs.get(file_id)
            except gridfs.NoFile:
                # Ids often arrive as strings (URLs, JSON, session state)
                # while GridFS generates ObjectId keys by default.
                if not (isinstance(file_id, str) and ObjectId.is_valid(file_id)):
                    raise
                grid_out = fs.get(ObjectId(file_id))

            # Copy everything out before the client is closed.
            content = BytesIO(grid_out.read())
            return content, grid_out.filename
        except Exception as e:
            print(f"Lỗi khi lấy file từ MongoDB: {e}")
            return None, None


def _read_header(stream, size=4096):
    """Return the first ``size`` bytes of ``stream`` without moving its cursor.

    Non-seekable streams fall back to a plain ``read(size)`` from the current
    position.
    """
    try:
        original_position = stream.tell()
        stream.seek(0)
    except (AttributeError, OSError, ValueError):
        return stream.read(size)
    try:
        return stream.read(size)
    finally:
        stream.seek(original_position)


def detect_file_type(uploaded_file):
    """Classify an uploaded file as CSV, Word, Excel, PPTX or PDF.

    The file extension is checked first; if it is not recognised, the MIME
    type is sniffed from the first 4096 bytes of the content.

    :param uploaded_file: File-like object with a ``name`` attribute, or None.
    :return: The type label, ``"Unknown"`` if the type is not recognised or
        detection fails, or ``None`` when ``uploaded_file`` is None.
    """
    if uploaded_file is None:
        return None

    try:
        extension = os.path.splitext(uploaded_file.name)[1].lower()
        by_extension = _EXTENSION_TYPES.get(extension)
        if by_extension:
            return by_extension

        # Unknown extension: fall back to the MIME type of the content.
        header = _read_header(uploaded_file)
        mime_type = magic.Magic(mime=True).from_buffer(header)
        return _MIME_TYPES.get(mime_type, "Unknown")
    except Exception as e:
        print(f"Error detecting file type: {e}")
        return "Unknown"


def delete_all_files_in_collection(collection_name, db_name="ppt"):
    """Delete every file in one GridFS collection.

    Errors are printed rather than raised, so a failure in one collection does
    not stop a caller that is purging several.

    :param collection_name: GridFS collection to empty.
    :param db_name: Database that holds the collection.
    :return: None.
    """
    try:
        # Connecting inside the try means a failed connection is reported by
        # the handler below instead of breaking the clean-up code.
        with MongoClient(MONGODB_URI) as client:
            db = client[db_name]
            fs = gridfs.GridFS(db, collection=collection_name)

            # Collect the ids first, then delete through GridFS so the chunks
            # are removed along with the file documents.
            file_ids = [doc["_id"] for doc in db[f"{collection_name}.files"].find({})]
            for file_id in file_ids:
                fs.delete(file_id)

            print(f"✅ Đã xóa {len(file_ids)} file trong collection '{collection_name}' của db '{db_name}'")
    except Exception as e:
        print(f"❌ Lỗi khi xóa file: {str(e)}")


def delete_all():
    """Empty the managed GridFS collections in every managed database."""
    for collection_name in _MANAGED_COLLECTIONS:
        for db_name in _MANAGED_DATABASES:
            delete_all_files_in_collection(collection_name, db_name=db_name)


def get_total_cluster_size(client):
    """Return the combined ``dataSize`` of the managed databases in megabytes.

    :param client: An open MongoClient.
    :return: Size in MB (bytes / 1024**2), or ``-1`` if any ``dbstats`` call fails.
    """
    try:
        total_bytes = sum(
            client[db_name].command("dbstats").get("dataSize", 0)
            for db_name in _MANAGED_DATABASES
        )
    except Exception as e:
        print(f"❌ Lỗi khi tính dung lượng cluster: {e}")
        return -1

    return total_bytes / (1024 ** 2)