File size: 6,146 Bytes
0e9ff78
 
 
 
 
 
b14389a
0e9ff78
 
6ae64ab
0e9ff78
 
 
 
182876f
6ae64ab
0e9ff78
 
 
fad6c52
0e9ff78
 
 
182876f
 
217a617
182876f
 
 
 
 
 
 
b14389a
 
 
182876f
 
 
1b6f99d
182876f
 
 
 
0e9ff78
 
 
 
 
 
 
 
 
 
 
 
6ae64ab
0e9ff78
 
 
 
 
 
 
 
 
 
 
 
 
 
6ae64ab
0e9ff78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d586fe1
 
 
 
 
 
 
 
 
 
 
 
 
0e9ff78
 
d586fe1
 
 
 
 
 
 
 
 
 
 
 
 
 
0e9ff78
 
d586fe1
 
182876f
 
 
 
 
 
 
 
 
 
 
 
217a617
182876f
 
 
 
 
 
 
 
217a617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from pymongo import MongoClient
import gridfs
from bson import ObjectId
import os
from io import BytesIO
import magic
from datetime import datetime, timedelta

def connect_mongodb(db_name, collection_name):
    """Open a GridFS handle on the given database and collection.

    :param db_name: Name of the MongoDB database to use.
    :param collection_name: GridFS collection (bucket prefix) inside that database.
    :return: A ``gridfs.GridFS`` instance bound to a newly created client.
        The client is not closed here; the returned handle depends on it.
    """
    mongo = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    return gridfs.GridFS(mongo[db_name], collection=collection_name)

def save_file_to_mongodb(uploaded_file, db_name="pptx", collection_name="root_file", max_db_size_mb=500):
    """Store an uploaded file in GridFS, purging stored files first when over quota.

    :param uploaded_file: File-like object exposing ``name``, ``seek`` and ``read``.
    :param db_name: Target MongoDB database.
    :param collection_name: Target GridFS collection.
    :param max_db_size_mb: Size threshold in MB; when the total reported by
        ``get_total_cluster_size`` exceeds it, ``delete_all()`` is called
        before the new file is written.
    :return: ``(file_id, file_name)`` on success, ``(None, None)`` when the
        size check, cleanup or write raises.
    :raises Exception: Errors from reading ``uploaded_file`` or from creating
        the client / GridFS handle are not caught and propagate to the caller.
    """
    # Read the upload before opening a connection: if the file object is
    # unreadable, the error propagates without leaving a client open.
    file_name = uploaded_file.name
    uploaded_file.seek(0)
    file_bytes = uploaded_file.read()

    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    try:
        fs = gridfs.GridFS(client[db_name], collection=collection_name)

        try:
            # Current data size across the tracked databases, in MB
            # (get_total_cluster_size returns -1 when the check itself fails,
            # which never exceeds the threshold).
            db_size_mb = get_total_cluster_size(client)

            print(f"📦 Database size: {db_size_mb:.2f} MB")
            if db_size_mb > max_db_size_mb:
                delete_all()

            # After the optional cleanup, write the file
            file_id = fs.put(file_bytes, filename=file_name)
            # NOTE(review): a fixed +5h offset is added to the local clock for
            # the log timestamp only — presumably a timezone adjustment; confirm.
            now = datetime.now() + timedelta(hours=5)
            formatted_now = now.strftime("%Y-%m-%d %H:%M:%S")
            print(f"{formatted_now} File '{file_name}' đã được lưu vào '{collection_name}' với ID: {file_id} \n")
            return file_id, file_name

        except Exception as e:
            print(f"❌ Lỗi khi lưu file hoặc truy vấn MongoDB: {e} \n")
            return None, None

    finally:
        # Runs on every path once the client exists, including a failure
        # while building the GridFS handle.
        client.close()



def save_xml_to_gridfs(xml_content, file_name, db_name="ppt", collection_name="original_xml"):
    """
    Save an XML string to MongoDB GridFS unless a file with that name already exists.

    :param xml_content: XML string to store (encoded as UTF-8 before writing)
    :param file_name: Name of the XML file; also the key used for the duplicate check
    :param db_name: MongoDB database name
    :param collection_name: GridFS collection name
    :return: None in every case; the outcome is reported with ``print``
    """
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
    try:
        fs = gridfs.GridFS(client[db_name], collection=collection_name)

        # Skip the write when a file with this name is already stored
        if fs.find_one({"filename": file_name}):
            print(f"⚠️ File '{file_name}' đã tồn tại trong GridFS. Không lưu lại.")
            return

        # Encode the XML string to bytes and store it in GridFS
        file_id = fs.put(xml_content.encode("utf-8"), filename=file_name)
        print(f"✅ XML '{file_name}' đã được lưu vào GridFS với ID: {file_id}")
    finally:
        # Release the connection on every path (early return, success, error);
        # exceptions still propagate to the caller as before.
        client.close()

def fetch_file_from_mongodb(db_name, collection_name, file_id):
    """Load a stored GridFS file fully into memory.

    :param db_name: MongoDB database name.
    :param collection_name: GridFS collection name.
    :param file_id: Identifier passed unchanged to ``GridFS.get``; no
        string-to-ObjectId conversion is done here.
    :return: ``(BytesIO positioned at 0, stored filename)`` on success,
        ``(None, None)`` when the lookup or read raises.
    """
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")  # Update if needed
    try:
        fs = gridfs.GridFS(client[db_name], collection_name)

        try:
            file_data = fs.get(file_id)
            pptx_io = BytesIO(file_data.read())
            pptx_io.seek(0)  # Rewind to the start of the buffer
            return pptx_io, file_data.filename
        except Exception as e:
            print(f"Lỗi khi lấy file từ MongoDB: {e}")
            return None, None
    finally:
        # The content has been copied into memory above, so the connection
        # can be released before returning.
        client.close()
    
def detect_file_type(uploaded_file):
    """Classify an uploaded file as CSV, Word, Excel, PPTX or PDF.

    The file extension is checked first. Only when it is not one of the known
    extensions are the first 4096 bytes sniffed with the ``magic`` module.

    :param uploaded_file: File-like object exposing ``name`` and ``read``
        (and ideally ``tell``/``seek``); may be None.
    :return: "CSV", "Word", "Excel", "PPTX" or "PDF"; "Unknown" when the type
        is not recognised or detection raises; None when ``uploaded_file``
        is None.
    """
    if uploaded_file is None:
        return None

    try:
        # Prefer the extension: cheap and does not touch the stream
        ext = os.path.splitext(uploaded_file.name)[1].lower()
        ext_mapping = {
            ".csv": "CSV", ".docx": "Word", ".doc": "Word",
            ".xlsx": "Excel", ".pptx": "PPTX", ".pdf": "PDF"
        }
        detected_type = ext_mapping.get(ext)

        if detected_type:
            return detected_type  # Known extension, no content sniffing needed

        # No recognised extension: fall back to the MIME type of the content.
        # Sniff from the start of the stream and put the cursor back where it
        # was, so a caller reading the file afterwards is not affected.
        # Streams without tell/seek are read from their current position.
        try:
            start = uploaded_file.tell()
            uploaded_file.seek(0)
        except (AttributeError, OSError):
            start = None

        try:
            file_bytes = uploaded_file.read(4096)
        finally:
            if start is not None:
                uploaded_file.seek(start)

        mime = magic.Magic(mime=True)
        file_type = mime.from_buffer(file_bytes)

        mime_types = {
            "application/pdf": "PDF",
            "application/vnd.ms-powerpoint": "PPTX",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PPTX",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel",
            "application/vnd.ms-excel": "Excel",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word",
            "application/msword": "Word",
            "text/csv": "CSV",
            "text/plain": "CSV"  # plain text is treated as CSV
        }
        return mime_types.get(file_type, "Unknown")

    except Exception as e:
        print(f"Error detecting file type: {e}")
        return "Unknown"


def delete_all_files_in_collection(collection_name, db_name="ppt"):
    """Delete every GridFS file stored in one collection of one database.

    Errors are reported with ``print`` and not re-raised.

    :param collection_name: GridFS collection whose files are removed.
    :param db_name: MongoDB database that holds the collection.
    :return: None.
    """
    # Bound before the try so the finally clause is safe even when creating
    # the client itself fails.
    client = None
    try:
        client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
        db = client[db_name]
        fs = gridfs.GridFS(db, collection=collection_name)
        # Collect the ids up front from the "<collection>.files" metadata
        # collection, then delete each file through GridFS.
        file_ids = [doc["_id"] for doc in db[f"{collection_name}.files"].find({})]

        for file_id in file_ids:
            fs.delete(file_id)
        print(f"✅ Đã xóa {len(file_ids)} file trong collection '{collection_name}' của db '{db_name}'")
    except Exception as e:
        print(f"❌ Lỗi khi xóa file: {str(e)}")
    finally:
        if client is not None:
            client.close()

def delete_all():
    """Empty the 'root_file' and 'final_file' GridFS collections in every tracked database.

    Calls ``delete_all_files_in_collection`` once per (collection, database)
    pair, collection-major.
    """
    collections = ('root_file', 'final_file')
    # NOTE(review): 'exce' may be a typo for 'excel' — confirm against the
    # actual database names before changing it.
    databases = ('word', 'exce', 'pptx', 'csv')

    targets = [(coll, database) for coll in collections for database in databases]
    for coll, database in targets:
        delete_all_files_in_collection(coll, db_name=database)


def get_total_cluster_size(client):
    """Sum the ``dataSize`` reported by ``dbstats`` for the tracked databases.

    :param client: Object indexable by database name whose items expose
        ``command("dbstats")`` (a ``MongoClient`` in this module).
    :return: Combined size in MB (bytes / 1024**2), or -1 when any lookup raises.
    """
    tracked = ('word', 'exce', 'pptx', 'csv')
    try:
        # A missing "dataSize" entry counts as 0 bytes
        byte_count = sum(
            client[name].command("dbstats").get("dataSize", 0)
            for name in tracked
        )
    except Exception as e:
        print(f"❌ Lỗi khi tính dung lượng cluster: {e}")
        return -1
    return byte_count / (1024 ** 2)