mintlee committed on
Commit
182876f
·
1 Parent(s): f9becec

add auto delete all

Browse files
Files changed (1) hide show
  1. db/mongodb.py +47 -20
db/mongodb.py CHANGED
@@ -13,33 +13,39 @@ def connect_mongodb(db_name, collection_name):
13
 
14
 
15
 
16
- def save_file_to_mongodb(uploaded_file, db_name="ppt", collection_name="root_file"):
17
- """
18
- Lưu file vào MongoDB bằng GridFS mà không kiểm tra trùng lặp.
19
-
20
- :param uploaded_file: đối tượng UploadedFile từ Streamlit
21
- :param db_name: Tên database trong MongoDB
22
- :param collection_name: Tên collection GridFS
23
- :param file_tail: Phần mở rộng mặc định của file nếu không có
24
- :return: file_id nếu lưu thành công
25
- """
26
  client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
27
  db = client[db_name]
28
  fs = gridfs.GridFS(db, collection=collection_name)
29
 
30
- # Xác định tên file
31
  file_name = uploaded_file.name
32
-
33
- # Đảm bảo con trỏ file đang ở đầu
34
  uploaded_file.seek(0)
35
  file_bytes = uploaded_file.read()
36
 
37
- # Lưu file vào MongoDB (không kiểm tra trùng lặp)
38
- file_id = fs.put(file_bytes, filename=file_name)
39
- print(f"✅ File '{file_name}' đã được lưu vào '{collection_name}' với ID: {file_id}")
40
-
41
- client.close()
42
- return file_id, file_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
 
@@ -116,4 +122,25 @@ def detect_file_type(uploaded_file):
116
  print(f"Error detecting file type: {e}")
117
  return "Unknown"
118
 
119
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
 
16
+ import gridfs
17
+ from pymongo import MongoClient
18
+ import pprint
19
+
20
def save_file_to_mongodb(uploaded_file, db_name="pptx", collection_name="root_file", max_db_size_mb=500):
    """Store an uploaded file in MongoDB GridFS, wiping stored files first if the DB is too large.

    Before writing, the database's data size is read from ``dbstats``. If it
    exceeds ``max_db_size_mb``, ``delete_all()`` is called to clear the stored
    files, and the new file is then saved. No duplicate check is performed.

    :param uploaded_file: file-like object exposing ``name``, ``seek`` and
        ``read`` (presumably a Streamlit ``UploadedFile`` - confirm with callers).
    :param db_name: name of the MongoDB database to write to.
    :param collection_name: GridFS collection (bucket prefix) to store the file in.
    :param max_db_size_mb: size threshold in megabytes above which all stored
        files are deleted before saving.
    :return: ``(file_id, file_name)`` on success, ``(None, None)`` if the size
        check or the save fails.
    """
    # Read the upload before opening a connection so a failing seek/read
    # cannot leave an unclosed client behind.
    file_name = uploaded_file.name
    uploaded_file.seek(0)
    file_bytes = uploaded_file.read()

    # NOTE(review): the connection string (including credentials) is hard-coded;
    # consider loading it from configuration or the environment.
    client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")

    try:
        db = client[db_name]
        fs = gridfs.GridFS(db, collection=collection_name)

        # Use dataSize rather than storageSize: storageSize does not shrink
        # when documents are removed, so once the threshold was crossed every
        # later upload would trigger delete_all() again.
        stats = db.command("dbstats")
        db_size_mb = stats.get("dataSize", 0) / (1024 * 1024)

        print(f"📦 Database size: {db_size_mb:.2f} MB")
        if db_size_mb > max_db_size_mb:
            delete_all()

        # After the optional cleanup, store the new file.
        file_id = fs.put(file_bytes, filename=file_name)
        print(f"✅ File '{file_name}' đã được lưu vào '{collection_name}' với ID: {file_id}")
        return file_id, file_name

    except Exception as e:
        # Best-effort contract: report the failure and signal it via (None, None).
        print(f"❌ Lỗi khi lưu file hoặc truy vấn MongoDB: {e}")
        return None, None

    finally:
        client.close()
49
 
50
 
51
 
 
122
  print(f"Error detecting file type: {e}")
123
  return "Unknown"
124
 
125
+ return None
126
+
127
+
128
def delete_all_files_in_collection(collection_name, db_name="ppt"):
    """Delete every GridFS file stored under ``collection_name`` in ``db_name``.

    Errors are reported with ``print`` and swallowed; the function is
    best-effort and always returns ``None``.

    :param collection_name: GridFS collection (bucket prefix) whose files are removed.
    :param db_name: name of the MongoDB database holding the collection.
    """
    # Bind the name up front so the ``finally`` clause cannot raise
    # UnboundLocalError when MongoClient() itself fails.
    client = None
    try:
        # NOTE(review): the connection string (including credentials) is hard-coded;
        # consider loading it from configuration or the environment.
        client = MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")
        db = client[db_name]
        fs = gridfs.GridFS(db, collection=collection_name)

        # Only the ids are needed, so project them instead of fetching whole documents.
        files_collection = db[f"{collection_name}.files"]
        file_ids = [doc["_id"] for doc in files_collection.find({}, {"_id": 1})]

        # Delete through GridFS so both the file document and its chunks are removed.
        for file_id in file_ids:
            fs.delete(file_id)
        print(f"✅ Đã xóa {len(file_ids)} file trong collection '{collection_name}'")
    except Exception as e:
        print(f"❌ Lỗi khi xóa file: {str(e)}")
    finally:
        if client is not None:
            client.close()
142
+
143
def delete_all(collection_names=("root_file", "final_file"), db_names=("word", "exce", "pptx", "csv")):
    """Delete all GridFS files in every listed collection of every listed database.

    Called with no arguments, it clears the same collection/database
    combinations as before. Each combination is handled by
    ``delete_all_files_in_collection``, which reports its own errors.

    :param collection_names: GridFS collection names to clear in each database.
    :param db_names: names of the databases to clear.
    """
    # NOTE(review): "exce" looks like a truncated "excel" - confirm against the
    # database names actually used elsewhere before changing it.
    for collection_name in collection_names:
        for db_name in db_names:
            delete_all_files_in_collection(collection_name, db_name=db_name)