Spaces:
Running
Running
update mongodb
Browse files- db/mongodb.py +29 -25
db/mongodb.py
CHANGED
@@ -162,33 +162,37 @@ def fetch_file_from_mongodb(db_name, collection_name, file_id):
|
|
162 |
def detect_file_type(uploaded_file):
    """Classify an uploaded file, preferring its sniffed MIME type.

    The first 4096 bytes are passed to ``magic.Magic(mime=True)``. When the
    resulting MIME type is not in the known table (or sniffing fails), the
    extension of ``uploaded_file.name`` is used as a fallback.

    Args:
        uploaded_file: Binary file-like object exposing ``read`` and
            ``name``, or ``None``.

    Returns:
        One of ``"PDF"``, ``"PPTX"``, ``"Excel"``, ``"Word"``, ``"CSV"``,
        ``"Unknown"`` when nothing matches, or ``None`` when
        ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return None

    # Pre-bind so the lookup below still works when sniffing raises.
    file_type = None
    try:
        # Only rewind streams that advertise seek support.
        can_rewind = (
            callable(getattr(uploaded_file, "seekable", None))
            and uploaded_file.seekable()
        )
        start = uploaded_file.tell() if can_rewind else None
        try:
            file_bytes = uploaded_file.read(4096)
        finally:
            # Put the cursor back so later readers still see the whole file.
            if can_rewind:
                uploaded_file.seek(start)

        mime = magic.Magic(mime=True)
        file_type = mime.from_buffer(file_bytes)
    except Exception as e:
        # Best effort: a sniffing failure falls through to the extension check.
        print(f"Error detecting file type: {e}")

    # Common MIME types
    mime_types = {
        "application/pdf": "PDF",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PPTX",
        "application/vnd.ms-powerpoint": "PPTX",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel",
        "application/vnd.ms-excel": "Excel",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word",
        "application/msword": "Word",
        "text/csv": "CSV",
        "text/plain": "CSV",  # Some CSV files are reported as text/plain
    }
    detected_type = mime_types.get(file_type, "Unknown")

    # Still unsure: fall back to the file extension.
    if detected_type == "Unknown":
        ext = os.path.splitext(uploaded_file.name)[1].lower()
        ext_mapping = {
            ".csv": "CSV",
            ".docx": "Word",
            ".doc": "Word",
            ".xlsx": "Excel",
            ".pptx": "PPTX",
            ".pdf": "PDF",
        }
        detected_type = ext_mapping.get(ext, "Unknown")

    return detected_type
|
|
|
162 |
def detect_file_type(uploaded_file):
    """Classify an uploaded file, preferring its file-name extension.

    The extension of ``uploaded_file.name`` is looked up first. Only when it
    is not a known extension are the first 4096 bytes sniffed with
    ``magic.Magic(mime=True)``. On seekable streams the read position is
    restored after sniffing, so later consumers still see the whole file.

    Args:
        uploaded_file: Binary file-like object exposing ``name`` and
            ``read``, or ``None``.

    Returns:
        One of ``"PDF"``, ``"PPTX"``, ``"Excel"``, ``"Word"``, ``"CSV"``;
        ``"Unknown"`` when neither extension nor MIME type is recognised or
        detection raises; ``None`` when ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return None

    ext_mapping = {
        ".csv": "CSV",
        ".docx": "Word",
        ".doc": "Word",
        ".xlsx": "Excel",
        ".pptx": "PPTX",
        ".pdf": "PDF",
    }
    mime_types = {
        "application/pdf": "PDF",
        "application/vnd.ms-powerpoint": "PPTX",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PPTX",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel",
        "application/vnd.ms-excel": "Excel",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word",
        "application/msword": "Word",
        "text/csv": "CSV",
        "text/plain": "CSV",
    }

    try:
        # Check the extension first; a known one is returned immediately.
        ext = os.path.splitext(uploaded_file.name)[1].lower()
        detected_type = ext_mapping.get(ext)
        if detected_type:
            return detected_type

        # No usable extension: fall back to the MIME type of the content.
        # Only rewind streams that advertise seek support.
        can_rewind = (
            callable(getattr(uploaded_file, "seekable", None))
            and uploaded_file.seekable()
        )
        start = uploaded_file.tell() if can_rewind else None
        try:
            file_bytes = uploaded_file.read(4096)
        finally:
            # Undo the read so the caller's next read starts where it was.
            if can_rewind:
                uploaded_file.seek(start)

        mime = magic.Magic(mime=True)
        file_type = mime.from_buffer(file_bytes)
        return mime_types.get(file_type, "Unknown")
    except Exception as e:
        # Best effort: any detection failure is reported as "Unknown".
        print(f"Error detecting file type: {e}")
        return "Unknown"
|