Spaces:

mintlee
/

MT_deploy

Running

App Files Community

mintlee committed on May 28

Commit

b149fbe

1 Parent(s): 1685fcb

add textbox

Browse files

Files changed (3) hide show

excel/excel_translate.py +2 -0
excel/xlsx.py +199 -73
pages/upload.py +2 -2

excel/excel_translate.py CHANGED Viewed

@@ -10,6 +10,8 @@ import gridfs
 import tempfile
 import os
 def translate_xlsx(file_id: str, target_lang: str = ""):
     # Kết nối MongoDB
     client = pymongo.MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")

 import tempfile
 import os
 def translate_xlsx(file_id: str, target_lang: str = ""):
     # Kết nối MongoDB
     client = pymongo.MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0")

excel/xlsx.py CHANGED Viewed

@@ -11,7 +11,10 @@ from io import BytesIO
 import shutil
 import io
 NS_MAIN = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
 # --- Hàm đăng ký namespace (quan trọng khi ghi file) ---
 def register_namespaces(xml_file):
@@ -21,99 +24,124 @@ def register_namespaces(xml_file):
     ])
     for ns, uri in namespaces.items():
         ET.register_namespace(ns, uri)
     # Đăng ký thêm namespace phổ biến nếu chưa có
-    if 'main' not in namespaces and '' not in namespaces: # Kiểm tra cả prefix rỗng
-         ET.register_namespace('', NS_MAIN['main']) # Đăng ký default namespace
-    elif 'main' not in namespaces:
-         ET.register_namespace('main', NS_MAIN['main']) # Đăng ký với prefix 'main'
 def extract_text_from_sheet(unzipped_folder_path: str) -> Optional[Tuple[List[Dict[str, Any]], Dict[str, Any]]]:
     """
-    Trích xuất text, lưu lại định dạng của run đầu tiên nếu là Rich Text.
     """
     modifiable_nodes = []
     shared_strings_path = os.path.join(unzipped_folder_path, "xl", "sharedStrings.xml")
     worksheets_folder = os.path.join(unzipped_folder_path, "xl", "worksheets")
     shared_tree = None
     sheet_trees = {}
     # --- Xử lý sharedStrings.xml ---
     if os.path.exists(shared_strings_path):
         try:
-            register_namespaces(shared_strings_path)
             shared_tree = ET.parse(shared_strings_path)
             root_shared = shared_tree.getroot()
             for si_element in root_shared.findall('main:si', NS_MAIN):
                 text_parts = []
-                t_elements = si_element.findall('.//main:t', NS_MAIN) # Tìm tất cả <t> con
-                # Tìm run đầu tiên (<r>) và properties (<rPr>) của nó
-                first_r = si_element.find('./main:r', NS_MAIN) # Tìm <r> con trực tiếp đầu tiên
-                first_rpr_clone = None # Lưu bản sao của <rPr> đầu tiên
-                is_rich_text = first_r is not None
                 if is_rich_text:
-                    # Tìm <rPr> bên trong <r> đầu tiên
-                    first_rpr = first_r.find('./main:rPr', NS_MAIN)
-                    if first_rpr is not None:
-                        # Sao chép sâu để không ảnh hưởng cây gốc và để dùng sau
-                        first_rpr_clone = copy.deepcopy(first_rpr)
-                # Lấy toàn bộ text
                 for t_node in t_elements:
                     if t_node.text:
                         text_parts.append(t_node.text)
                 full_text = "".join(text_parts)
-                if not full_text: continue # Bỏ qua nếu không có text
-                if is_rich_text:
                     modifiable_nodes.append({
                         'type': 'shared_rich',
                         'original_text': full_text,
-                        'element': si_element,         # Tham chiếu <si>
-                        'first_format': first_rpr_clone, # Lưu định dạng <rPr> đầu tiên (hoặc None)
                         'source_file': os.path.join("xl", "sharedStrings.xml"),
                         'sheet_name': None
                     })
-                elif t_elements: # Không phải rich text, tìm thẻ <t> đơn giản
-                    first_t = si_element.find('./main:t', NS_MAIN)
-                    if first_t is not None:
-                        modifiable_nodes.append({
                             'type': 'shared_simple',
                             'original_text': full_text,
-                            'element': first_t,          # Tham chiếu <t>
-                            'first_format': None,        # Không có định dạng đặc biệt
                             'source_file': os.path.join("xl", "sharedStrings.xml"),
                             'sheet_name': None
                         })
         except Exception as e:
             print(f"Lỗi xử lý sharedStrings: {e}")
             import traceback
             traceback.print_exc()
-    # --- Xử lý các file sheetX.xml (Inline Strings - không có định dạng phức tạp) ---
     if os.path.isdir(worksheets_folder):
         for sheet_filename in sorted(os.listdir(worksheets_folder)):
              if sheet_filename.lower().endswith(".xml"):
-                # ... (phần đọc và parse sheet tree như cũ) ...
                 sheet_file_path = os.path.join(worksheets_folder, sheet_filename)
                 try:
-                    register_namespaces(sheet_file_path)
                     sheet_tree = ET.parse(sheet_file_path)
                     sheet_trees[sheet_filename] = sheet_tree
                     root_sheet = sheet_tree.getroot()
                     for cell in root_sheet.findall('.//main:c[@t="inlineStr"]', NS_MAIN):
-                        t_element = cell.find('.//main:is/main:t', NS_MAIN)
-                        if t_element is not None and t_element.text is not None:
                              modifiable_nodes.append({
                                 'type': 'inline',
                                 'original_text': t_element.text,
-                                'element': t_element, # Tham chiếu <t>
-                                'first_format': None, # Inline string không có định dạng <rPr>
                                 'source_file': os.path.join("xl", "worksheets", sheet_filename),
                                 'sheet_name': sheet_filename
                              })
@@ -121,23 +149,91 @@ def extract_text_from_sheet(unzipped_folder_path: str) -> Optional[Tuple[List[Di
                      print(f"Lỗi xử lý sheet {sheet_filename}: {e}")
                      import traceback
                      traceback.print_exc()
     else:
-        print(f"Lỗi: Không tìm thấy thư mục worksheets: {worksheets_folder}")
-    global_data = {"shared_tree": shared_tree, "sheet_trees": sheet_trees, "shared_strings_path": shared_strings_path, "worksheets_folder": worksheets_folder}
-    return modifiable_nodes, global_data
 def apply_and_save_changes(modified_nodes_data: List[Dict[str, Any]], global_data: Dict[str, Any]) -> bool:
     """
-    Cập nhật text, giữ lại định dạng đầu tiên cho Rich Text, và lưu file XML.
     """
     if not global_data: print("Lỗi: Thiếu global_data."); return False
     updated_files = set()
-    try: ET.register_namespace('xml', "http://www.w3.org/XML/1998/namespace")
-    except ValueError: pass
     for node_info in modified_nodes_data:
         if 'modified_text' in node_info and node_info['element'] is not None:
@@ -145,59 +241,75 @@ def apply_and_save_changes(modified_nodes_data: List[Dict[str, Any]], global_dat
             modified_text = node_info['modified_text']
             original_text = node_info.get('original_text', '')
             node_type = node_info.get('type', '')
-            first_format = node_info.get('first_format') # Lấy <rPr> đã lưu (hoặc None)
             if original_text != modified_text:
-                # --- Xử lý Rich Text: Tạo lại cấu trúc <si><r>[<rPr>]<t></r></si> ---
                 if node_type == 'shared_rich':
                     si_element = element
-                    # Xóa con cũ
-                    for child in list(si_element):
                         si_element.remove(child)
-                    # Tạo run mới <r>
                     new_r = ET.Element(f"{{{NS_MAIN['main']}}}r")
-                    # Nếu có định dạng đầu tiên (<rPr>), thêm nó vào <r> mới
                     if first_format is not None:
-                        new_r.append(first_format) # Thêm bản sao <rPr> đã lưu
-                    # Tạo thẻ text mới <t>
                     new_t = ET.Element(f"{{{NS_MAIN['main']}}}t")
                     new_t.text = modified_text
-                    xml_space_attr = '{http://www.w3.org/XML/1998/namespace}space'
-                    new_t.set(xml_space_attr, 'preserve')
-                    # Thêm <t> vào <r>
                     new_r.append(new_t)
-                    # Thêm <r> vào <si>
                     si_element.append(new_r)
                     updated_files.add(node_info['source_file'])
-                    # print(f"Applied first format to Rich Text in {node_info['source_file']}")
-                # --- Xử lý Simple/Inline Text: Cập nhật thẻ <t> ---
                 elif node_type in ['shared_simple', 'inline']:
-                    t_element = element
                     t_element.text = modified_text
-                    xml_space_attr = '{http://www.w3.org/XML/1998/namespace}space'
-                    if xml_space_attr not in t_element.attrib or t_element.attrib[xml_space_attr] != 'preserve':
-                        t_element.set(xml_space_attr, 'preserve')
                     updated_files.add(node_info['source_file'])
-                    # print(f"Updated Simple/Inline Text in {node_info['source_file']}")
                 else:
-                     print(f"Cảnh báo: Loại node không xác định '{node_type}'")
-    # --- Lưu lại các file XML đã thay đổi (Giữ nguyên) ---
     success = True
-    # ... (Phần code lưu file như cũ) ...
     shared_tree = global_data.get("shared_tree"); shared_strings_path = global_data.get("shared_strings_path")
     sheet_trees = global_data.get("sheet_trees", {}); worksheets_folder = global_data.get("worksheets_folder")
     shared_strings_relative_path = os.path.join("xl", "sharedStrings.xml")
     if shared_tree and shared_strings_path and shared_strings_relative_path in updated_files:
         try:
-            # print(f"Saving modified file: {shared_strings_path}")
             shared_tree.write(shared_strings_path, encoding='utf-8', xml_declaration=True)
         except Exception as e: print(f"Lỗi lưu {shared_strings_path}: {e}"); success = False
@@ -207,12 +319,29 @@ def apply_and_save_changes(modified_nodes_data: List[Dict[str, Any]], global_dat
             if sheet_relative_path in updated_files:
                 sheet_file_path = os.path.join(worksheets_folder, sheet_filename)
                 try:
-                    # print(f"Saving modified file: {sheet_file_path}")
                     sheet_tree.write(sheet_file_path, encoding='utf-8', xml_declaration=True)
                 except Exception as e: print(f"Lỗi lưu {sheet_file_path}: {e}"); success = False
-    if success and updated_files: print(f"Đã lưu thành công {len(updated_files)} file XML đã sửa đổi (đã giữ lại định dạng đầu tiên cho Rich Text).")
-    elif not updated_files: print("Không có file XML nào cần cập nhật.") ; return True
     return success
 def zip_folder_to_excel_file(folder_path, file_name):
@@ -424,7 +553,4 @@ def translate_xlsx(file_id, file_name, source_lang='en', target_lang='vi', batch
                 shutil.rmtree(xml_folder) # Mark folder as 'handled' by zipping
             else:
                 print("LỖI NGHIÊM TRỌNG: Không thể tạo file XLSX đã dịch cuối cùng.")
-    return final_id

 import shutil
 import io
 NS_MAIN = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
+NS_DRAWING = {'xdr': "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"}
+NS_A = {'a': "http://schemas.openxmlformats.org/drawingml/2006/main"}
 # --- Hàm đăng ký namespace (quan trọng khi ghi file) ---
 def register_namespaces(xml_file):
     ])
     for ns, uri in namespaces.items():
         ET.register_namespace(ns, uri)
     # Đăng ký thêm namespace phổ biến nếu chưa có
+    if 'main' not in namespaces and '' not in namespaces and NS_MAIN['main'] not in namespaces.values():
+         ET.register_namespace('', NS_MAIN['main'])
+    elif 'main' not in namespaces and NS_MAIN['main'] not in namespaces.values():
+         ET.register_namespace('main', NS_MAIN['main'])
+    # Đăng ký namespaces cho drawing nếu cần
+    if 'xdr' not in namespaces and NS_DRAWING['xdr'] not in namespaces.values():
+        ET.register_namespace('xdr', NS_DRAWING['xdr'])
+    if 'a' not in namespaces and NS_A['a'] not in namespaces.values():
+        ET.register_namespace('a', NS_A['a'])
 def extract_text_from_sheet(unzipped_folder_path: str) -> Optional[Tuple[List[Dict[str, Any]], Dict[str, Any]]]:
     """
+    Trích xuất text, lưu lại định dạng của run đầu tiên nếu là Rich Text,
+    bao gồm cả text từ TextBoxes trong drawings.
     """
     modifiable_nodes = []
     shared_strings_path = os.path.join(unzipped_folder_path, "xl", "sharedStrings.xml")
     worksheets_folder = os.path.join(unzipped_folder_path, "xl", "worksheets")
+    drawings_folder = os.path.join(unzipped_folder_path, "xl", "drawings") # Thêm dòng này
     shared_tree = None
     sheet_trees = {}
+    drawing_trees = {} # Thêm dòng này
     # --- Xử lý sharedStrings.xml ---
     if os.path.exists(shared_strings_path):
         try:
+            register_namespaces(shared_strings_path) # Đảm bảo register_namespaces được gọi
             shared_tree = ET.parse(shared_strings_path)
             root_shared = shared_tree.getroot()
             for si_element in root_shared.findall('main:si', NS_MAIN):
                 text_parts = []
+                # Tìm tất cả <t> con, bất kể chúng nằm trong <r> hay không
+                t_elements = si_element.findall('.//main:t', NS_MAIN)
+                first_r = si_element.find('./main:r', NS_MAIN)
+                first_rpr_clone = None
+                is_rich_text = first_r is not None # Rich text nếu có ít nhất một <r>
                 if is_rich_text:
+                    # Cố gắng tìm <rPr> bên trong <r> đầu tiên
+                    first_rpr_candidate = si_element.find('./main:r/main:rPr', NS_MAIN)
+                    if first_rpr_candidate is not None:
+                        first_rpr_clone = copy.deepcopy(first_rpr_candidate)
+                    else:
+                        # Nếu <r> đầu tiên không có <rPr>, kiểm tra <si><rPh><rPr> (Phonetic properties, ít gặp hơn)
+                        # Hoặc có thể không có định dạng nào cụ thể ở run đầu
+                        pass
                 for t_node in t_elements:
                     if t_node.text:
                         text_parts.append(t_node.text)
                 full_text = "".join(text_parts)
+                if not full_text or full_text.isspace(): continue
+                # Logic xác định type dựa trên sự hiện diện của <r> và <rPr> đã được điều chỉnh
+                if is_rich_text : # Chỉ cần có <r> là đủ, first_rpr_clone có thể là None
                     modifiable_nodes.append({
                         'type': 'shared_rich',
                         'original_text': full_text,
+                        'element': si_element,
+                        'first_format': first_rpr_clone, # Sẽ là None nếu <r> đầu không có <rPr>
                         'source_file': os.path.join("xl", "sharedStrings.xml"),
                         'sheet_name': None
                     })
+                elif t_elements: #  Trường hợp có <t> nhưng không có <r> nào (simple shared string)
+                    # Tìm <t> đầu tiên trực tiếp dưới <si> nếu không phải rich text
+                    # Hoặc nếu cấu trúc là <si><t>...</t></si>
+                    # Trong trường hợp này, element nên là si_element để khi apply_changes,
+                    # ta sẽ tạo cấu trúc <si><r><t>...</t></r></si> nếu có định dạng
+                    # hoặc <si><t>...</t></si> nếu không.
+                    # Tuy nhiên, để đơn giản hóa, nếu không có <r>, ta coi element là <t> đầu tiên
+                    # và không áp dụng "first_format" (vì nó sẽ là None).
+                    # Hoặc, ta có thể luôn coi <si> là element cho shared strings.
+                    # Lựa chọn hiện tại: nếu không có <r>, element là <t> đầu tiên tìm thấy.
+                    direct_t = si_element.find('./main:t', NS_MAIN)
+                    if direct_t is not None:
+                         modifiable_nodes.append({
                             'type': 'shared_simple',
                             'original_text': full_text,
+                            'element': direct_t, # Tham chiếu <t>
+                            'first_format': None,
                             'source_file': os.path.join("xl", "sharedStrings.xml"),
                             'sheet_name': None
                         })
+                    # else: ít khả năng xảy ra nếu t_elements có phần tử
         except Exception as e:
             print(f"Lỗi xử lý sharedStrings: {e}")
             import traceback
             traceback.print_exc()
+    # --- Xử lý các file sheetX.xml (Inline Strings) ---
     if os.path.isdir(worksheets_folder):
         for sheet_filename in sorted(os.listdir(worksheets_folder)):
              if sheet_filename.lower().endswith(".xml"):
                 sheet_file_path = os.path.join(worksheets_folder, sheet_filename)
                 try:
+                    register_namespaces(sheet_file_path) # Đảm bảo register_namespaces được gọi
                     sheet_tree = ET.parse(sheet_file_path)
                     sheet_trees[sheet_filename] = sheet_tree
                     root_sheet = sheet_tree.getroot()
                     for cell in root_sheet.findall('.//main:c[@t="inlineStr"]', NS_MAIN):
+                        t_element = cell.find('.//main:is/main:t', NS_MAIN) # Sửa lại tìm kiếm <t>
+                        if t_element is not None and t_element.text is not None and t_element.text.strip():
                              modifiable_nodes.append({
                                 'type': 'inline',
                                 'original_text': t_element.text,
+                                'element': t_element,
+                                'first_format': None,
                                 'source_file': os.path.join("xl", "worksheets", sheet_filename),
                                 'sheet_name': sheet_filename
                              })
                      print(f"Lỗi xử lý sheet {sheet_filename}: {e}")
                      import traceback
                      traceback.print_exc()
+    else:
+        print(f"Cảnh báo: Không tìm thấy thư mục worksheets: {worksheets_folder}")
+    # --- Xử lý các file drawingX.xml (Text Boxes, Shapes with Text) ---
+    if os.path.isdir(drawings_folder):
+        for drawing_filename in sorted(os.listdir(drawings_folder)):
+            if drawing_filename.lower().endswith(".xml"):
+                drawing_file_path = os.path.join(drawings_folder, drawing_filename)
+                try:
+                    register_namespaces(drawing_file_path) # Đảm bảo register_namespaces được gọi
+                    drawing_tree = ET.parse(drawing_file_path)
+                    drawing_trees[drawing_filename] = drawing_tree
+                    root_drawing = drawing_tree.getroot()
+                    # TextBoxes và Shapes có text thường nằm trong <xdr:sp> (shape) -> <xdr:txBody> (text body)
+                    # Bên trong <xdr:txBody> là các <a:p> (paragraph)
+                    for p_element in root_drawing.findall('.//xdr:txBody/a:p', {**NS_DRAWING, **NS_A}):
+                        text_parts = []
+                        # Lấy text từ tất cả <a:t> trong paragraph này
+                        t_elements = p_element.findall('.//a:t', NS_A)
+                        first_r = p_element.find('./a:r', NS_A) # Tìm <a:r> con trực tiếp đầu tiên của <a:p>
+                        first_rpr_clone = None # Định dạng của run đầu tiên trong paragraph
+                        is_rich_text_paragraph = first_r is not None # Coi là rich nếu có <a:r>
+                        if is_rich_text_paragraph:
+                            # Tìm <a:rPr> bên trong <a:r> đầu tiên của <a:p>
+                            first_rpr = first_r.find('./a:rPr', NS_A)
+                            if first_rpr is not None:
+                                first_rpr_clone = copy.deepcopy(first_rpr)
+                        for t_node in t_elements:
+                            if t_node.text:
+                                text_parts.append(t_node.text)
+                        full_text = "".join(text_parts)
+                        if not full_text or full_text.isspace(): continue
+                        # Lưu node là <a:p> vì chúng ta sẽ thay thế toàn bộ nội dung của nó
+                        # (các <a:r> và <a:t> bên trong)
+                        modifiable_nodes.append({
+                            'type': 'drawing_text', # Loại mới cho text trong drawing
+                            'original_text': full_text,
+                            'element': p_element,      # Tham chiếu đến <a:p>
+                            'first_format': first_rpr_clone, # Lưu định dạng <a:rPr> của <a:r> đầu tiên (hoặc None)
+                            'source_file': os.path.join("xl", "drawings", drawing_filename),
+                            'sheet_name': None # Có thể tìm cách liên kết ngược lại sheet nếu cần
+                        })
+                except Exception as e:
+                    print(f"Lỗi xử lý drawing {drawing_filename}: {e}")
+                    import traceback
+                    traceback.print_exc()
     else:
+        print(f"Thông tin: Không tìm thấy thư mục drawings: {drawings_folder}")
+    global_data = {
+        "shared_tree": shared_tree,
+        "sheet_trees": sheet_trees,
+        "drawing_trees": drawing_trees, # Thêm dòng này
+        "shared_strings_path": shared_strings_path,
+        "worksheets_folder": worksheets_folder,
+        "drawings_folder": drawings_folder # Thêm dòng này
+    }
+    return modifiable_nodes, global_data\
 def apply_and_save_changes(modified_nodes_data: List[Dict[str, Any]], global_data: Dict[str, Any]) -> bool:
     """
+    Cập nhật text, giữ lại định dạng đầu tiên cho Rich Text / Drawing Text, và lưu file XML.
     """
     if not global_data: print("Lỗi: Thiếu global_data."); return False
     updated_files = set()
+    try:
+        ET.register_namespace('xml', "http://www.w3.org/XML/1998/namespace")
+        # Đảm bảo các namespace chính được đăng ký trước khi thao tác
+        ET.register_namespace('', NS_MAIN['main']) # Default cho spreadsheet
+        ET.register_namespace('main', NS_MAIN['main']) # Hoặc với prefix 'main'
+        ET.register_namespace('xdr', NS_DRAWING['xdr'])
+        ET.register_namespace('a', NS_A['a'])
+    except ValueError: # Có thể đã được đăng ký
+        pass
     for node_info in modified_nodes_data:
         if 'modified_text' in node_info and node_info['element'] is not None:
             modified_text = node_info['modified_text']
             original_text = node_info.get('original_text', '')
             node_type = node_info.get('type', '')
+            first_format = node_info.get('first_format')
             if original_text != modified_text:
+                # --- Xử lý Rich Text (sharedStrings): Tạo lại cấu trúc <si><r>[<rPr>]<t></r></si> ---
                 if node_type == 'shared_rich':
                     si_element = element
+                    for child in list(si_element): # Xóa con cũ của <si>
                         si_element.remove(child)
                     new_r = ET.Element(f"{{{NS_MAIN['main']}}}r")
                     if first_format is not None:
+                        new_r.append(first_format)
                     new_t = ET.Element(f"{{{NS_MAIN['main']}}}t")
                     new_t.text = modified_text
+                    new_t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
                     new_r.append(new_t)
                     si_element.append(new_r)
                     updated_files.add(node_info['source_file'])
+                # --- Xử lý Simple Text (sharedStrings) hoặc Inline Text: Cập nhật thẻ <t> ---
                 elif node_type in ['shared_simple', 'inline']:
+                    t_element = element # element ở đây là thẻ <t>
                     t_element.text = modified_text
+                    t_element.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+                    updated_files.add(node_info['source_file'])
+                # --- Xử lý Text trong Drawing (TextBoxes, Shapes): Tạo lại cấu trúc <a:p><a:r>[<a:rPr>]<a:t></a:t></a:r></a:p> ---
+                elif node_type == 'drawing_text':
+                    p_element = element # element ở đây là thẻ <a:p>
+                    for child in list(p_element): # Xóa con cũ của <a:p> (thường là các <a:r> hoặc <a:endParaRPr>)
+                        p_element.remove(child)
+                    # Tạo run mới <a:r>
+                    new_r = ET.Element(f"{{{NS_A['a']}}}r")
+                    # Nếu có định dạng <a:rPr> đã lưu, thêm nó vào <a:r> mới
+                    if first_format is not None: # first_format ở đây là <a:rPr>
+                        new_r.append(first_format)
+                    # Tạo thẻ text mới <a:t>
+                    new_t = ET.Element(f"{{{NS_A['a']}}}t")
+                    new_t.text = modified_text
+                    # Trong DrawingML, xml:space="preserve" thường không cần thiết cho <a:t>
+                    # vì việc xuống dòng được kiểm soát bởi <a:br> hoặc các paragraph <a:p> riêng biệt.
+                    # Tuy nhiên, việc thêm nó không gây hại.
+                    new_r.append(new_t) # Thêm <a:t> vào <a:r>
+                    p_element.append(new_r) # Thêm <a:r> vào <a:p>
+                    # Một số text box có thể có <a:endParaRPr> để định dạng cuối paragraph.
+                    # Nếu muốn giữ lại, cần logic phức tạp hơn.
+                    # Hiện tại, chúng ta chỉ tạo lại với một run duy nhất.
                     updated_files.add(node_info['source_file'])
+                    # print(f"Applied first format to Drawing Text in {node_info['source_file']}")
                 else:
+                     print(f"Cảnh báo: Loại node không xác định '{node_type}' cho text '{original_text}'")
+    # --- Lưu lại các file XML đã thay đổi ---
     success = True
     shared_tree = global_data.get("shared_tree"); shared_strings_path = global_data.get("shared_strings_path")
     sheet_trees = global_data.get("sheet_trees", {}); worksheets_folder = global_data.get("worksheets_folder")
+    drawing_trees = global_data.get("drawing_trees", {}); drawings_folder = global_data.get("drawings_folder") # Thêm
     shared_strings_relative_path = os.path.join("xl", "sharedStrings.xml")
     if shared_tree and shared_strings_path and shared_strings_relative_path in updated_files:
         try:
             shared_tree.write(shared_strings_path, encoding='utf-8', xml_declaration=True)
         except Exception as e: print(f"Lỗi lưu {shared_strings_path}: {e}"); success = False
             if sheet_relative_path in updated_files:
                 sheet_file_path = os.path.join(worksheets_folder, sheet_filename)
                 try:
                     sheet_tree.write(sheet_file_path, encoding='utf-8', xml_declaration=True)
                 except Exception as e: print(f"Lỗi lưu {sheet_file_path}: {e}"); success = False
+    # Lưu các file drawing đã thay đổi
+    if drawings_folder and os.path.exists(drawings_folder):
+        for drawing_filename, drawing_tree in drawing_trees.items():
+            drawing_relative_path = os.path.join("xl", "drawings", drawing_filename)
+            if drawing_relative_path in updated_files:
+                drawing_file_path = os.path.join(drawings_folder, drawing_filename)
+                try:
+                    # Đảm bảo namespaces được đăng ký đúng cách TRƯỚC KHI GHI
+                    # register_namespaces(drawing_file_path) # Có thể không cần nếu đã làm ở extract
+                    # Hoặc đăng ký cứng các namespace cần thiết:
+                    # ET.register_namespace('xdr', NS_DRAWING['xdr'])
+                    # ET.register_namespace('a', NS_A['a'])
+                    # (Đã chuyển lên đầu hàm apply_and_save_changes)
+                    drawing_tree.write(drawing_file_path, encoding='utf-8', xml_declaration=True)
+                except Exception as e: print(f"Lỗi lưu {drawing_file_path}: {e}"); success = False
+    if success and updated_files: print(f"Đã lưu thành công {len(updated_files)} file XML đã sửa đổi.")
+    elif not updated_files: print("Không có file XML nào cần cập nhật.") ; return True # Vẫn coi là success nếu không có gì thay đổi
     return success
 def zip_folder_to_excel_file(folder_path, file_name):
                 shutil.rmtree(xml_folder) # Mark folder as 'handled' by zipping
             else:
                 print("LỖI NGHIÊM TRỌNG: Không thể tạo file XLSX đã dịch cuối cùng.")
+    return final_id

pages/upload.py CHANGED Viewed

@@ -75,11 +75,11 @@ with st.container():
     with col1:
         st.markdown('<p style="font-size:16px; font-weight:bold; margin-bottom:4px;">🌐 Ngôn ngữ của tài liệu</p>', unsafe_allow_html=True)
-        source_lang = st.selectbox(" ", ["english", "vietnamese"], key="source_lang")
     with col2:
         st.markdown('<p style="font-size:16px; font-weight:bold; margin-bottom:4px;">🌐 Ngôn ngữ muốn dịch sang</p>', unsafe_allow_html=True)
-        target_lang = st.selectbox("  ", ["english", "vietnamese"], key="target_lang")
 def process_file(file, file_type):
     progress_bar = st.progress(0)

     with col1:
         st.markdown('<p style="font-size:16px; font-weight:bold; margin-bottom:4px;">🌐 Ngôn ngữ của tài liệu</p>', unsafe_allow_html=True)
+        source_lang = st.selectbox(" ", ["chinese", "english", "vietnamese"], key="source_lang")
     with col2:
         st.markdown('<p style="font-size:16px; font-weight:bold; margin-bottom:4px;">🌐 Ngôn ngữ muốn dịch sang</p>', unsafe_allow_html=True)
+        target_lang = st.selectbox("  ", ["chinese", "english", "vietnamese"], key="target_lang")
 def process_file(file, file_type):
     progress_bar = st.progress(0)