# ppt_objects.py
# (The "Spaces: Running" lines that preceded this header were web-page residue, not module content.)
from pptx import Presentation | |
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR | |
from pptx.enum.shapes import MSO_SHAPE_TYPE | |
import xml.etree.ElementTree as ET | |
from pptx.util import Pt | |
from pptx.dml.color import RGBColor | |
import re | |
import json | |
from pymongo import MongoClient | |
from gridfs import GridFS | |
import json | |
import xml.etree.ElementTree as ET | |
from io import BytesIO | |
def apply_group_properties_recursive(shape, shape_index, parent_element):
    """Recursively apply saved properties to the shapes inside a group shape.

    Args:
        shape: Shape to inspect. Anything that is not a group is ignored.
        shape_index: Value of the ``shape_index`` attribute identifying the
            matching ``group_element`` beneath ``parent_element``.
        parent_element: ElementTree element searched for the group's node.

    A ``json.JSONDecodeError`` or ``KeyError`` raised while handling one
    sub-shape is printed and that sub-shape is skipped.
    """
    if shape.shape_type != MSO_SHAPE_TYPE.GROUP:
        return

    group_element = parent_element.find(
        f".//group_element[@shape_index='{shape_index}']"
    )
    if group_element is None:
        return

    for i, sub_shape in enumerate(shape.shapes):
        # Handles nested groups; returns immediately for any other shape.
        apply_group_properties_recursive(sub_shape, i, group_element)

        # Apply properties for sub-shapes WITHIN the group, based on type.
        if sub_shape.shape_type == MSO_SHAPE_TYPE.TABLE:
            table_element = group_element.find(
                f".//table_element[@shape_index='{i}']"
            )
            # Compare with None explicitly: truth-testing an Element checks
            # whether it has children and is deprecated.
            if table_element is not None:
                props_element = table_element.find("properties")
                if props_element is not None and props_element.text:
                    try:
                        table_data = json.loads(props_element.text)
                        apply_table_properties(sub_shape.table, table_data)
                    except (json.JSONDecodeError, KeyError) as e:
                        print(f"Error applying table properties (in group): {str(e)}")
        elif hasattr(sub_shape, "text_frame") and sub_shape.text_frame:
            text_element = group_element.find(
                f".//text_element[@shape_index='{i}']"
            )
            if text_element is not None:
                props_element = text_element.find("properties")
                if props_element is not None and props_element.text:
                    try:
                        shape_data = json.loads(props_element.text)
                        apply_shape_properties(sub_shape, shape_data)
                    except (json.JSONDecodeError, KeyError) as e:
                        print(f"Error applying shape properties (in group): {str(e)}")
def get_alignment_value(alignment_str):
    """Convert a saved alignment string to a ``PP_ALIGN`` member.

    Only the leading run of ASCII letters is used, so values that carry
    extra characters after the name (for example ``"CENTER (2)"``) are
    accepted. Matching is case-insensitive and surrounding whitespace is
    ignored.

    Args:
        alignment_str: Alignment text, or ``None`` when no alignment was
            recorded.

    Returns:
        The matching ``PP_ALIGN`` member, or ``None`` when the input is not
        a string, is blank, does not start with letters, or names an
        alignment other than center/left/right/justify.
    """
    # Unset alignment is stored as None; re.match would raise TypeError.
    if not isinstance(alignment_str, str):
        return None

    match = re.match(r"([A-Za-z]+)", alignment_str.strip())
    if match is None:
        return None

    alignment_map = {
        'center': PP_ALIGN.CENTER,
        'left': PP_ALIGN.LEFT,
        'right': PP_ALIGN.RIGHT,
        'justify': PP_ALIGN.JUSTIFY,
    }
    return alignment_map.get(match.group(1).lower())
def get_vertical_anchor(value):
    """Convert a saved vertical-anchor string to an ``MSO_ANCHOR`` member.

    Only the first whitespace-separated word is considered, compared
    case-insensitively, so a value such as ``"MIDDLE (4)"`` is accepted.

    Args:
        value: Anchor text, or ``None`` when no anchor was recorded.

    Returns:
        ``None`` when ``value`` is not a string or is blank (meaning "no
        explicit anchor"); otherwise the matching ``MSO_ANCHOR`` member,
        falling back to ``MSO_ANCHOR.TOP`` for unrecognised names.
    """
    # Previously None raised AttributeError and "" raised IndexError.
    if not isinstance(value, str):
        return None
    words = value.split()
    if not words:
        return None

    mapping = {
        "TOP": MSO_ANCHOR.TOP,
        "MIDDLE": MSO_ANCHOR.MIDDLE,
        "BOTTOM": MSO_ANCHOR.BOTTOM,
    }
    return mapping.get(words[0].upper(), MSO_ANCHOR.TOP)
def get_table_properties(table):
    """Collect the text and formatting of every cell in a table.

    Returns:
        A dict with ``rows`` and ``cols`` (counts) and ``cells``, a list of
        rows where each entry is a per-cell dict. Font details come from the
        first run of the cell's first paragraph; ``bold`` and ``italic``
        keys are only present when such a run exists.
    """

    def _describe(cell):
        """Build the property dict for a single cell."""
        anchor = cell.vertical_anchor
        info = {
            'text': cell.text.strip(),
            'font_size': None,
            'font_name': None,
            'alignment': None,
            'margin_left': cell.margin_left,
            'margin_right': cell.margin_right,
            'margin_top': cell.margin_top,
            'margin_bottom': cell.margin_bottom,
            'vertical_anchor': str(anchor) if anchor else None,
            'font_color': None,
        }

        paragraphs = cell.text_frame.paragraphs
        if not paragraphs:
            return info
        first_paragraph = paragraphs[0]

        if first_paragraph.runs:
            font = first_paragraph.runs[0].font

            size = getattr(font, 'size', None)
            if size is not None:
                info['font_size'] = size.pt

            info['font_name'] = getattr(font, 'name', None)

            for flag in ('bold', 'italic'):
                if hasattr(font, flag):
                    info[flag] = getattr(font, flag)

            # A colour object may not expose ``rgb`` at all; treat that the
            # same as "no colour recorded".
            rgb = getattr(getattr(font, 'color', None), 'rgb', None)
            if rgb is not None:
                info['font_color'] = str(rgb)

        alignment = getattr(first_paragraph, 'alignment', None)
        info['alignment'] = f"{alignment}" if alignment else None
        return info

    return {
        'rows': len(table.rows),
        'cols': len(table.columns),
        'cells': [[_describe(cell) for cell in row.cells] for row in table.rows],
    }
def get_shape_properties(shape):
    """Extract geometry, text and basic text formatting from a shape.

    Font attributes are read from the first run of each paragraph and
    paragraph attributes from each paragraph in turn, so when a shape has
    several paragraphs the values from later paragraphs overwrite earlier
    ones.

    Returns:
        A dict with ``text``, ``font_size`` (points), ``font_name``,
        ``alignment``, ``width``/``height``/``left``/``top``, ``bold``,
        ``italic``, ``line_spacing_info`` (``rule`` and ``value``),
        ``space_before``/``space_after`` (points) and ``font_color``.
        For rule ``"EXACTLY"`` the line-spacing value is in points; for
        ``"MULTIPLE"`` it is a number of lines.
    """
    shape_data = {
        'text': '',
        'font_size': None,
        'font_name': None,
        'alignment': None,
        'width': shape.width,
        'height': shape.height,
        'left': shape.left,
        'top': shape.top,
        'bold': None,
        'italic': None,
        'line_spacing_info': {
            'rule': None,
            'value': None
        },
        'space_before': None,
        'space_after': None,
        'font_color': None
    }
    if hasattr(shape, "text"):
        shape_data['text'] = shape.text.strip()
    if not hasattr(shape, 'text_frame'):
        return shape_data

    for paragraph in shape.text_frame.paragraphs:
        if paragraph.runs:
            # Assumes the first run is representative of the paragraph.
            run = paragraph.runs[0]
            if hasattr(run.font, 'size') and run.font.size is not None:
                shape_data['font_size'] = run.font.size.pt
            if hasattr(run.font, 'name'):
                shape_data['font_name'] = run.font.name
            if hasattr(run.font, 'bold'):
                shape_data['bold'] = run.font.bold
            if hasattr(run.font, 'italic'):
                shape_data['italic'] = run.font.italic
            if (hasattr(run.font, 'color') and
                    run.font.color is not None and
                    hasattr(run.font.color, 'rgb') and
                    run.font.color.rgb is not None):
                shape_data['font_color'] = str(run.font.color.rgb)

        if hasattr(paragraph, 'alignment') and paragraph.alignment is not None:
            shape_data['alignment'] = str(paragraph.alignment).split('.')[-1]
        if hasattr(paragraph, 'space_before'):
            shape_data['space_before'] = paragraph.space_before.pt if paragraph.space_before else None
        if hasattr(paragraph, 'space_after'):
            shape_data['space_after'] = paragraph.space_after.pt if paragraph.space_after else None

        if hasattr(paragraph, 'line_spacing') and paragraph.line_spacing:
            line_spacing = paragraph.line_spacing
            if hasattr(line_spacing, 'pt'):
                # A length object: fixed spacing. Store it in points so it
                # can be restored; it used to be recorded as None and lost.
                line_spacing_rule = "EXACTLY"
                line_spacing_value = line_spacing.pt
            elif isinstance(line_spacing, float):
                # A large float (e.g. 84.99) is probably a point value
                # rather than a multiple of the line height.
                line_spacing_rule = "EXACTLY" if line_spacing > 10 else "MULTIPLE"
                line_spacing_value = line_spacing
            else:
                line_spacing_rule = "EXACTLY" if line_spacing > 10 else "UNKNOWN"
                line_spacing_value = None
            shape_data['line_spacing_info'] = {
                'rule': line_spacing_rule,
                'value': line_spacing_value
            }
    return shape_data
def apply_shape_properties(shape, shape_data):
    """Apply saved geometry, text and formatting to a shape.

    The shape's text is replaced by a single run holding
    ``shape_data['text']``. Font size is scaled to 90% of the saved value.
    Formatting keys are optional; a missing or empty value leaves that
    setting untouched, except the font name, which falls back to "Arial".

    Args:
        shape: Target shape with a text frame.
        shape_data: Dict of saved properties. ``width``, ``height``,
            ``left``, ``top`` and ``text`` are required; spacing values are
            in points.

    Any exception is printed rather than raised, so one bad shape does not
    abort processing.
    """
    try:
        shape.width = shape_data['width']
        shape.height = shape_data['height']
        shape.left = shape_data['left']
        shape.top = shape_data['top']

        shape.text = ""
        paragraph = shape.text_frame.paragraphs[0]
        run = paragraph.add_run()
        run.text = shape_data['text']

        font_size = shape_data.get('font_size')
        if font_size:
            run.font.size = Pt(font_size * 0.9)

        run.font.name = shape_data.get('font_name') or "Arial"

        if shape_data.get('font_color'):
            run.font.color.rgb = RGBColor.from_string(shape_data['font_color'])

        bold = shape_data.get('bold')
        if bold is not None:
            run.font.bold = bold
        italic = shape_data.get('italic')
        if italic is not None:
            run.font.italic = italic

        alignment = shape_data.get('alignment')
        if alignment:
            paragraph.alignment = get_alignment_value(alignment)

        line_spacing_info = shape_data.get('line_spacing_info') or {}
        line_spacing_rule = line_spacing_info.get('rule')
        line_spacing_value = line_spacing_info.get('value')
        if line_spacing_rule and line_spacing_value is not None:
            if line_spacing_rule in ("EXACTLY", "AT_LEAST"):
                paragraph.line_spacing = Pt(line_spacing_value)
            elif line_spacing_rule == "MULTIPLE":
                paragraph.line_spacing = line_spacing_value
            else:
                print(f"⚠️ Unknown line spacing rule: {line_spacing_rule}")

        # Spacing is saved in points; wrap in Pt so it is not read as a raw
        # EMU count, which would collapse the spacing to almost nothing.
        space_before = shape_data.get('space_before')
        if space_before:
            paragraph.space_before = Pt(space_before)
        space_after = shape_data.get('space_after')
        if space_after:
            paragraph.space_after = Pt(space_after)
    except Exception as e:
        print(f"Error applying shape properties: {str(e)}")
def apply_table_properties(table, table_data):
    """Apply saved per-cell text and formatting to a PowerPoint table.

    Each cell's text is replaced by a single run. Font size is scaled to
    90% of the saved value. Properties whose saved value is missing or
    ``None`` are left untouched, except the font name, which falls back to
    "Arial", and bold/italic, which default to ``False`` when absent.

    Args:
        table: Table whose ``rows`` and ``cells`` are updated in place.
        table_data: Dict with a ``cells`` list of rows of per-cell dicts.

    A failure on one cell is printed and does not stop the remaining cells.
    """
    for row_idx, row in enumerate(table.rows):
        for col_idx, cell in enumerate(row.cells):
            try:
                cell_data = table_data['cells'][row_idx][col_idx]

                # Margins
                cell.margin_left = cell_data.get('margin_left', 0)
                cell.margin_right = cell_data.get('margin_right', 0)
                cell.margin_top = cell_data.get('margin_top', 0)
                cell.margin_bottom = cell_data.get('margin_bottom', 0)

                # Vertical anchor. The key may be present with a None value,
                # so test the value rather than key membership.
                anchor = cell_data.get('vertical_anchor')
                if anchor:
                    cell.vertical_anchor = get_vertical_anchor(anchor)

                # Clear the old content and write the new text.
                cell.text = ""
                paragraph = cell.text_frame.paragraphs[0]
                run = paragraph.add_run()
                run.text = cell_data.get('text', "")

                # Font size (scaled down). A None size used to raise here,
                # skipping every setting below for this cell.
                font_size = cell_data.get('font_size')
                if font_size:
                    run.font.size = Pt(font_size * 0.9)

                # Font name, with the same fallback as apply_shape_properties.
                run.font.name = cell_data.get('font_name') or 'Arial'

                # Font colour
                font_color = cell_data.get('font_color')
                if font_color:
                    run.font.color.rgb = RGBColor.from_string(font_color)

                # Bold & italic
                run.font.bold = cell_data.get('bold', False)
                run.font.italic = cell_data.get('italic', False)

                # Paragraph alignment
                alignment = cell_data.get('alignment')
                if alignment:
                    paragraph.alignment = get_alignment_value(alignment)
            except Exception as e:
                print(f"Lỗi khi thiết lập thuộc tính ô [{row_idx}, {col_idx}]: {str(e)}")
def get_file_from_mongodb(db_name, collection_name, file_id):
    """Download a file from MongoDB GridFS into memory.

    Args:
        db_name: Name of the database to read from.
        collection_name: GridFS collection (bucket) name.
        file_id: GridFS id of the file to fetch.

    Returns:
        A ``BytesIO`` holding the complete file contents.
    """
    # NOTE(review): the connection string, including credentials, is
    # hard-coded here; it should come from configuration or the environment.
    with MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0") as client:
        db = client[db_name]
        fs = GridFS(db, collection_name)
        file_data = fs.get(file_id)
        # Read everything before the client is closed on leaving the block.
        return BytesIO(file_data.read())
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
    """Store a file in MongoDB GridFS.

    Args:
        db_name: Name of the database to write to.
        collection_name: GridFS collection (bucket) name.
        file_name: Filename recorded for the stored file.
        file_data: Bytes or a file-like object holding the content.

    Returns:
        The GridFS id of the stored file.
    """
    # NOTE(review): this function is defined a second time further down the
    # module with the same signature; the later definition is the one bound
    # at import time.
    # NOTE(review): the connection string, including credentials, is
    # hard-coded here; it should come from configuration or the environment.
    with MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0") as client:
        db = client[db_name]
        fs = GridFS(db, collection_name)
        file_id = fs.put(file_data, filename=file_name)
    return file_id
def create_translated_ppt(db_name, original_ppt_id, translated_xml_id, output_collection):
    """Build the translated PowerPoint from files in MongoDB and store it back.

    The original presentation is read from the "root_file" collection and
    the translated XML from "final_xml". For every slide with a matching
    ``slide`` element (by 1-based ``number``), the saved properties of each
    group, table and text shape are applied, looked up by ``shape_index``.
    The result is saved to ``output_collection`` as
    "translated_presentation.pptx".

    Returns:
        The id of the stored file, or ``None`` if anything failed (the
        error is printed).
    """

    def _properties_text(element):
        """Return the text of element's ``properties`` child, or None."""
        if element is None:
            return None
        props = element.find("properties")
        if props is None or not props.text:
            return None
        return props.text

    try:
        # Fetch both inputs from MongoDB.
        original_ppt_io = get_file_from_mongodb(db_name, "root_file", original_ppt_id)
        translated_xml_io = get_file_from_mongodb(db_name, "final_xml", translated_xml_id)

        # Load the original presentation and the translated XML.
        prs = Presentation(original_ppt_io)
        root = ET.parse(translated_xml_io).getroot()

        # Apply the translation slide by slide.
        for slide_number, slide in enumerate(prs.slides, start=1):
            xml_slide = root.find(f".//slide[@number='{slide_number}']")
            if xml_slide is None:
                continue

            for shape_index, shape in enumerate(slide.shapes):
                if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                    apply_group_properties_recursive(shape, shape_index, xml_slide)
                    continue

                if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
                    raw = _properties_text(
                        xml_slide.find(f".//table_element[@shape_index='{shape_index}']")
                    )
                    if raw is not None:
                        try:
                            apply_table_properties(shape.table, json.loads(raw))
                        except Exception as e:
                            print(f"Error applying table properties: {str(e)}")
                    continue

                if hasattr(shape, "text"):
                    raw = _properties_text(
                        xml_slide.find(f".//text_element[@shape_index='{shape_index}']")
                    )
                    if raw is not None:
                        try:
                            apply_shape_properties(shape, json.loads(raw))
                        except Exception as e:
                            print(f"Error applying shape properties: {str(e)}")

        # Serialise the presentation and store it in MongoDB.
        output_io = BytesIO()
        prs.save(output_io)
        output_io.seek(0)  # rewind so the upload reads from the start
        file_id = save_file_to_mongodb(
            db_name, output_collection, "translated_presentation.pptx", output_io
        )
        print(f"Translated PowerPoint saved to MongoDB with ID: {file_id}")
        return file_id
    except Exception as e:
        print(f"Error creating translated PowerPoint: {str(e)}")
        return None
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
    """Store a file in MongoDB GridFS.

    Args:
        db_name: Name of the database to write to.
        collection_name: GridFS collection (bucket) name.
        file_name: Filename recorded for the stored file.
        file_data: Bytes or a file-like object holding the content.

    Returns:
        The GridFS id of the stored file.
    """
    # NOTE(review): this re-defines a function of the same name that appears
    # earlier in the module; being later, this definition is the one bound
    # at import time. One of the two can be removed.
    # NOTE(review): the connection string, including credentials, is
    # hard-coded here; it should come from configuration or the environment.
    with MongoClient("mongodb+srv://admin:[email protected]/?retryWrites=true&w=majority&appName=Cluster0") as client:
        db = client[db_name]
        fs = GridFS(db, collection_name)
        file_id = fs.put(file_data, filename=file_name)
    return file_id