# ppt_processor.py
from pathlib import Path | |
from xml_handling import ppt_to_xml, translate_xml_file | |
from pptx_object import create_translated_ppt | |
import os | |
def process_ppt_file(ppt_path: Path, source_lang: str, target_lang: str):
    """Process a single PPT/PPTX file from XML extraction to final translation.

    The pipeline has three stages, each writing next to the input file:

    1. ``<stem>_original.xml``   - XML produced by ``ppt_to_xml``.
    2. ``<stem>_translated.xml`` - written by ``translate_xml_file``.
    3. ``<stem>_translated<suffix>`` - built by ``create_translated_ppt``.

    Args:
        ppt_path: Location of the presentation, given either as a ``Path``
            (or other path-like object) or as a string. A string may carry
            shell-style decoration: surrounding quotes, backslash-escaped
            spaces or apostrophes. That decoration is removed before use.
            A leading ``~`` is expanded in both cases.
        source_lang: Source language, forwarded to ``translate_xml_file``.
        target_lang: Target language, forwarded to ``translate_xml_file``.

    Returns:
        None. Validation problems and any exception raised by a pipeline
        stage are reported on stdout instead of being propagated.
    """
    if isinstance(ppt_path, str):
        # Strings typically come from a pasted/dragged shell path, so undo
        # the quoting and escaping a terminal adds.
        cleaned = ppt_path.strip("'\"").replace("\\ ", " ").replace("\\'", "'")
    else:
        # A Path object is already a literal path; Path has no strip() or
        # replace(old, new), so it must not go through the string cleanup.
        cleaned = os.fspath(ppt_path)
    ppt_path = Path(os.path.expanduser(cleaned)).resolve()

    # NOTE (translated from the original author's comment): convert this
    # into a DB link on the server.
    try:
        if not ppt_path.is_file():
            print(f"Error: '{ppt_path}' is not a valid file.")
            return
        if ppt_path.suffix.lower() not in ('.ppt', '.pptx'):
            print(f"Error: '{ppt_path}' is not a PowerPoint file.")
            return

        base_dir = ppt_path.parent
        original_xml_path = base_dir / f"{ppt_path.stem}_original.xml"
        translated_xml_path = base_dir / f"{ppt_path.stem}_translated.xml"
        output_ppt_path = base_dir / f"{ppt_path.stem}_translated{ppt_path.suffix}"

        # Original XML
        print(f"Generating original XML for {ppt_path.name}...")
        original_xml = ppt_to_xml(str(ppt_path))
        if not original_xml:
            # Without fresh XML the translation step would read a missing
            # file, or a stale one left over from an earlier run.
            print(f"Error: no XML could be generated from '{ppt_path}'.")
            return
        with open(original_xml_path, 'w', encoding='utf-8') as f:
            f.write(original_xml)
        print(f"Original XML saved: {original_xml_path}")
        # TODO: save the original XML to MongoDB (save_xml_to_mongodb).

        # Translated XML
        print(f"Generating translated XML (from {source_lang} to {target_lang}) for {ppt_path.name}...")
        translate_xml_file(str(original_xml_path), str(translated_xml_path), source_lang, target_lang)

        # Create Translated PPT
        print(f"Creating translated PPT for {ppt_path.name}...")
        create_translated_ppt(str(ppt_path), str(translated_xml_path), str(output_ppt_path))
    except Exception as e:
        # Top-level boundary for the pipeline: report the failure and return
        # normally so one failing file does not raise into the caller.
        print(f"Error in process_ppt_file for {ppt_path}: {e}")