"""PDF-to-Markdown conversion built on marker, with a shared pre-loaded converter.

Call initialize_converter() once at application startup; convert_pdf() then
reuses the loaded models for every subsequent conversion.
"""

import gc
import os
import sys

import torch

import marker
from marker.config.parser import ConfigParser
from marker.models import create_model_dict

# Module-level singleton holding the pre-loaded converter.
# Stays None until initialize_converter() completes successfully.
_converter = None


def initialize_converter():
    """Initialize the marker converter models once and store them globally.

    Idempotent: if the converter is already loaded this is a no-op. On
    failure the global converter is reset to None, CUDA memory is released,
    and the original exception is re-raised to the caller.

    Raises:
        Exception: whatever model loading or converter construction raised.
    """
    global _converter
    if _converter is None:
        print("Initializing marker models...")
        try:
            # Clear any existing CUDA cache before loading models.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gc.collect()
                print(f"CUDA memory before initialization: {torch.cuda.memory_allocated()/1024**2:.2f} MB allocated, {torch.cuda.memory_reserved()/1024**2:.2f} MB reserved")

            # Set custom font path from environment variable if available.
            font_path = os.environ.get('MARKER_FONT_PATH')
            if font_path:
                try:
                    # Import marker settings and override font path.
                    # NOTE(review): this only points marker at
                    # <font_path>/NotoSans-Regular.ttf — the font file itself
                    # is never created here; confirm it is provisioned
                    # externally (e.g. baked into the image or downloaded).
                    from marker import settings
                    os.makedirs(font_path, exist_ok=True)
                    custom_font_path = os.path.join(font_path, 'NotoSans-Regular.ttf')
                    settings.FONT_PATH = custom_font_path
                    print(f"Using custom font path: {custom_font_path}")
                except ImportError:
                    print("Could not import marker settings, using default font path")
                except Exception as e:
                    print(f"Error setting custom font path: {e}", file=sys.stderr)

            # Create configuration, explicitly setting output format and
            # batch multiplier.
            config_parser = ConfigParser({
                'output_format': 'markdown',
                'batch_multiplier': 4,  # Increased from default 2
                # Add any device-specific configuration here.
                'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            })

            # Load models with explicit device mapping.
            models = create_model_dict()

            # Get converter class and create converter.
            converter_cls = config_parser.get_converter_cls()
            _converter = converter_cls(
                config=config_parser.generate_config_dict(),
                artifact_dict=models,
                processor_list=config_parser.get_processors(),
                renderer=config_parser.get_renderer(),
                llm_service=config_parser.get_llm_service(),
            )

            # Force another garbage collection after model load.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gc.collect()
                print(f"CUDA memory after initialization: {torch.cuda.memory_allocated()/1024**2:.2f} MB allocated, {torch.cuda.memory_reserved()/1024**2:.2f} MB reserved")

            print("Marker models initialized successfully with batch_multiplier=4.")
        except Exception as e:
            print(f"Failed to initialize marker models: {e}", file=sys.stderr)
            _converter = None  # Ensure it's None if init fails
            # Attempt to clean up GPU memory in case of initialization failure.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gc.collect()
            raise
    else:
        print("Marker models already initialized.")


def convert_pdf(pdf_input_path, output_md_path=None):
    """
    Convert PDF file to Markdown using the pre-loaded marker converter.

    Args:
        pdf_input_path (str): Path to the input PDF file
        output_md_path (str, optional): Path where to save the output Markdown
            file. If None, markdown is only returned.

    Returns:
        str: The markdown text

    Raises:
        FileNotFoundError: if pdf_input_path does not exist.
        RuntimeError: if initialize_converter() has not been run successfully.
        Exception: any error raised by the underlying converter (re-raised
            after a CUDA cache cleanup attempt).
    """
    # Check if the input PDF exists.
    if not os.path.exists(pdf_input_path):
        raise FileNotFoundError(f"Input PDF file not found at '{pdf_input_path}'")

    # Check if converter is initialized.
    if _converter is None:
        raise RuntimeError("Marker converter has not been initialized. Call initialize_converter() during application startup.")

    print(f"Starting conversion of '{pdf_input_path}' using pre-loaded models...")
    try:
        # Free up any temporary memory before conversion.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Convert the PDF to markdown using the pre-loaded converter.
        result = _converter(pdf_input_path)

        # Access the markdown content directly from the result object.
        # NOTE(review): assumes the markdown renderer returns an object
        # exposing a `.markdown` attribute — confirm against the installed
        # marker version.
        markdown_text = result.markdown

        # If output path is provided, save the markdown.
        if output_md_path:
            output_dir = os.path.dirname(output_md_path)
            if output_dir:
                # exist_ok=True already tolerates an existing directory, so
                # no separate os.path.exists() check is needed.
                os.makedirs(output_dir, exist_ok=True)
            with open(output_md_path, "w", encoding="utf-8") as f:
                f.write(markdown_text)
            print(f"Successfully saved markdown to '{output_md_path}'")

        # Clean up temporary GPU memory after conversion.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return markdown_text
    except Exception as e:
        print(f"An error occurred during conversion: {e}", file=sys.stderr)
        print(f"Error details: {str(type(e))}", file=sys.stderr)
        # Try to clean up GPU memory on error.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        raise