import gradio as gr import json import tempfile import os from typing import List, Optional, Literal, Tuple from PIL import Image import spaces from pathlib import Path from visualizer import htrflow_visualizer from htrflow.volume.volume import Collection from htrflow.pipeline.pipeline import Pipeline DEFAULT_OUTPUT = "alto" FORMAT_CHOICES = [ "letter_english", "letter_swedish", "spread_english", "spread_swedish", ] FILE_CHOICES = ["txt", "alto", "page", "json"] FormatChoices = Literal[ "letter_english", "letter_swedish", "spread_english", "spread_swedish" ] FileChoices = Literal["txt", "alto", "page", "json"] PIPELINE_CONFIGS = { "letter_english": { "steps": [ { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": { "model": "Riksarkivet/yolov9-lines-within-regions-1" }, "generation_settings": {"batch_size": 8}, }, }, { "step": "TextRecognition", "settings": { "model": "TrOCR", "model_settings": {"model": "microsoft/trocr-base-handwritten"}, "generation_settings": {"batch_size": 16}, }, }, {"step": "OrderLines"}, ] }, "letter_swedish": { "steps": [ { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": { "model": "Riksarkivet/yolov9-lines-within-regions-1" }, "generation_settings": {"batch_size": 8}, }, }, { "step": "TextRecognition", "settings": { "model": "TrOCR", "model_settings": { "model": "Riksarkivet/trocr-base-handwritten-hist-swe-2" }, "generation_settings": {"batch_size": 16}, }, }, {"step": "OrderLines"}, ] }, "spread_english": { "steps": [ { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": {"model": "Riksarkivet/yolov9-regions-1"}, "generation_settings": {"batch_size": 4}, }, }, { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": { "model": "Riksarkivet/yolov9-lines-within-regions-1" }, "generation_settings": {"batch_size": 8}, }, }, { "step": "TextRecognition", "settings": { "model": "TrOCR", "model_settings": {"model": "microsoft/trocr-base-handwritten"}, "generation_settings": {"batch_size": 16}, }, }, {"step": "ReadingOrderMarginalia", "settings": {"two_page": True}}, ] }, "spread_swedish": { "steps": [ { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": {"model": "Riksarkivet/yolov9-regions-1"}, "generation_settings": {"batch_size": 4}, }, }, { "step": "Segmentation", "settings": { "model": "yolo", "model_settings": { "model": "Riksarkivet/yolov9-lines-within-regions-1" }, "generation_settings": {"batch_size": 8}, }, }, { "step": "TextRecognition", "settings": { "model": "TrOCR", "model_settings": { "model": "Riksarkivet/trocr-base-handwritten-hist-swe-2" }, "generation_settings": {"batch_size": 16}, }, }, {"step": "ReadingOrderMarginalia", "settings": {"two_page": True}}, ] }, } @spaces.GPU def _process_htr_pipeline( image_path: str, document_type: FormatChoices, custom_settings: Optional[str] = None ) -> Collection: """Process HTR pipeline and return the processed collection.""" if not image_path: raise ValueError("No image provided") if custom_settings: try: config = json.loads(custom_settings) except json.JSONDecodeError: raise ValueError("Invalid JSON in custom_settings parameter") else: config = PIPELINE_CONFIGS[document_type] collection = Collection([image_path]) pipeline = Pipeline.from_config(config) try: processed_collection = pipeline.run(collection) return processed_collection except Exception as pipeline_error: raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}") def htr_text( image_path: str, document_type: FormatChoices = "letter_swedish", custom_settings: Optional[str] = None, ) -> str: """Extract text from handwritten documents using HTR.""" try: processed_collection = _process_htr_pipeline( image_path, document_type, custom_settings ) extracted_text = extract_text_from_collection(processed_collection) return extracted_text except Exception as e: return f"HTR text extraction failed: {str(e)}" def htrflow_file( image_path: str, document_type: FormatChoices = "letter_swedish", output_format: FileChoices = DEFAULT_OUTPUT, custom_settings: Optional[str] = None, server_name: str = "https://gabriel-htrflow-mcp.hf.space", ) -> str: """ Process HTR and return a formatted file for download. Returns: str: File path for direct download via gr.File (server_name/gradio_api/file=/tmp/gradio/{temp_folder}/{file_name}) """ try: original_filename = Path(image_path).stem or "output" processed_collection = _process_htr_pipeline( image_path, document_type, custom_settings ) temp_dir = Path(tempfile.mkdtemp()) export_dir = temp_dir / output_format processed_collection.save(directory=str(export_dir), serializer=output_format) output_file_path = None for root, _, files in os.walk(export_dir): for file in files: old_path = os.path.join(root, file) file_ext = Path(file).suffix new_filename = ( f"{original_filename}.{output_format}" if not file_ext else f"{original_filename}{file_ext}" ) new_path = os.path.join(root, new_filename) os.rename(old_path, new_path) output_file_path = new_path break if output_file_path and os.path.exists(output_file_path): return output_file_path else: return None except Exception as e: return None def extract_text_from_collection(collection: Collection) -> str: text_lines = [] for page in collection.pages: for node in page.traverse(): if hasattr(node, "text") and node.text: text_lines.append(node.text) return "\n".join(text_lines) def create_htrflow_mcp_server(): htr_text_interface = gr.Interface( fn=htr_text, inputs=[ gr.Image(type="filepath", label="Upload Image or Enter URL"), gr.Dropdown( choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type" ), gr.Textbox( label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value="", ), ], outputs=[gr.Textbox(label="Extracted Text", lines=10)], description="Extract plain text from handwritten documents using HTR", api_name="htr_text", ) htrflow_file_interface = gr.Interface( fn=htrflow_file, inputs=[ gr.Image(type="filepath", label="Upload Image or Enter URL"), gr.Dropdown( choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type" ), gr.Dropdown( choices=FILE_CHOICES, value=DEFAULT_OUTPUT, label="Output Format" ), gr.Textbox( label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value="", ), gr.Textbox( label="Server Name", value="https://gabriel-htrflow-mcp.hf.space", placeholder="Server URL for download links", ), ], outputs=[gr.File(label="Download HTR Output File")], description="Process handwritten text and get formatted file (ALTO XML, PAGE XML, JSON, or TXT)", api_name="htrflow_file", ) htrflow_viz = gr.Interface( fn=htrflow_visualizer, inputs=[ gr.Image(type="filepath", label="Upload Original Image"), gr.File(label="Upload ALTO/PAGE XML File"), ], outputs=gr.File(label="Download Visualization Image"), description="Visualize HTR results by overlaying text regions and polygons on the original image", api_name="htrflow_visualizer", ) demo = gr.TabbedInterface( [htr_text_interface, htrflow_file_interface, htrflow_viz], ["HTR Text", "HTR File", "HTR Visualizer"], title="HTRflow Handwritten Text Recognition", ) return demo if __name__ == "__main__": demo = create_htrflow_mcp_server() demo.launch(mcp_server=True, share=False, debug=False)