import json

import gradio as gr
import spaces
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
from docling.document_converter import DocumentConverter, PdfFormatOption


# GPU decorator not really required for Docling OCR, but kept if you want
@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded PDF with Docling (OCR enabled) and render it as text.

    Args:
        file: The value delivered by the ``gr.File`` input. Either an object
            exposing the upload's path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"`` as selected in the UI.

    Returns:
        A ``(converted_text, metadata)`` pair. ``converted_text`` is always a
        string so it can be shown in the ``gr.Textbox``; ``metadata`` is a
        JSON-friendly dict shown in the ``gr.JSON`` component.
    """
    # Clicking "Convert" without an upload passes None; report it in the
    # output box instead of failing with AttributeError on `None.name`.
    if file is None:
        return "⚠️ Please upload a PDF file first", {}

    # Accept both file-like objects (path in `.name`) and plain path strings.
    source_path = getattr(file, "name", file)

    # Configure OCR pipeline
    pdf_opts = PdfPipelineOptions(
        do_ocr=True,
        ocr_options=TesseractCliOcrOptions(lang=["eng"]),  # or ["eng","ara"] if needed
    )

    # Pipeline options must be passed per input format via format_options.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts),
        }
    )

    # Convert document
    result = converter.convert(source_path)

    # Choose output format safely
    if output_format == "Markdown":
        converted_text = result.document.export_to_markdown()
    elif output_format == "JSON":
        # The textbox needs a string: serialize the exported dict as real JSON
        # rather than letting a raw dict be rendered as a Python repr.
        converted_text = json.dumps(
            result.document.export_to_dict(),
            indent=2,
            ensure_ascii=False,
        )
    else:
        converted_text = "⚠️ Unsupported format"

    # Metadata as JSON-friendly dict
    metadata = {"Available Attributes": dir(result.document)}

    return converted_text, metadata


with gr.Blocks() as app:
    gr.Markdown("# 📄 Document Converter with Docling OCR")
    gr.Markdown("Upload a PDF, choose the output format, and get the converted text + metadata.")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        format_input = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")

    output_text = gr.Textbox(label="Converted Document", lines=20)
    output_metadata = gr.JSON(label="Metadata")

    convert_button = gr.Button("Convert")
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)