Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -19,6 +19,7 @@ os.system('python download_models_hf.py') | |
| 19 | 
             
            os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
         | 
| 20 |  | 
| 21 | 
             
            os.system('cp -r paddleocr /home/user/.paddleocr')
         | 
|  | |
| 22 | 
             
            from gradio_pdf import PDF
         | 
| 23 |  | 
| 24 | 
             
            import gradio as gr
         | 
| @@ -110,6 +111,7 @@ def replace_image_with_base64(markdown_text, image_dir_path): | |
| 110 |  | 
| 111 |  | 
| 112 | 
             
            def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
         | 
|  | |
| 113 | 
             
                # 获取识别的md文件以及压缩包文件路径
         | 
| 114 | 
             
                local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
         | 
| 115 | 
             
                                                    layout_mode, formula_enable, table_enable, language)
         | 
| @@ -202,7 +204,7 @@ if __name__ == "__main__": | |
| 202 | 
             
                    with gr.Row():
         | 
| 203 | 
             
                        with gr.Column(variant='panel', scale=5):
         | 
| 204 | 
             
                            file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
         | 
| 205 | 
            -
                            max_pages = gr.Slider(1,  | 
| 206 | 
             
                            with gr.Row():
         | 
| 207 | 
             
                                layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
         | 
| 208 | 
             
                                language = gr.Dropdown(all_lang, label="Language", value="")
         | 
| @@ -213,25 +215,25 @@ if __name__ == "__main__": | |
| 213 | 
             
                            with gr.Row():
         | 
| 214 | 
             
                                change_bu = gr.Button("Convert")
         | 
| 215 | 
             
                                clear_bu = gr.ClearButton(value="Clear")
         | 
| 216 | 
            -
                            pdf_show = PDF(label= | 
| 217 | 
             
                            with gr.Accordion("Examples:"):
         | 
| 218 | 
             
                                example_root = os.path.join(os.path.dirname(__file__), "examples")
         | 
| 219 | 
             
                                gr.Examples(
         | 
| 220 | 
             
                                    examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
         | 
| 221 | 
             
                                              _.endswith("pdf")],
         | 
| 222 | 
            -
                                    inputs= | 
| 223 | 
             
                                )
         | 
| 224 |  | 
| 225 | 
             
                        with gr.Column(variant='panel', scale=5):
         | 
| 226 | 
             
                            output_file = gr.File(label="convert result", interactive=False)
         | 
| 227 | 
             
                            with gr.Tabs():
         | 
| 228 | 
             
                                with gr.Tab("Markdown rendering"):
         | 
| 229 | 
            -
                                    md = gr.Markdown(label="Markdown rendering", height= | 
| 230 | 
             
                                                     latex_delimiters=latex_delimiters, line_breaks=True)
         | 
| 231 | 
             
                                with gr.Tab("Markdown text"):
         | 
| 232 | 
             
                                    md_text = gr.TextArea(lines=45, show_copy_button=True)
         | 
| 233 | 
            -
                    file. | 
| 234 | 
            -
                    change_bu.click(fn=to_markdown, inputs=[ | 
| 235 | 
             
                                    outputs=[md, md_text, output_file, pdf_show], api_name=False)
         | 
| 236 | 
             
                    clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
         | 
| 237 |  | 
|  | |
| 19 | 
             
            os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
         | 
| 20 |  | 
| 21 | 
             
            os.system('cp -r paddleocr /home/user/.paddleocr')
         | 
| 22 | 
            +
            os.system('pip install -U gradio-pdf')
         | 
| 23 | 
             
            from gradio_pdf import PDF
         | 
| 24 |  | 
| 25 | 
             
            import gradio as gr
         | 
|  | |
| 111 |  | 
| 112 |  | 
| 113 | 
             
            def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
         | 
| 114 | 
            +
                file_path = to_pdf(file_path)
         | 
| 115 | 
             
                # 获取识别的md文件以及压缩包文件路径
         | 
| 116 | 
             
                local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
         | 
| 117 | 
             
                                                    layout_mode, formula_enable, table_enable, language)
         | 
|  | |
| 204 | 
             
                    with gr.Row():
         | 
| 205 | 
             
                        with gr.Column(variant='panel', scale=5):
         | 
| 206 | 
             
                            file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
         | 
| 207 | 
            +
                            max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
         | 
| 208 | 
             
                            with gr.Row():
         | 
| 209 | 
             
                                layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
         | 
| 210 | 
             
                                language = gr.Dropdown(all_lang, label="Language", value="")
         | 
|  | |
| 215 | 
             
                            with gr.Row():
         | 
| 216 | 
             
                                change_bu = gr.Button("Convert")
         | 
| 217 | 
             
                                clear_bu = gr.ClearButton(value="Clear")
         | 
| 218 | 
            +
                            pdf_show = PDF(label='PDF preview', interactive=False, visible=True, height=800)
         | 
| 219 | 
             
                            with gr.Accordion("Examples:"):
         | 
| 220 | 
             
                                example_root = os.path.join(os.path.dirname(__file__), "examples")
         | 
| 221 | 
             
                                gr.Examples(
         | 
| 222 | 
             
                                    examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
         | 
| 223 | 
             
                                              _.endswith("pdf")],
         | 
| 224 | 
            +
                                    inputs=file
         | 
| 225 | 
             
                                )
         | 
| 226 |  | 
| 227 | 
             
                        with gr.Column(variant='panel', scale=5):
         | 
| 228 | 
             
                            output_file = gr.File(label="convert result", interactive=False)
         | 
| 229 | 
             
                            with gr.Tabs():
         | 
| 230 | 
             
                                with gr.Tab("Markdown rendering"):
         | 
| 231 | 
            +
                                    md = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True,
         | 
| 232 | 
             
                                                     latex_delimiters=latex_delimiters, line_breaks=True)
         | 
| 233 | 
             
                                with gr.Tab("Markdown text"):
         | 
| 234 | 
             
                                    md_text = gr.TextArea(lines=45, show_copy_button=True)
         | 
| 235 | 
            +
                    file.change(fn=to_pdf, inputs=file, outputs=pdf_show)
         | 
| 236 | 
            +
                    change_bu.click(fn=to_markdown, inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
         | 
| 237 | 
             
                                    outputs=[md, md_text, output_file, pdf_show], api_name=False)
         | 
| 238 | 
             
                    clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
         | 
| 239 |  | 
 
			
