Spaces:

Souvik3333
/

Nanonets-ocr-s

Running on A10G

App Files Files Community

Souvik3333 committed 1 day ago

Commit

e7411a5

verified ·

1 Parent(s): 986f27c

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -33

app.py CHANGED Viewed

@@ -1,9 +1,3 @@
-# app.py
-# Remember to add 'PyMuPDF', 'pdf2image', and 'torch' to your requirements.txt or install them.
-# For PDF processing, you might also need to install poppler:
-# On Debian/Ubuntu: sudo apt-get install poppler-utils
-# On macOS (using Homebrew): brew install poppler
 import gradio as gr
 from PIL import Image
 from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
@@ -167,7 +161,6 @@ def convert_to_markdown_stream(
     else:
         user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
     # Accumulate results from all pages
     full_markdown_content = ""
@@ -212,7 +205,7 @@ def convert_to_markdown_stream(
         except Exception as e:
             return f"Error: {e}"
-def process_document(file_path, max_tokens, with_img_desc: bool = False):
     """
     Process uploaded document (PDF or image) and convert to markdown.
@@ -223,33 +216,34 @@ def process_document(file_path, max_tokens, with_img_desc: bool = False):
     Returns:
         Generator yielding markdown content
     """
-    if file_path is None:
         return "Please upload a file first."
     try:
         # Handle PDF files
-        if file_path.name.lower().endswith('.pdf'):
-            # Convert PDF to images
-            with tempfile.TemporaryDirectory() as temp_dir:
-                # Copy uploaded file to temp directory
-                temp_pdf_path = os.path.join(temp_dir, "document.pdf")
-                import shutil
-                shutil.copy(file_path.name, temp_pdf_path)
-                # Convert PDF pages to images
-                images = convert_from_path(temp_pdf_path, dpi=150)
-                images = [image.convert("RGB") for image in images]
-                images = [image.resize((2048, 2048)) for image in images]
-                # Process each page
-                for result in convert_to_markdown_stream(
-                    images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
-                ):
-                    yield process_tags(result)
-        # Handle image files
-        else:
-            # Open image directly
-            image = Image.open(file_path.name).convert("RGB")
             image = image.resize((2048, 2048))
             # Process single image
@@ -285,9 +279,8 @@ with gr.Blocks(title="PDF to Markdown Converter", theme=gr.themes.Soft()) as dem
     with gr.Row():
         with gr.Column(scale=1):
-            file_input = gr.File(
-                label="Upload PDF or Image Document",
-                file_types=["pdf", "image"],
                 height=200
             )
             max_tokens_slider = gr.Slider(

 import gradio as gr
 from PIL import Image
 from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
     else:
         user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
     # Accumulate results from all pages
     full_markdown_content = ""
         except Exception as e:
             return f"Error: {e}"
+def process_document(image, max_tokens, with_img_desc: bool = False):
     """
     Process uploaded document (PDF or image) and convert to markdown.
     Returns:
         Generator yielding markdown content
     """
+    if image is None:
         return "Please upload a file first."
     try:
         # Handle PDF files
+        # if file_path.name.lower().endswith('.pdf'):
+        #     # Convert PDF to images
+        #     with tempfile.TemporaryDirectory() as temp_dir:
+        #         # Copy uploaded file to temp directory
+        #         temp_pdf_path = os.path.join(temp_dir, "document.pdf")
+        #         import shutil
+        #         shutil.copy(file_path.name, temp_pdf_path)
+        #         # Convert PDF pages to images
+        #         images = convert_from_path(temp_pdf_path, dpi=150)
+        #         images = [image.convert("RGB") for image in images]
+        #         images = [image.resize((2048, 2048)) for image in images]
+        #         # Process each page
+        #         for result in convert_to_markdown_stream(
+        #             images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
+        #         ):
+        #             yield process_tags(result)
+        # # Handle image files
+        # else:
+        #     # Open image directly
+            # image = Image.open(file_path.name).convert("RGB")
+            # image = image.resize((2048, 2048))
+            image = Image.fromarray(image)
             image = image.resize((2048, 2048))
             # Process single image
     with gr.Row():
         with gr.Column(scale=1):
+            file_input = gr.Image(
+                label="Upload Image Document",
                 height=200
             )
             max_tokens_slider = gr.Slider(