Spaces:
Running
on
A10G
Running
on
A10G
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,3 @@
|
|
1 |
-
# app.py
|
2 |
-
# Remember to add 'PyMuPDF', 'pdf2image', and 'torch' to your requirements.txt or install them.
|
3 |
-
# For PDF processing, you might also need to install poppler:
|
4 |
-
# On Debian/Ubuntu: sudo apt-get install poppler-utils
|
5 |
-
# On macOS (using Homebrew): brew install poppler
|
6 |
-
|
7 |
import gradio as gr
|
8 |
from PIL import Image
|
9 |
from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
|
@@ -167,7 +161,6 @@ def convert_to_markdown_stream(
|
|
167 |
else:
|
168 |
user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
|
169 |
|
170 |
-
|
171 |
# Accumulate results from all pages
|
172 |
full_markdown_content = ""
|
173 |
|
@@ -212,7 +205,7 @@ def convert_to_markdown_stream(
|
|
212 |
except Exception as e:
|
213 |
return f"Error: {e}"
|
214 |
|
215 |
-
def process_document(
|
216 |
"""
|
217 |
Process uploaded document (PDF or image) and convert to markdown.
|
218 |
|
@@ -223,33 +216,34 @@ def process_document(file_path, max_tokens, with_img_desc: bool = False):
|
|
223 |
Returns:
|
224 |
Generator yielding markdown content
|
225 |
"""
|
226 |
-
if
|
227 |
return "Please upload a file first."
|
228 |
-
|
229 |
try:
|
230 |
# Handle PDF files
|
231 |
-
if file_path.name.lower().endswith('.pdf'):
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
|
249 |
-
# Handle image files
|
250 |
-
else:
|
251 |
-
|
252 |
-
image = Image.open(file_path.name).convert("RGB")
|
|
|
|
|
253 |
image = image.resize((2048, 2048))
|
254 |
|
255 |
# Process single image
|
@@ -285,9 +279,8 @@ with gr.Blocks(title="PDF to Markdown Converter", theme=gr.themes.Soft()) as dem
|
|
285 |
|
286 |
with gr.Row():
|
287 |
with gr.Column(scale=1):
|
288 |
-
file_input = gr.
|
289 |
-
label="Upload
|
290 |
-
file_types=["pdf", "image"],
|
291 |
height=200
|
292 |
)
|
293 |
max_tokens_slider = gr.Slider(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from PIL import Image
|
3 |
from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
|
|
|
161 |
else:
|
162 |
user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
|
163 |
|
|
|
164 |
# Accumulate results from all pages
|
165 |
full_markdown_content = ""
|
166 |
|
|
|
205 |
except Exception as e:
|
206 |
return f"Error: {e}"
|
207 |
|
208 |
+
def process_document(image, max_tokens, with_img_desc: bool = False):
|
209 |
"""
|
210 |
Process uploaded document (PDF or image) and convert to markdown.
|
211 |
|
|
|
216 |
Returns:
|
217 |
Generator yielding markdown content
|
218 |
"""
|
219 |
+
if image is None:
|
220 |
return "Please upload a file first."
|
|
|
221 |
try:
|
222 |
# Handle PDF files
|
223 |
+
# if file_path.name.lower().endswith('.pdf'):
|
224 |
+
# # Convert PDF to images
|
225 |
+
# with tempfile.TemporaryDirectory() as temp_dir:
|
226 |
+
# # Copy uploaded file to temp directory
|
227 |
+
# temp_pdf_path = os.path.join(temp_dir, "document.pdf")
|
228 |
+
# import shutil
|
229 |
+
# shutil.copy(file_path.name, temp_pdf_path)
|
230 |
|
231 |
+
# # Convert PDF pages to images
|
232 |
+
# images = convert_from_path(temp_pdf_path, dpi=150)
|
233 |
+
# images = [image.convert("RGB") for image in images]
|
234 |
+
# images = [image.resize((2048, 2048)) for image in images]
|
235 |
+
# # Process each page
|
236 |
+
# for result in convert_to_markdown_stream(
|
237 |
+
# images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
|
238 |
+
# ):
|
239 |
+
# yield process_tags(result)
|
240 |
|
241 |
+
# # Handle image files
|
242 |
+
# else:
|
243 |
+
# # Open image directly
|
244 |
+
# image = Image.open(file_path.name).convert("RGB")
|
245 |
+
# image = image.resize((2048, 2048))
|
246 |
+
image = Image.fromarray(image)
|
247 |
image = image.resize((2048, 2048))
|
248 |
|
249 |
# Process single image
|
|
|
279 |
|
280 |
with gr.Row():
|
281 |
with gr.Column(scale=1):
|
282 |
+
file_input = gr.Image(
|
283 |
+
label="Upload Image Document",
|
|
|
284 |
height=200
|
285 |
)
|
286 |
max_tokens_slider = gr.Slider(
|