Souvik3333 committed on
Commit
e7411a5
·
verified ·
1 Parent(s): 986f27c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -33
app.py CHANGED
@@ -1,9 +1,3 @@
1
- # app.py
2
- # Remember to add 'PyMuPDF', 'pdf2image', and 'torch' to your requirements.txt or install them.
3
- # For PDF processing, you might also need to install poppler:
4
- # On Debian/Ubuntu: sudo apt-get install poppler-utils
5
- # On macOS (using Homebrew): brew install poppler
6
-
7
  import gradio as gr
8
  from PIL import Image
9
  from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
@@ -167,7 +161,6 @@ def convert_to_markdown_stream(
167
  else:
168
  user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
169
 
170
-
171
  # Accumulate results from all pages
172
  full_markdown_content = ""
173
 
@@ -212,7 +205,7 @@ def convert_to_markdown_stream(
212
  except Exception as e:
213
  return f"Error: {e}"
214
 
215
- def process_document(file_path, max_tokens, with_img_desc: bool = False):
216
  """
217
  Process uploaded document (PDF or image) and convert to markdown.
218
 
@@ -223,33 +216,34 @@ def process_document(file_path, max_tokens, with_img_desc: bool = False):
223
  Returns:
224
  Generator yielding markdown content
225
  """
226
- if file_path is None:
227
  return "Please upload a file first."
228
-
229
  try:
230
  # Handle PDF files
231
- if file_path.name.lower().endswith('.pdf'):
232
- # Convert PDF to images
233
- with tempfile.TemporaryDirectory() as temp_dir:
234
- # Copy uploaded file to temp directory
235
- temp_pdf_path = os.path.join(temp_dir, "document.pdf")
236
- import shutil
237
- shutil.copy(file_path.name, temp_pdf_path)
238
 
239
- # Convert PDF pages to images
240
- images = convert_from_path(temp_pdf_path, dpi=150)
241
- images = [image.convert("RGB") for image in images]
242
- images = [image.resize((2048, 2048)) for image in images]
243
- # Process each page
244
- for result in convert_to_markdown_stream(
245
- images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
246
- ):
247
- yield process_tags(result)
248
 
249
- # Handle image files
250
- else:
251
- # Open image directly
252
- image = Image.open(file_path.name).convert("RGB")
 
 
253
  image = image.resize((2048, 2048))
254
 
255
  # Process single image
@@ -285,9 +279,8 @@ with gr.Blocks(title="PDF to Markdown Converter", theme=gr.themes.Soft()) as dem
285
 
286
  with gr.Row():
287
  with gr.Column(scale=1):
288
- file_input = gr.File(
289
- label="Upload PDF or Image Document",
290
- file_types=["pdf", "image"],
291
  height=200
292
  )
293
  max_tokens_slider = gr.Slider(
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from PIL import Image
3
  from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
 
161
  else:
162
  user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
163
 
 
164
  # Accumulate results from all pages
165
  full_markdown_content = ""
166
 
 
205
  except Exception as e:
206
  return f"Error: {e}"
207
 
208
+ def process_document(image, max_tokens, with_img_desc: bool = False):
209
  """
210
  Process uploaded document (PDF or image) and convert to markdown.
211
 
 
216
  Returns:
217
  Generator yielding markdown content
218
  """
219
+ if image is None:
220
  return "Please upload a file first."
 
221
  try:
222
  # Handle PDF files
223
+ # if file_path.name.lower().endswith('.pdf'):
224
+ # # Convert PDF to images
225
+ # with tempfile.TemporaryDirectory() as temp_dir:
226
+ # # Copy uploaded file to temp directory
227
+ # temp_pdf_path = os.path.join(temp_dir, "document.pdf")
228
+ # import shutil
229
+ # shutil.copy(file_path.name, temp_pdf_path)
230
 
231
+ # # Convert PDF pages to images
232
+ # images = convert_from_path(temp_pdf_path, dpi=150)
233
+ # images = [image.convert("RGB") for image in images]
234
+ # images = [image.resize((2048, 2048)) for image in images]
235
+ # # Process each page
236
+ # for result in convert_to_markdown_stream(
237
+ # images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
238
+ # ):
239
+ # yield process_tags(result)
240
 
241
+ # # Handle image files
242
+ # else:
243
+ # # Open image directly
244
+ # image = Image.open(file_path.name).convert("RGB")
245
+ # image = image.resize((2048, 2048))
246
+ image = Image.fromarray(image)
247
  image = image.resize((2048, 2048))
248
 
249
  # Process single image
 
279
 
280
  with gr.Row():
281
  with gr.Column(scale=1):
282
+ file_input = gr.Image(
283
+ label="Upload Image Document",
 
284
  height=200
285
  )
286
  max_tokens_slider = gr.Slider(