broadfield-dev committed on
Commit
2f3311a
·
verified ·
1 Parent(s): a0423b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -23,7 +23,6 @@ def stitch_images_vertically(images: List[Image.Image]) -> Image.Image:
23
 
24
  max_width = max(img.width for img in images)
25
  total_height = sum(img.height for img in images)
26
-
27
  stitched_image = Image.new('RGB', (max_width, total_height), (255, 255, 255))
28
 
29
  current_y = 0
@@ -33,11 +32,16 @@ def stitch_images_vertically(images: List[Image.Image]) -> Image.Image:
33
 
34
  return stitched_image
35
 
36
- def stitch_images_in_grid(images: List[Image.Image], num_columns: int) -> Image.Image:
37
  if not images:
38
  return None
39
 
40
- columns = [images[i::num_columns] for i in range(num_columns)]
 
 
 
 
 
41
 
42
  stitched_columns = [stitch_images_vertically(col) for col in columns if col]
43
 
@@ -46,7 +50,6 @@ def stitch_images_in_grid(images: List[Image.Image], num_columns: int) -> Image.
46
 
47
  max_height = max(col.height for col in stitched_columns if col)
48
  total_width = sum(col.width for col in stitched_columns if col)
49
-
50
  grid_image = Image.new('RGB', (total_width, max_height), (255, 255, 255))
51
 
52
  current_x = 0
@@ -57,7 +60,7 @@ def stitch_images_in_grid(images: List[Image.Image], num_columns: int) -> Image.
57
 
58
  return grid_image
59
 
60
- def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop_left, crop_right, progress=gr.Progress()):
61
  pdf_input_source = None
62
  is_bytes = False
63
  source_name = "document"
@@ -83,12 +86,12 @@ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop
83
  raise gr.Error("Please upload a PDF file or provide a valid URL.")
84
 
85
  progress(0.3, desc="Converting PDF pages to images...")
86
- logger.info(f"Using DPI: {dpi}")
87
  try:
88
  if is_bytes:
89
- images = convert_from_bytes(pdf_input_source, dpi=dpi)
90
  else:
91
- images = convert_from_path(pdf_input_source, dpi=dpi)
92
  except (PDFInfoNotInstalledError, FileNotFoundError):
93
  raise gr.Error("Server configuration error: Poppler dependency is missing.")
94
  except (PDFPageCountError, Exception) as e:
@@ -104,15 +107,9 @@ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop
104
  progress(0.6, desc="Cropping images...")
105
  for i, img in enumerate(images):
106
  width, height = img.size
107
-
108
- left = crop_left
109
- top = crop_top
110
- right = width - crop_right
111
- bottom = height - crop_bottom
112
-
113
  if left >= right or top >= bottom:
114
  raise gr.Error(f"Crop values are too large for page {i+1}. The page dimensions are {width}x{height}, but crop settings result in an invalid area.")
115
-
116
  cropped_images.append(img.crop((left, top, right, bottom)))
117
  else:
118
  cropped_images = images
@@ -120,17 +117,15 @@ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop
120
  progress(0.7, desc=f"Stitching {len(cropped_images)} images together...")
121
 
122
  if num_columns > 1:
123
- stitched_image = stitch_images_in_grid(cropped_images, num_columns)
124
  else:
125
  stitched_image = stitch_images_vertically(cropped_images)
126
 
127
  if stitched_image is None:
128
  raise gr.Error("Image stitching failed.")
129
-
130
  logger.info("Image stitching complete.")
131
 
132
  progress(0.9, desc="Saving final image...")
133
-
134
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_") as tmp_file:
135
  stitched_image.save(tmp_file.name, "PNG")
136
  output_path = tmp_file.name
@@ -148,22 +143,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
148
  and then append them to create a single image that you can download.
149
  """
150
  )
151
-
152
  with gr.Row():
153
  with gr.Column(scale=1):
154
  with gr.Tabs():
155
  with gr.TabItem("Upload PDF"):
156
  pdf_file_input = gr.File(label="Upload PDF File", file_types=[".pdf"])
157
  with gr.TabItem("From URL"):
158
- pdf_url_input = gr.Textbox(
159
- label="PDF URL",
160
- placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
161
- )
162
 
163
- dpi_slider = gr.Slider(minimum=100, maximum=600, step=5, value=200, label="Image Resolution (DPI)")
164
  columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
165
 
166
  with gr.Accordion("Advanced Options", open=False):
 
 
167
  with gr.Row():
168
  crop_left = gr.Slider(minimum=0, maximum=500, step=10, value=0, label="Crop Left")
169
  crop_right = gr.Slider(minimum=0, maximum=500, step=10, value=0, label="Crop Right")
@@ -188,7 +181,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
188
  crop_top,
189
  crop_bottom,
190
  crop_left,
191
- crop_right
 
 
192
  ],
193
  outputs=[output_image_preview, output_image_download]
194
  )
 
23
 
24
  max_width = max(img.width for img in images)
25
  total_height = sum(img.height for img in images)
 
26
  stitched_image = Image.new('RGB', (max_width, total_height), (255, 255, 255))
27
 
28
  current_y = 0
 
32
 
33
  return stitched_image
34
 
35
+ def stitch_images_in_grid(images: List[Image.Image], num_columns: int, page_order: str) -> Image.Image:
36
  if not images:
37
  return None
38
 
39
+ if page_order == "Top-to-Bottom (down)":
40
+ num_images = len(images)
41
+ num_rows = math.ceil(num_images / num_columns)
42
+ columns = [images[i*num_rows : (i+1)*num_rows] for i in range(num_columns)]
43
+ else: # Default to "Left-to-Right (across)"
44
+ columns = [images[i::num_columns] for i in range(num_columns)]
45
 
46
  stitched_columns = [stitch_images_vertically(col) for col in columns if col]
47
 
 
50
 
51
  max_height = max(col.height for col in stitched_columns if col)
52
  total_width = sum(col.width for col in stitched_columns if col)
 
53
  grid_image = Image.new('RGB', (total_width, max_height), (255, 255, 255))
54
 
55
  current_x = 0
 
60
 
61
  return grid_image
62
 
63
+ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop_left, crop_right, hide_annotations, page_order, progress=gr.Progress()):
64
  pdf_input_source = None
65
  is_bytes = False
66
  source_name = "document"
 
86
  raise gr.Error("Please upload a PDF file or provide a valid URL.")
87
 
88
  progress(0.3, desc="Converting PDF pages to images...")
89
+ logger.info(f"Using DPI: {dpi}, Hide Annotations: {hide_annotations}")
90
  try:
91
  if is_bytes:
92
+ images = convert_from_bytes(pdf_input_source, dpi=dpi, hide_annotations=hide_annotations)
93
  else:
94
+ images = convert_from_path(pdf_input_source, dpi=dpi, hide_annotations=hide_annotations)
95
  except (PDFInfoNotInstalledError, FileNotFoundError):
96
  raise gr.Error("Server configuration error: Poppler dependency is missing.")
97
  except (PDFPageCountError, Exception) as e:
 
107
  progress(0.6, desc="Cropping images...")
108
  for i, img in enumerate(images):
109
  width, height = img.size
110
+ left, top, right, bottom = crop_left, crop_top, width - crop_right, height - crop_bottom
 
 
 
 
 
111
  if left >= right or top >= bottom:
112
  raise gr.Error(f"Crop values are too large for page {i+1}. The page dimensions are {width}x{height}, but crop settings result in an invalid area.")
 
113
  cropped_images.append(img.crop((left, top, right, bottom)))
114
  else:
115
  cropped_images = images
 
117
  progress(0.7, desc=f"Stitching {len(cropped_images)} images together...")
118
 
119
  if num_columns > 1:
120
+ stitched_image = stitch_images_in_grid(cropped_images, num_columns, page_order)
121
  else:
122
  stitched_image = stitch_images_vertically(cropped_images)
123
 
124
  if stitched_image is None:
125
  raise gr.Error("Image stitching failed.")
 
126
  logger.info("Image stitching complete.")
127
 
128
  progress(0.9, desc="Saving final image...")
 
129
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_") as tmp_file:
130
  stitched_image.save(tmp_file.name, "PNG")
131
  output_path = tmp_file.name
 
143
  and then append them to create a single image that you can download.
144
  """
145
  )
 
146
  with gr.Row():
147
  with gr.Column(scale=1):
148
  with gr.Tabs():
149
  with gr.TabItem("Upload PDF"):
150
  pdf_file_input = gr.File(label="Upload PDF File", file_types=[".pdf"])
151
  with gr.TabItem("From URL"):
152
+ pdf_url_input = gr.Textbox(label="PDF URL", placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf")
 
 
 
153
 
154
+ dpi_slider = gr.Slider(minimum=100, maximum=600, step=50, value=200, label="Image Resolution (DPI)")
155
  columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
156
 
157
  with gr.Accordion("Advanced Options", open=False):
158
+ hide_annotations_toggle = gr.Checkbox(value=True, label="Hide PDF Annotations (Links/Highlights)", info="Turn this on to remove the colored boxes that can appear around links and references.")
159
+ page_order_radio = gr.Radio(["Left-to-Right (across)", "Top-to-Bottom (down)"], value="Left-to-Right (across)", label="Multi-Column Page Order", info="Determines how pages fill the columns.")
160
  with gr.Row():
161
  crop_left = gr.Slider(minimum=0, maximum=500, step=10, value=0, label="Crop Left")
162
  crop_right = gr.Slider(minimum=0, maximum=500, step=10, value=0, label="Crop Right")
 
181
  crop_top,
182
  crop_bottom,
183
  crop_left,
184
+ crop_right,
185
+ hide_annotations_toggle,
186
+ page_order_radio
187
  ],
188
  outputs=[output_image_preview, output_image_download]
189
  )