Spaces:
Running
Running
import logging
import os
import tempfile
from typing import List
from urllib.parse import unquote, urlsplit

import gradio as gr
import requests
from pdf2image import convert_from_path, convert_from_bytes
from pdf2image.exceptions import PDFInfoNotInstalledError, PDFPageCountError
from PIL import Image
# Root logging setup: INFO and above, each record prefixed with a timestamp
# and its level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger named after this module; used by the functions below.
logger = logging.getLogger(__name__)
def stitch_images_vertically(images: List[Image.Image]) -> Image.Image:
    """Stack *images* top-to-bottom on a single white RGB canvas.

    The canvas is as wide as the widest image and as tall as all image
    heights combined.  Each image is pasted at x=0 directly below the
    previous one, so narrower images leave white space on their right.

    Args:
        images: Images to stack, in the order they should appear.

    Returns:
        The combined image, or ``None`` when *images* is empty.
    """
    if not images:
        return None

    widths = [page.width for page in images]
    heights = [page.height for page in images]

    # White background so any area not covered by a page stays blank.
    canvas = Image.new('RGB', (max(widths), sum(heights)), (255, 255, 255))

    top = 0
    for page, page_height in zip(images, heights):
        canvas.paste(page, (0, top))
        top += page_height

    return canvas
# Resolution (dots per inch) passed to pdf2image when rendering pages.
_RENDER_DPI = 200
# Seconds to wait on the remote server before a URL download is abandoned.
_DOWNLOAD_TIMEOUT_S = 45


def _safe_stem(filename, default="document", max_length=64):
    """Return the extension-less base name of *filename*, safe for a temp-file prefix.

    Characters other than letters, digits, ``-``, ``_`` and ``.`` are replaced
    with ``_`` and the result is cut to *max_length* characters.  When nothing
    alphanumeric is left (for example a URL path ending in ``/``), *default*
    is returned instead.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    cleaned = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in stem)
    cleaned = cleaned[:max_length]
    return cleaned if any(ch.isalnum() for ch in cleaned) else default


def process_pdf(pdf_file, pdf_url, progress=gr.Progress()):
    """Render every page of a PDF and stitch the pages into one tall PNG.

    The PDF is taken from *pdf_file* when one was uploaded; otherwise it is
    downloaded from *pdf_url*.

    Args:
        pdf_file: Value of the ``gr.File`` input, or ``None``.  Either an
            object exposing the path as ``.name`` or a plain path string is
            accepted.
        pdf_url: URL text from the textbox; only used when *pdf_file* is
            ``None``.
        progress: Gradio progress tracker, called with a completion fraction
            and a description at each stage.

    Returns:
        A 2-tuple containing the path of the saved PNG twice, one entry for
        each of the two output components wired to this function.

    Raises:
        gr.Error: If neither input is provided, the download fails, the PDF
            cannot be converted, no pages are produced, or stitching yields
            nothing.
    """
    progress(0, desc="Validating input...")

    pdf_path = None
    pdf_bytes = None
    url = (pdf_url or "").strip()

    if pdf_file is not None:
        # Accept both a file-like wrapper (path in ``.name``) and a bare path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        logger.info("Processing uploaded file: %s", pdf_path)
        source_name = _safe_stem(pdf_path)
    elif url:
        logger.info("Processing file from URL: %s", url)
        progress(0.1, desc="Downloading PDF from URL...")
        # NOTE(review): the URL is fetched server-side exactly as supplied and
        # the whole response is buffered in memory; consider restricting
        # hosts and capping the size if this is exposed to untrusted users.
        try:
            response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_S)
            response.raise_for_status()
        except requests.RequestException as e:
            raise gr.Error(f"Failed to download PDF from URL. Error: {e}") from e
        pdf_bytes = response.content
        # Use only the URL path (no query or fragment), percent-decoded.
        source_name = _safe_stem(unquote(urlsplit(url).path))
    else:
        raise gr.Error("Please upload a PDF file or provide a valid URL.")

    progress(0.3, desc="Converting PDF pages to images...")
    try:
        if pdf_bytes is not None:
            images = convert_from_bytes(pdf_bytes, dpi=_RENDER_DPI)
        else:
            images = convert_from_path(pdf_path, dpi=_RENDER_DPI)
    except (PDFInfoNotInstalledError, FileNotFoundError) as e:
        logger.exception("PDF conversion could not start.")
        raise gr.Error("Server configuration error: Poppler dependency is missing.") from e
    except PDFPageCountError as e:
        # Treated as a problem with the supplied document, so no traceback.
        logger.warning("Could not read page count from PDF: %s", e)
        raise gr.Error(f"Failed to process the PDF. It might be corrupted or password-protected. Error: {e}") from e
    except Exception as e:
        # Boundary handler: log the traceback, then surface a UI error.
        logger.exception("Unexpected failure while converting PDF.")
        raise gr.Error(f"Failed to process the PDF. It might be corrupted or password-protected. Error: {e}") from e

    if not images:
        raise gr.Error("Could not extract any pages from the PDF. The file might be empty or invalid.")
    logger.info("Successfully converted %d pages to images.", len(images))

    progress(0.7, desc=f"Stitching {len(images)} images together...")
    stitched_image = stitch_images_vertically(images)
    if stitched_image is None:
        raise gr.Error("Image stitching failed.")
    logger.info("Image stitching complete.")

    progress(0.9, desc="Saving final image...")
    # delete=False: the file has to outlive this function because its path is
    # returned to the caller.  Nothing here removes it afterwards.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".png", prefix=f"{source_name}_stitched_"
    ) as tmp_file:
        # Write through the open handle rather than reopening by name, which
        # is not permitted on every platform while the handle is open.
        stitched_image.save(tmp_file, "PNG")
        output_path = tmp_file.name
    logger.info("Final image saved to temporary path: %s", output_path)

    progress(1, desc="Done!")
    return output_path, output_path
# Gradio UI: inputs on the left (upload tab / URL tab + button), outputs on
# the right (image preview + downloadable file).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page heading and a short description of what the tool does.
    gr.Markdown(
        """
        # PDF Page Stitcher 📄 ➡️ 🖼️
        Upload a PDF file or provide a URL. This tool will convert every page of the PDF into an image
        and then append them beneath each other to create a single, tall image that you can download.
        """
    )
    with gr.Row():
        # Input column: two alternative sources, one per tab.
        with gr.Column(scale=1):
            with gr.Tabs():
                with gr.TabItem("Upload PDF"):
                    pdf_file_input = gr.File(label="Upload PDF File", file_types=[".pdf"])
                with gr.TabItem("From URL"):
                    pdf_url_input = gr.Textbox(
                        label="PDF URL",
                        placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
                    )
            submit_btn = gr.Button("Stitch PDF Pages", variant="primary")
        # Output column: both components receive the same file path.
        with gr.Column(scale=2):
            gr.Markdown("## Output")
            output_image_preview = gr.Image(
                label="Stitched Image Preview",
                type="filepath",
                interactive=False,
                height=600,
            )
            output_image_download = gr.File(
                label="Download Stitched Image",
                interactive=False
            )
    # Both inputs are always passed; process_pdf uses the upload when present
    # and the URL otherwise.
    submit_btn.click(
        fn=process_pdf,
        inputs=[pdf_file_input, pdf_url_input],
        outputs=[output_image_preview, output_image_download]
    )
if __name__ == "__main__":
    # Start the server only when this file is run as a script, so importing
    # the module (e.g. from tests or tooling) does not launch the app.
    # Listens on all interfaces, port 7860, with Gradio's debug flag enabled.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)