David committed on
Commit
b499397
·
1 Parent(s): 08073f0

Setup DescribePDF for Hugging Face Space

Browse files
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/jekyll-gh-pages.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sample workflow for building and deploying a Jekyll site to GitHub Pages
2
+ name: Deploy Jekyll with GitHub Pages dependencies preinstalled
3
+
4
+ on:
5
+ # Runs on pushes targeting the default branch
6
+ push:
7
+ branches: ["main"]
8
+
9
+ # Allows you to run this workflow manually from the Actions tab
10
+ workflow_dispatch:
11
+
12
+ # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13
+ permissions:
14
+ contents: read
15
+ pages: write
16
+ id-token: write
17
+
18
+ # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19
+ # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20
+ concurrency:
21
+ group: "pages"
22
+ cancel-in-progress: false
23
+
24
+ jobs:
25
+ # Build job
26
+ build:
27
+ runs-on: ubuntu-latest
28
+ steps:
29
+ - name: Checkout
30
+ uses: actions/checkout@v4
31
+ - name: Setup Pages
32
+ uses: actions/configure-pages@v5
33
+ - name: Build with Jekyll
34
+ uses: actions/jekyll-build-pages@v1
35
+ with:
36
+ source: ./
37
+ destination: ./_site
38
+ - name: Upload artifact
39
+ uses: actions/upload-pages-artifact@v3
40
+
41
+ # Deployment job
42
+ deploy:
43
+ environment:
44
+ name: github-pages
45
+ url: ${{ steps.deployment.outputs.page_url }}
46
+ runs-on: ubuntu-latest
47
+ needs: build
48
+ steps:
49
+ - name: Deploy to GitHub Pages
50
+ id: deployment
51
+ uses: actions/deploy-pages@v4
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .env
2
+ .DS_Store
3
+ /describepdf/__pycache__
4
+ /describepdf.egg-info
5
+ /tests/__pycache__
CITATION.cff ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ authors:
4
+ - family-names: "Romero Santos"
5
+ given-names: "David"
6
+ title: "DescribePDF: A tool to convert visual PDF files to detailed descriptions"
7
+ date-released: 2025-04-26
8
+ url: "https://github.com/DavidLMS/DescribePDF"
CONTRIBUTING.md ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- omit in toc -->
2
+ # Contributing to DescribePDF
3
+
4
+ First off, thanks for taking the time to contribute! Your help is greatly appreciated.
5
+
6
+ All types of contributions are encouraged and valued, whether it's code, documentation, suggestions for new features, or bug reports. Please read through the following guidelines before contributing to ensure a smooth process for everyone involved.
7
+
8
+ > And if you like the project, but just don't have time to contribute, that's fine. There are other easy ways to support the project and show your appreciation, which we would also be very happy about:
9
+ > - Star the project
10
+ > - Tweet about it
11
+ > - Refer this project in your project's README
12
+ > - Mention the project at local meetups and tell your friends/colleagues
13
+
14
+ <!-- omit in toc -->
15
+ ## Table of Contents
16
+
17
+ - [I Have a Question](#i-have-a-question)
18
+ - [I Want To Contribute](#i-want-to-contribute)
19
+ - [Reporting Bugs](#reporting-bugs)
20
+ - [Suggesting Enhancements](#suggesting-enhancements)
21
+ - [Your First Code Contribution](#your-first-code-contribution)
22
+ - [Improving The Documentation](#improving-the-documentation)
23
+ - [Styleguides](#styleguides)
24
+ - [Commit Messages](#commit-messages)
25
+
26
+ ## I Have a Question
27
+
28
+ If you want to ask a question, we assume that you have read the available [Documentation](https://github.com/DavidLMS/DescribePDF/blob/main/README.md).
29
+
30
+ Before you ask a question, it is best to search for existing [Issues](https://github.com/DavidLMS/DescribePDF/issues) that might help you. If you find a relevant issue but still need clarification, feel free to comment on it. Additionally, it’s a good idea to search the web for answers before asking.
31
+
32
+ If you still need to ask a question, we recommend the following:
33
+
34
+ - Open an [Issue](https://github.com/DavidLMS/DescribePDF/issues/new).
35
+ - Provide as much context as you can about what you're running into.
36
+ - Provide project and platform versions (Python, OS, etc.), depending on what seems relevant.
37
+
38
+ We (or someone in the community) will then take care of the issue as soon as possible.
39
+
40
+ ## I Want To Contribute
41
+
42
+ > ### Legal Notice <!-- omit in toc -->
43
+ > When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content, and that the content you contribute may be provided under the project license.
44
+
45
+ ### Reporting Bugs
46
+
47
+ #### Before Submitting a Bug Report
48
+
49
+ A good bug report shouldn't leave others needing to chase you up for more information. Please investigate carefully, collect information, and describe the issue in detail in your report. Follow these steps to help us fix any potential bugs as quickly as possible:
50
+
51
+ - Ensure you are using the latest version.
52
+ - Verify that your issue is not due to misconfiguration or environmental issues. Make sure you have read the [documentation](https://github.com/DavidLMS/DescribePDF/blob/main/README.md).
53
+ - Check if the issue has already been reported by searching the [bug tracker](https://github.com/DavidLMS/DescribePDF/issues?q=label%3Abug).
54
+ - Gather as much information as possible about the bug:
55
+ - Stack trace (if applicable)
56
+ - OS, platform, and version (Windows, Linux, macOS, etc.)
57
+ - Python version and any relevant package versions
58
+ - Steps to reliably reproduce the issue
59
+
60
+ #### How Do I Submit a Good Bug Report?
61
+
62
+ > Do not report security-related issues, vulnerabilities, or bugs with sensitive information in public forums. Instead, report these issues privately by emailing hola_at_davidlms.com.
63
+
64
+ We use GitHub issues to track bugs and errors. If you run into an issue with the project:
65
+
66
+ - Open an [Issue](https://github.com/DavidLMS/DescribePDF/issues/new). (Since we can't be sure yet if it’s a bug, avoid labeling it as such until confirmed.)
67
+ - Explain the behavior you expected and what actually happened.
68
+ - Provide as much context as possible and describe the steps someone else can follow to recreate the issue. This usually includes a code snippet or an example project.
69
+
70
+ Once it's filed:
71
+
72
+ - The project team will label the issue accordingly.
73
+ - A team member will try to reproduce the issue. If the issue cannot be reproduced, the team will ask for more information and label the issue as `needs-repro`.
74
+ - If the issue is reproducible, it will be labeled `needs-fix` and potentially other relevant tags.
75
+
76
+ ### Suggesting Enhancements
77
+
78
+ This section guides you through submitting an enhancement suggestion for DescribePDF, whether it's a new feature or an improvement to existing functionality.
79
+
80
+ #### Before Submitting an Enhancement
81
+
82
+ - Ensure you are using the latest version.
83
+ - Check the [documentation](https://github.com/DavidLMS/DescribePDF/blob/main/README.md) to see if your suggestion is already supported.
84
+ - Search the [issue tracker](https://github.com/DavidLMS/DescribePDF/issues) to see if the enhancement has already been suggested. If so, add a comment to the existing issue instead of opening a new one.
85
+ - Make sure your suggestion aligns with the scope and aims of the project. It's important to suggest features that will be beneficial to the majority of users.
86
+
87
+ #### How Do I Submit a Good Enhancement Suggestion?
88
+
89
+ Enhancement suggestions are tracked as [GitHub issues](https://github.com/DavidLMS/DescribePDF/issues).
90
+
91
+ - Use a **clear and descriptive title** for the suggestion.
92
+ - Provide a **detailed description** of the enhancement, including any relevant context.
93
+ - **Describe the current behavior** and **explain what you would expect instead**, along with reasons why the enhancement would be beneficial.
94
+ - Include **screenshots or diagrams** if applicable to help illustrate the suggestion.
95
+ - Explain why this enhancement would be useful to most `DescribePDF` users.
96
+
97
+ ### Your First Code Contribution
98
+
99
+ #### Pre-requisites
100
+
101
+ You should first [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the `DescribePDF` repository and then clone your forked repository:
102
+
103
+ ```bash
104
+ git clone https://github.com/<YOUR_GITHUB_USER>/DescribePDF.git
105
+ ```
106
+
107
+ Once in the cloned repository directory, create a new branch for your contribution:
108
+
109
+ ```bash
110
+ git checkout -B <feature-description>
111
+ ```
112
+
113
+ ### Contributing Workflow
114
+
115
+ 1. Make sure your code follows the style guide and passes linting with `pylint`.
116
+ 2. Write tests for any new functionality you add.
117
+ 3. Ensure all tests pass before submitting a pull request.
118
+ 4. Document any changes to APIs or core functionality.
119
+ 5. Submit your pull request, providing a clear and descriptive title and description of your changes.
120
+
121
+ ### Improving The Documentation
122
+
123
+ Contributions to documentation are welcome! Well-documented code is easier to understand and maintain. If you see areas where documentation can be improved, feel free to submit your suggestions.
124
+
125
+ ## Styleguides
126
+
127
+ ### Commit Messages
128
+
129
+ - Use clear and descriptive commit messages.
130
+ - Follow the general format: `Short summary (50 characters or less)` followed by an optional detailed explanation.
131
+
132
+ ### Code Style
133
+
134
+ - Ensure your code adheres to the project's coding standards and passes all linting checks with `pylint`.
135
+
136
+ ## License
137
+
138
+ By contributing to DescribePDF, you agree that your contributions will be licensed under the MIT License.
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 David Romero
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,21 @@
1
  ---
2
- title: Describepdf
3
- emoji: 🐢
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.27.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- short_description: A tool to convert PDF files to detailed Markdown description
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: DescribePDF
3
+ emoji: 📄
4
+ colorFrom: red
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.20.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
11
  ---
12
 
13
+ # DescribePDF
14
+
15
+ DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs).
16
+
17
+ This Space demonstrates the OpenRouter web interface of DescribePDF. You'll need an OpenRouter API key to use it. For the full version including local Ollama support, please install the package locally.
18
+
19
+ ![poster](https://davidlms.github.io/DescribePDF/assets/poster.png)
20
+
21
+ [GitHub Repository](https://github.com/DavidLMS/DescribePDF)
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entry point for DescribePDF Hugging Face Space.
3
+ This file imports and launches the Gradio UI from the original package.
4
+ """
5
+
6
+ from describepdf.ui import create_ui, launch_app
7
+
8
+ # Create the Gradio interface
9
+ app = create_ui()
10
+
11
+ # This will be used by Gradio when deployed
12
+ if __name__ == "__main__":
13
+ app.launch()
describepdf/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DescribePDF - A tool to convert PDF files to detailed Markdown descriptions using VLMs.
3
+
4
+ This package provides functionality to analyze PDF files and generate detailed
5
+ Markdown descriptions using Vision-Language Models (VLMs) from either OpenRouter
6
+ or local Ollama instances.
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+ __author__ = "David Romero"
11
+ __license__ = "MIT"
describepdf/cli.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Command-line interface for DescribePDF.
3
+
4
+ This module provides the CLI functionality for converting PDF files to markdown descriptions.
5
+ """
6
+
7
+ import argparse
8
+ import os
9
+ import sys
10
+ import logging
11
+ from typing import Dict, Any, Callable, Optional
12
+ from tqdm import tqdm
13
+
14
+ from . import config
15
+ from . import core
16
+ from . import ollama_client
17
+
18
+ # Get logger from config module
19
+ logger = logging.getLogger('describepdf')
20
+
21
+ def setup_cli_parser() -> argparse.ArgumentParser:
22
+ """
23
+ Set up the command line argument parser.
24
+
25
+ Returns:
26
+ argparse.ArgumentParser: Configured parser for command line arguments
27
+ """
28
+ parser = argparse.ArgumentParser(
29
+ description="DescribePDF - Convert PDF files to detailed Markdown descriptions",
30
+ epilog="Example: describepdf input.pdf -o output.md -l Spanish"
31
+ )
32
+
33
+ parser.add_argument(
34
+ "pdf_file",
35
+ help="Path to the PDF file to process"
36
+ )
37
+
38
+ parser.add_argument(
39
+ "-o", "--output",
40
+ help="Path to the output Markdown file (default: [pdf_name]_description.md)"
41
+ )
42
+
43
+ parser.add_argument(
44
+ "-k", "--api-key",
45
+ help="OpenRouter API Key (overrides the one in .env file)"
46
+ )
47
+
48
+ parser.add_argument(
49
+ "--local",
50
+ action="store_true",
51
+ help="Use local Ollama instead of OpenRouter"
52
+ )
53
+
54
+ parser.add_argument(
55
+ "--endpoint",
56
+ help="Ollama endpoint URL (default: http://localhost:11434)"
57
+ )
58
+
59
+ parser.add_argument(
60
+ "-m", "--vlm-model",
61
+ help="VLM model to use (default: configured in .env)"
62
+ )
63
+
64
+ parser.add_argument(
65
+ "-l", "--language",
66
+ help="Output language (default: configured in .env)"
67
+ )
68
+
69
+ parser.add_argument(
70
+ "--use-markitdown",
71
+ action="store_true",
72
+ help="Use Markitdown for enhanced text extraction"
73
+ )
74
+
75
+ parser.add_argument(
76
+ "--use-summary",
77
+ action="store_true",
78
+ help="Generate and use a PDF summary"
79
+ )
80
+
81
+ parser.add_argument(
82
+ "--summary-model",
83
+ help="Model to generate the summary (default: configured in .env)"
84
+ )
85
+
86
+ parser.add_argument(
87
+ "-v", "--verbose",
88
+ action="store_true",
89
+ help="Verbose mode (show debug messages)"
90
+ )
91
+
92
+ return parser
93
+
94
+ def create_progress_callback() -> Callable[[float, str], None]:
95
+ """
96
+ Create a progress callback function that displays progress with tqdm.
97
+
98
+ Returns:
99
+ Callable[[float, str], None]: Progress callback function
100
+ """
101
+ progress_bar = tqdm(total=100, desc="Processing", unit="%")
102
+
103
+ def callback(progress_value: float, status: str) -> None:
104
+ """
105
+ Display progress in the command line.
106
+
107
+ Args:
108
+ progress_value (float): Progress value between 0.0 and 1.0
109
+ status (str): Current status message
110
+ """
111
+ nonlocal progress_bar
112
+
113
+ current_progress = int(progress_value * 100)
114
+ last_progress = progress_bar.n
115
+ progress_diff = current_progress - last_progress
116
+
117
+ if progress_diff > 0:
118
+ progress_bar.update(progress_diff)
119
+
120
+ progress_bar.set_description(status)
121
+
122
+ if progress_value >= 1.0:
123
+ progress_bar.close()
124
+
125
+ return callback
126
+
127
+ def run_cli() -> None:
128
+ """
129
+ Main function for the command line interface.
130
+
131
+ This function parses arguments, configures the application based on
132
+ provided parameters, and runs the PDF to Markdown conversion.
133
+ """
134
+ # Parse command line arguments
135
+ parser = setup_cli_parser()
136
+ args = parser.parse_args()
137
+
138
+ # Configure logging based on verbosity
139
+ if args.verbose:
140
+ logger.setLevel(logging.DEBUG)
141
+
142
+ # Validate input file exists
143
+ if not os.path.exists(args.pdf_file) or not os.path.isfile(args.pdf_file):
144
+ logger.error(f"The PDF file '{args.pdf_file}' does not exist or is not a valid file.")
145
+ logger.info("Exiting with error code 1")
146
+ sys.exit(1)
147
+
148
+ # Load configuration from environment
149
+ env_config = config.get_config()
150
+
151
+ # Determine provider
152
+ provider = "ollama" if args.local else "openrouter"
153
+
154
+ # Prepare run configuration by merging environment config and CLI args
155
+ run_config: Dict[str, Any] = {
156
+ "provider": provider,
157
+ "output_language": args.language if args.language else env_config.get("output_language"),
158
+ "use_markitdown": args.use_markitdown if args.use_markitdown is not None else env_config.get("use_markitdown"),
159
+ "use_summary": args.use_summary if args.use_summary is not None else env_config.get("use_summary"),
160
+ }
161
+
162
+ # Configure provider-specific settings
163
+ vlm_model: Optional[str] = args.vlm_model
164
+ summary_model: Optional[str] = args.summary_model
165
+
166
+ if provider == "openrouter":
167
+ run_config["openrouter_api_key"] = args.api_key if args.api_key else env_config.get("openrouter_api_key")
168
+
169
+ if not vlm_model:
170
+ vlm_model = env_config.get("or_vlm_model")
171
+
172
+ if not summary_model and run_config["use_summary"]:
173
+ summary_model = env_config.get("or_summary_model")
174
+
175
+ if not run_config.get("openrouter_api_key"):
176
+ logger.error("An OpenRouter API key is required. Provide one with --api-key or configure it in the .env file")
177
+ logger.info("Exiting with error code 1")
178
+ sys.exit(1)
179
+
180
+ elif provider == "ollama":
181
+ run_config["ollama_endpoint"] = args.endpoint if args.endpoint else env_config.get("ollama_endpoint")
182
+
183
+ if not vlm_model:
184
+ vlm_model = env_config.get("ollama_vlm_model")
185
+
186
+ if not summary_model and run_config["use_summary"]:
187
+ summary_model = env_config.get("ollama_summary_model")
188
+
189
+ if not ollama_client.OLLAMA_AVAILABLE:
190
+ logger.error("Ollama Python client not installed. Install with 'pip install ollama'")
191
+ logger.info("Exiting with error code 1")
192
+ sys.exit(1)
193
+
194
+ if not ollama_client.check_ollama_availability(run_config["ollama_endpoint"]):
195
+ logger.error(f"Could not connect to Ollama at {run_config['ollama_endpoint']}. Make sure it is running.")
196
+ logger.info("Exiting with error code 1")
197
+ sys.exit(1)
198
+
199
+ run_config["vlm_model"] = vlm_model
200
+ if run_config["use_summary"]:
201
+ run_config["summary_llm_model"] = summary_model
202
+
203
+ # Print configuration summary
204
+ logger.info(f"Processing PDF: {os.path.basename(args.pdf_file)}")
205
+ logger.info(f"Provider: {run_config['provider']}")
206
+
207
+ if run_config['provider'] == 'openrouter':
208
+ if run_config.get('openrouter_api_key'):
209
+ masked_key = '*' * 8 + run_config['openrouter_api_key'][-5:] if len(run_config['openrouter_api_key']) > 5 else '*****'
210
+ logger.info(f"OpenRouter API Key: {masked_key}")
211
+ else:
212
+ logger.info("OpenRouter API Key: Not provided")
213
+ else:
214
+ logger.info(f"Ollama Endpoint: {run_config['ollama_endpoint']}")
215
+
216
+ logger.info(f"VLM Model: {run_config['vlm_model']}")
217
+ logger.info(f"Language: {run_config['output_language']}")
218
+ logger.info(f"Markitdown: {'Yes' if run_config['use_markitdown'] else 'No'}")
219
+ logger.info(f"Summary: {'Yes' if run_config['use_summary'] else 'No'}")
220
+ if run_config.get('use_summary') and run_config.get('summary_llm_model'):
221
+ logger.info(f"Summary model: {run_config['summary_llm_model']}")
222
+
223
+ # Create progress callback
224
+ progress_callback = create_progress_callback()
225
+
226
+ # Run conversion
227
+ status, markdown_result = core.convert_pdf_to_markdown(
228
+ args.pdf_file,
229
+ run_config,
230
+ progress_callback
231
+ )
232
+
233
+ if not markdown_result:
234
+ logger.error(f"Error: {status}")
235
+ logger.info("Exiting with error code 1")
236
+ sys.exit(1)
237
+
238
+ # Determine output filename
239
+ output_filename = args.output
240
+ if not output_filename:
241
+ base_name = os.path.splitext(os.path.basename(args.pdf_file))[0]
242
+ output_filename = f"{base_name}_description.md"
243
+
244
+ # Save output file
245
+ try:
246
+ with open(output_filename, "w", encoding="utf-8") as md_file:
247
+ md_file.write(markdown_result)
248
+ logger.info(f"Conversion completed. Result saved to: {output_filename}")
249
+ except Exception as e:
250
+ logger.error(f"Error saving output file: {e}")
251
+ logger.info("Exiting with error code 1")
252
+ sys.exit(1)
describepdf/config.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration module for DescribePDF.
3
+
4
+ This module manages loading configuration from environment variables
5
+ and prompt templates from files.
6
+ """
7
+ import os
8
+ import logging
9
+ from typing import Dict, Any, Optional
10
+ from dotenv import load_dotenv
11
+ import pathlib
12
+
13
+ # Setup central logging configuration
14
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(module)s] - %(message)s')
15
+ logger = logging.getLogger('describepdf')
16
+
17
+ # Directory containing prompt templates (making path absolute by using current file location)
18
+ SCRIPT_DIR = pathlib.Path(__file__).parent.parent.absolute()
19
+ PROMPTS_DIR = pathlib.Path(SCRIPT_DIR) / "prompts"
20
+
21
+ # Default configuration values
22
+ DEFAULT_CONFIG: Dict[str, Any] = {
23
+ "openrouter_api_key": None,
24
+ "or_vlm_model": "qwen/qwen2.5-vl-72b-instruct",
25
+ "or_summary_model": "google/gemini-2.5-flash-preview",
26
+
27
+ "ollama_endpoint": "http://localhost:11434",
28
+ "ollama_vlm_model": "llama3.2-vision",
29
+ "ollama_summary_model": "qwen2.5",
30
+
31
+ "output_language": "English",
32
+ "use_markitdown": False,
33
+ "use_summary": False
34
+ }
35
+
36
+ # Mapping of prompt template identifiers to their file names
37
+ PROMPT_FILES: Dict[str, str] = {
38
+ "summary": "summary_prompt.md",
39
+ "vlm_base": "vlm_prompt_base.md",
40
+ "vlm_markdown": "vlm_prompt_with_markdown.md",
41
+ "vlm_summary": "vlm_prompt_with_summary.md",
42
+ "vlm_full": "vlm_prompt_full.md"
43
+ }
44
+
45
+ # Cache for loaded configuration
46
+ _CONFIG_CACHE: Optional[Dict[str, Any]] = None
47
+
48
+ # Cache for loaded prompts
49
+ _PROMPTS_CACHE: Optional[Dict[str, str]] = None
50
+
51
+ def load_env_config() -> Dict[str, Any]:
52
+ """
53
+ Load configuration from environment variables (.env file).
54
+
55
+ This function reads configuration values from environment variables,
56
+ falling back to default values when environment variables are not set.
57
+
58
+ Returns:
59
+ Dict[str, Any]: Dictionary with the loaded configuration
60
+ """
61
+ load_dotenv()
62
+
63
+ # Start with the default config
64
+ loaded_config = DEFAULT_CONFIG.copy()
65
+
66
+ # Override defaults with environment variables if present
67
+ if os.getenv("OPENROUTER_API_KEY"):
68
+ loaded_config["openrouter_api_key"] = os.getenv("OPENROUTER_API_KEY")
69
+
70
+ if os.getenv("DEFAULT_OR_VLM_MODEL"):
71
+ loaded_config["or_vlm_model"] = os.getenv("DEFAULT_OR_VLM_MODEL")
72
+
73
+ if os.getenv("DEFAULT_OR_SUMMARY_MODEL"):
74
+ loaded_config["or_summary_model"] = os.getenv("DEFAULT_OR_SUMMARY_MODEL")
75
+
76
+ if os.getenv("OLLAMA_ENDPOINT"):
77
+ loaded_config["ollama_endpoint"] = os.getenv("OLLAMA_ENDPOINT")
78
+
79
+ if os.getenv("DEFAULT_OLLAMA_VLM_MODEL"):
80
+ loaded_config["ollama_vlm_model"] = os.getenv("DEFAULT_OLLAMA_VLM_MODEL")
81
+
82
+ if os.getenv("DEFAULT_OLLAMA_SUMMARY_MODEL"):
83
+ loaded_config["ollama_summary_model"] = os.getenv("DEFAULT_OLLAMA_SUMMARY_MODEL")
84
+
85
+ if os.getenv("DEFAULT_LANGUAGE"):
86
+ loaded_config["output_language"] = os.getenv("DEFAULT_LANGUAGE")
87
+
88
+ if os.getenv("DEFAULT_USE_MARKITDOWN"):
89
+ loaded_config["use_markitdown"] = str(os.getenv("DEFAULT_USE_MARKITDOWN")).lower() == 'true'
90
+
91
+ if os.getenv("DEFAULT_USE_SUMMARY"):
92
+ loaded_config["use_summary"] = str(os.getenv("DEFAULT_USE_SUMMARY")).lower() == 'true'
93
+
94
+ logger.info("Configuration loaded from environment variables.")
95
+
96
+ # Log configuration without sensitive data
97
+ log_config = loaded_config.copy()
98
+ if "openrouter_api_key" in log_config and log_config["openrouter_api_key"]:
99
+ log_config["openrouter_api_key"] = f"***{log_config['openrouter_api_key'][-5:]}" if len(log_config['openrouter_api_key']) > 5 else "*****"
100
+ logger.debug(f"Effective configuration: {log_config}")
101
+
102
+ return loaded_config
103
+
104
+ def load_prompt_templates() -> Dict[str, str]:
105
+ """
106
+ Load prompt templates from the prompts directory.
107
+
108
+ This function reads template files from the prompts directory specified by
109
+ PROMPTS_DIR and maps them to their corresponding keys in the PROMPT_FILES dictionary.
110
+
111
+ Returns:
112
+ Dict[str, str]: Dictionary with loaded prompt templates
113
+ """
114
+ templates: Dict[str, str] = {}
115
+
116
+ if not PROMPTS_DIR.is_dir():
117
+ logger.error(f"Prompts directory '{PROMPTS_DIR}' not found.")
118
+ return templates
119
+
120
+ for key, filename in PROMPT_FILES.items():
121
+ filepath = PROMPTS_DIR / filename
122
+ try:
123
+ with open(filepath, 'r', encoding='utf-8') as f:
124
+ templates[key] = f.read()
125
+ except FileNotFoundError:
126
+ logger.error(f"Prompt file not found: {filepath}")
127
+ except Exception as e:
128
+ logger.error(f"Error reading prompt file {filepath}: {e}")
129
+
130
+ logger.info(f"Loaded {len(templates)} prompt templates.")
131
+ return templates
132
+
133
+ def get_config() -> Dict[str, Any]:
134
+ """
135
+ Get the configuration from .env.
136
+
137
+ This function loads the configuration only once and returns the cached version
138
+ on subsequent calls, improving efficiency and ensuring consistency.
139
+
140
+ Returns:
141
+ Dict[str, Any]: Current configuration dictionary
142
+ """
143
+ global _CONFIG_CACHE
144
+
145
+ if _CONFIG_CACHE is None:
146
+ _CONFIG_CACHE = load_env_config()
147
+
148
+ return _CONFIG_CACHE
149
+
150
+ def reload_config() -> Dict[str, Any]:
151
+ """
152
+ Force reload of configuration from .env.
153
+
154
+ This function can be used when configuration needs to be explicitly refreshed.
155
+
156
+ Returns:
157
+ Dict[str, Any]: Updated configuration dictionary
158
+ """
159
+ global _CONFIG_CACHE
160
+ _CONFIG_CACHE = load_env_config()
161
+ return _CONFIG_CACHE
162
+
163
+ def get_prompts() -> Dict[str, str]:
164
+ """
165
+ Get the prompt templates.
166
+
167
+ This function loads the prompt templates only once and returns the cached version
168
+ on subsequent calls, improving efficiency.
169
+
170
+ Returns:
171
+ Dict[str, str]: Dictionary with loaded prompt templates
172
+ """
173
+ global _PROMPTS_CACHE
174
+
175
+ if _PROMPTS_CACHE is None:
176
+ _PROMPTS_CACHE = load_prompt_templates()
177
+
178
+ return _PROMPTS_CACHE
179
+
180
+ def get_required_prompts_for_config(cfg: Dict[str, Any]) -> Dict[str, str]:
181
+ """
182
+ Get only the prompt templates required for the given configuration.
183
+
184
+ This function determines which prompt templates are necessary based on the
185
+ provided configuration and returns only those templates.
186
+
187
+ Args:
188
+ cfg (Dict[str, Any]): Configuration dictionary
189
+
190
+ Returns:
191
+ Dict[str, str]: Dictionary with required prompt templates
192
+ """
193
+ prompts = get_prompts()
194
+ required_keys: List[str] = ["vlm_base"]
195
+
196
+ has_markdown = cfg.get("use_markitdown", False)
197
+ has_summary = cfg.get("use_summary", False)
198
+
199
+ if has_markdown and has_summary:
200
+ required_keys.append("vlm_full")
201
+ elif has_markdown:
202
+ required_keys.append("vlm_markdown")
203
+ elif has_summary:
204
+ required_keys.append("vlm_summary")
205
+
206
+ if has_summary:
207
+ required_keys.append("summary")
208
+
209
+ # Check if all required prompts are available
210
+ missing = [key for key in required_keys if key not in prompts]
211
+ if missing:
212
+ logger.error(f"Missing required prompt templates: {', '.join(missing)}")
213
+ return {}
214
+
215
+ return {key: prompts[key] for key in required_keys if key in prompts}
describepdf/core.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core module for DescribePDF.
3
+
4
+ This module contains the main orchestration logic for converting PDFs to Markdown descriptions.
5
+ """
6
+
7
+ import os
8
+ import time
9
+ from typing import Dict, Any, Callable, Tuple, List, Optional
10
+ import contextlib
11
+ import logging
12
+
13
+ from . import config
14
+ from . import pdf_processor
15
+ from . import markitdown_processor
16
+ from . import summarizer
17
+ from . import openrouter_client
18
+ from . import ollama_client
19
+
20
+ # Get logger from config module
21
+ logger = logging.getLogger('describepdf')
22
+
23
class ConversionError(Exception):
    """Raised when the PDF-to-Markdown conversion process fails."""
26
+
27
def format_markdown_output(descriptions: List[str], original_filename: str) -> str:
    """
    Assemble per-page descriptions into one Markdown document.

    Each page gets a '## Page N' heading followed by its description (or a
    placeholder when the description is empty) and a horizontal rule.

    Args:
        descriptions: Per-page description strings, in page order
        original_filename: Name of the original PDF file

    Returns:
        str: Complete Markdown content
    """
    # Accumulate fragments and join once, instead of repeated concatenation.
    parts = [f"# Description of PDF: {original_filename}\n\n"]
    for page_index, description in enumerate(descriptions, start=1):
        parts.append(f"## Page {page_index}\n\n")
        parts.append(description or "*No description generated for this page.*")
        parts.append("\n\n---\n\n")
    return "".join(parts)
44
+
45
def convert_pdf_to_markdown(
    pdf_path: str,
    cfg: Dict[str, Any],
    progress_callback: Callable[[float, str], None]
) -> Tuple[str, Optional[str]]:
    """
    Orchestrate the complete PDF to descriptive Markdown conversion process.

    Pipeline: validate provider and input file -> optionally generate a
    document summary -> open the PDF -> for each page, render an image,
    optionally extract Markdown context via Markitdown, select and fill a
    prompt template, call the VLM -> assemble all page descriptions into one
    Markdown document.

    Progress scale (approximate): 0.05 summary, +0.05 PDF load, pages fill
    up to 0.98, final assembly at 0.99, completion at 1.0.

    Error policy: per-page failures are recorded as placeholder text and the
    run continues; API-level failures (ValueError/ConnectionError/
    TimeoutError/ImportError from a client) abort the whole run via
    ConversionError. Errors are reported in the returned status string, not
    raised to the caller.

    Args:
        pdf_path: Path to the PDF file
        cfg: Configuration dictionary for this run
        progress_callback: Function accepting (float_progress, string_status)

    Returns:
        tuple: (status_message, result_markdown or None on failure)
    """
    start_time = time.time()
    progress_callback(0.0, "Starting conversion process...")
    logger.info("Starting conversion process...")

    # Validate provider and its prerequisites before doing any work.
    provider = cfg.get("provider", "openrouter").lower()
    logger.info(f"Using provider: {provider}")

    if provider == "openrouter":
        api_key = cfg.get("openrouter_api_key")
        if not api_key:
            msg = "Error: OpenRouter API Key is missing."
            logger.error(msg)
            progress_callback(0.0, msg)
            return msg, None
    elif provider == "ollama":
        ollama_endpoint = cfg.get("ollama_endpoint", "http://localhost:11434")
        if not ollama_client.OLLAMA_AVAILABLE:
            msg = "Error: Ollama Python client not installed. Install with 'pip install ollama'."
            logger.error(msg)
            progress_callback(0.0, msg)
            return msg, None

        # Probe the endpoint up front so we fail fast instead of per page.
        if not ollama_client.check_ollama_availability(ollama_endpoint):
            msg = f"Error: Could not connect to Ollama at {ollama_endpoint}. Make sure it is running."
            logger.error(msg)
            progress_callback(0.0, msg)
            return msg, None
    else:
        msg = f"Error: Unknown provider '{provider}'. Use 'openrouter' or 'ollama'."
        logger.error(msg)
        progress_callback(0.0, msg)
        return msg, None

    # Validate input file
    if not pdf_path or not os.path.exists(pdf_path) or not os.path.isfile(pdf_path):
        msg = "Error: Invalid or missing PDF file."
        logger.error(msg)
        progress_callback(0.0, msg)
        return msg, None

    original_filename = os.path.basename(pdf_path)
    logger.info(f"Processing file: {original_filename}")

    pdf_doc = None

    try:
        # Load required prompts for this configuration; empty dict means a
        # template was missing (get_required_prompts_for_config logs which).
        required_prompts = config.get_required_prompts_for_config(cfg)
        if not required_prompts:
            msg = "Error: Could not load all required prompt templates. Check the 'prompts' directory."
            progress_callback(0.0, msg)
            logger.error(msg)
            return msg, None

        # Generate summary if needed. Summary failure is non-fatal: the flag
        # is flipped off so later prompt selection skips the summary variants.
        pdf_summary = None
        summary_progress = 0.05
        if cfg.get("use_summary"):
            summary_model = cfg.get("summary_llm_model")
            progress_callback(summary_progress, f"Generating summary using {summary_model}...")
            try:
                pdf_summary = summarizer.generate_summary(
                    pdf_path,
                    provider=provider,
                    api_key=cfg.get("openrouter_api_key"),
                    ollama_endpoint=cfg.get("ollama_endpoint"),
                    model=summary_model
                )

                if pdf_summary:
                    progress_callback(summary_progress, "Summary generated.")
                    logger.info("PDF summary generated.")
                else:
                    progress_callback(summary_progress, "Warning: Could not generate summary (LLM might have returned empty).")
                    logger.warning("Failed to generate PDF summary or summary was empty.")
                    # Set use_summary to False since we don't have a summary
                    cfg["use_summary"] = False
            except Exception as e:
                error_msg = f"Warning: Summary generation failed: {e}"
                progress_callback(summary_progress, error_msg)
                logger.warning(error_msg)
                # Set use_summary to False since summary generation failed
                cfg["use_summary"] = False
        else:
            summary_progress = 0.0

        # Load PDF and process pages
        pdf_load_progress = summary_progress + 0.05
        progress_callback(pdf_load_progress, "Analyzing PDF structure...")

        # ExitStack guarantees the PDF document and any temporary per-page
        # PDFs are cleaned up regardless of how this block exits.
        with contextlib.ExitStack() as stack:
            pdf_doc, pages, total_pages = pdf_processor.get_pdf_pages(pdf_path)

            # Register PDF document for cleanup only if it was successfully opened
            if pdf_doc is not None:
                stack.callback(pdf_doc.close)
            else:
                msg = f"Error: Could not process PDF file: {original_filename}"
                progress_callback(pdf_load_progress, msg)
                logger.error(msg)
                return msg, None

            if not pages or total_pages == 0:
                msg = f"Error: PDF file is empty: {original_filename}"
                progress_callback(pdf_load_progress, msg)
                logger.error(msg)
                return msg, None

            progress_callback(pdf_load_progress, f"PDF has {total_pages} pages. Starting page processing...")

            # Process each page. Page progress fills the interval
            # [pdf_load_progress, 0.98] proportionally to pages completed.
            all_descriptions = []
            page_processing_progress_start = pdf_load_progress
            total_page_progress_ratio = (0.98 - page_processing_progress_start) if total_pages > 0 else 0

            for i, page in enumerate(pages):
                page_num = i + 1
                current_page_ratio = (page_num / total_pages) if total_pages > 0 else 1.0

                # Calculate progress for this specific page
                current_progress = page_processing_progress_start + (current_page_ratio * total_page_progress_ratio)

                # Update progress for the start of page processing
                progress_callback(current_progress, f"Processing page {page_num}/{total_pages}...")
                logger.info(f"Processing page {page_num}/{total_pages}")

                page_description = None
                temp_page_pdf_path = None

                try:
                    # Render page to image; a failed render skips the VLM call
                    # for this page but does not abort the run.
                    render_progress_message = f"Page {page_num}: Rendering image..."
                    progress_callback(current_progress, render_progress_message)
                    image_bytes, mime_type = pdf_processor.render_page_to_image_bytes(page, image_format="jpeg")
                    if not image_bytes:
                        logger.warning(f"Could not render image for page {page_num}. Skipping VLM call.")
                        all_descriptions.append(f"*Error: Could not render image for page {page_num}.*")
                        continue

                    # Extract markdown context if needed. Markitdown works on
                    # whole files, so the page is first saved as a temp PDF.
                    markdown_context = None
                    if cfg.get("use_markitdown"):
                        markitdown_progress_message = f"Page {page_num}: Extracting text (Markitdown)..."
                        progress_callback(current_progress, markitdown_progress_message)

                        # Verify Markitdown availability
                        if not markitdown_processor.MARKITDOWN_AVAILABLE:
                            logger.warning(f"Markitdown not available for page {page_num}. Proceeding without it.")
                            progress_callback(current_progress, f"Page {page_num}: Markitdown not available, skipping extraction.")
                        else:
                            temp_page_pdf_path = pdf_processor.save_page_as_temp_pdf(pdf_doc, i)

                            if temp_page_pdf_path:
                                # Register temp file for cleanup (default arg
                                # binds the current path for this iteration).
                                stack.callback(lambda p=temp_page_pdf_path: os.remove(p) if os.path.exists(p) else None)

                                try:
                                    markdown_context = markitdown_processor.get_markdown_for_page_via_temp_pdf(temp_page_pdf_path)
                                    if markdown_context is None:
                                        logger.warning(f"Markitdown failed for page {page_num}. Proceeding without it.")
                                        progress_callback(current_progress, f"Page {page_num}: Markitdown extraction failed.")
                                    else:
                                        logger.info(f"Markitdown context extracted for page {page_num}.")
                                except Exception as markdown_err:
                                    logger.warning(f"Error extracting Markitdown for page {page_num}: {markdown_err}")
                                    progress_callback(current_progress, f"Page {page_num}: Markitdown extraction error.")
                            else:
                                logger.warning(f"Could not create temporary PDF for Markitdown on page {page_num}.")
                                progress_callback(current_progress, f"Page {page_num}: Failed to prepare for Markitdown.")

                    # Select appropriate prompt based on what context this
                    # particular page actually has available.
                    prompt_key = "vlm_base"
                    has_markdown = cfg.get("use_markitdown") and markdown_context is not None
                    has_summary = cfg.get("use_summary") and pdf_summary is not None

                    if has_markdown and has_summary:
                        prompt_key = "vlm_full"
                    elif has_markdown:
                        prompt_key = "vlm_markdown"
                    elif has_summary:
                        prompt_key = "vlm_summary"

                    vlm_prompt_template = required_prompts.get(prompt_key)
                    if not vlm_prompt_template:
                        error_msg = f"Missing required prompt template: {prompt_key}"
                        progress_callback(current_progress, error_msg)
                        logger.error(error_msg)
                        all_descriptions.append(f"*Error: Could not generate description for page {page_num} due to missing prompt template.*")
                        continue

                    # Prepare prompt: fill the bracketed placeholders.
                    prompt_text = vlm_prompt_template.replace("[PAGE_NUM]", str(page_num))
                    prompt_text = prompt_text.replace("[TOTAL_PAGES]", str(total_pages))
                    prompt_text = prompt_text.replace("[LANGUAGE]", cfg.get("output_language", "English"))
                    if "[MARKDOWN_CONTEXT]" in prompt_text:
                        prompt_text = prompt_text.replace("[MARKDOWN_CONTEXT]", markdown_context if markdown_context else "N/A")
                    if "[SUMMARY_CONTEXT]" in prompt_text:
                        prompt_text = prompt_text.replace("[SUMMARY_CONTEXT]", pdf_summary if pdf_summary else "N/A")

                    # Call VLM. API-level errors abort the whole run; any other
                    # error only skips this page.
                    vlm_model = cfg.get("vlm_model")
                    vlm_progress_message = f"Page {page_num}: Calling VLM ({vlm_model})..."
                    progress_callback(current_progress, vlm_progress_message)
                    try:
                        if provider == "openrouter":
                            page_description = openrouter_client.get_vlm_description(
                                cfg.get("openrouter_api_key"), vlm_model, prompt_text, image_bytes, mime_type
                            )
                        elif provider == "ollama":
                            page_description = ollama_client.get_vlm_description(
                                cfg.get("ollama_endpoint"), vlm_model, prompt_text, image_bytes, mime_type
                            )

                        if page_description:
                            logger.info(f"VLM description received for page {page_num}.")
                        else:
                            page_description = f"*Warning: VLM did not return a description for page {page_num}.*"
                            progress_callback(current_progress, f"Page {page_num}: VLM returned no description.")
                            logger.warning(f"VLM returned no description for page {page_num}.")

                    except (ValueError, ConnectionError, TimeoutError, ImportError) as api_err:
                        error_msg = f"API Error on page {page_num}: {api_err}. Aborting."
                        progress_callback(current_progress, error_msg)
                        logger.error(error_msg)
                        raise ConversionError(error_msg)

                    except Exception as vlm_err:
                        error_msg = f"Unexpected error during VLM call for page {page_num}: {vlm_err}. Skipping page."
                        progress_callback(current_progress, error_msg)
                        logger.exception(error_msg)
                        page_description = f"*Error: Failed to get VLM description for page {page_num} due to an unexpected error.*"

                    all_descriptions.append(page_description if page_description else "*No description available.*")

                except ConversionError:
                    # Let critical errors propagate up
                    raise
                except Exception as page_err:
                    error_msg = f"Unexpected error processing page {page_num}: {page_err}. Skipping page."
                    progress_callback(current_progress, error_msg)
                    logger.exception(error_msg)
                    all_descriptions.append(f"*Error: An unexpected error occurred while processing page {page_num}.*")

        # Generate final markdown (PDF and temp files are closed/removed by
        # the ExitStack at this point).
        final_progress = 0.99
        progress_callback(final_progress, "Combining page descriptions into final Markdown...")
        final_markdown = format_markdown_output(all_descriptions, original_filename)
        logger.info("Final Markdown content assembled.")

        # Report completion
        end_time = time.time()
        duration = end_time - start_time
        final_status = f"Conversion completed successfully in {duration:.2f} seconds."
        progress_callback(1.0, final_status)
        logger.info(final_status)

        return final_status, final_markdown

    except ConversionError as critical_err:
        # Critical (API-level) failure: the message was already reported via
        # progress_callback and the log at the raise site.
        return str(critical_err), None

    except Exception as e:
        error_msg = f"Critical Error during conversion: {e}"
        progress_callback(0.0, error_msg)
        logger.exception(error_msg)
        return error_msg, None
describepdf/markitdown_processor.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MarkItDown processor module for DescribePDF.
3
+
4
+ This module handles the enhanced text extraction functionality using the
5
+ MarkItDown library to convert PDF content to markdown format.
6
+ """
7
+
8
+ import os
9
+ import logging
10
+ from typing import Optional
11
+
12
+ logger = logging.getLogger('describepdf')
13
+
14
+ # Check if MarkItDown is available
15
+ try:
16
+ from markitdown import MarkItDown
17
+ MARKITDOWN_AVAILABLE = True
18
+ logger.info("MarkItDown library is available and successfully imported.")
19
+ except ImportError:
20
+ logger.warning("MarkItDown library not installed. Install with 'pip install markitdown[pdf]'")
21
+ MARKITDOWN_AVAILABLE = False
22
+ except Exception as e:
23
+ logger.error(f"Failed to initialize MarkItDown: {e}")
24
+ MARKITDOWN_AVAILABLE = False
25
+
26
def _get_markdown_converter() -> Optional['MarkItDown']:
    """
    Build a MarkItDown converter instance.

    Returns:
        MarkItDown: A new converter, or None when the library is unavailable
        or its construction raises.
    """
    if not MARKITDOWN_AVAILABLE:
        logger.error("Cannot initialize MarkItDown converter - library not available.")
        return None

    try:
        return MarkItDown()
    except Exception as e:
        logger.error(f"Failed to initialize MarkItDown converter: {e}")
        return None
43
+
44
def get_markdown_for_page_via_temp_pdf(temp_pdf_path: str) -> Optional[str]:
    """
    Extract Markdown text from a single-page temporary PDF using MarkItDown.

    Args:
        temp_pdf_path: Path to the temporary single-page PDF file

    Returns:
        str: Extracted Markdown ("" when the converter yields no result), or
        None when MarkItDown is unavailable, the file is missing, the
        converter cannot be built, or conversion fails.
    """
    if not MARKITDOWN_AVAILABLE:
        logger.error("MarkItDown converter is not available.")
        return None

    if not os.path.exists(temp_pdf_path):
        logger.error(f"Temporary PDF file not found: {temp_pdf_path}")
        return None

    # _get_markdown_converter() never raises (it catches internally),
    # so fetching it outside the try block is safe.
    converter = _get_markdown_converter()
    if not converter:
        return None

    try:
        conversion = converter.convert(temp_pdf_path)
        logger.debug(f"Extracted Markdown from temporary PDF: {temp_pdf_path}")
        return conversion.text_content if conversion else ""
    except Exception as e:
        logger.error(f"MarkItDown failed to process {temp_pdf_path}: {e}")
        return None
73
+
74
def is_available() -> bool:
    """
    Check if MarkItDown functionality is available.

    Reflects the import-time detection performed at module load; it does not
    re-probe the environment.

    Returns:
        bool: True if MarkItDown is available, False otherwise
    """
    return MARKITDOWN_AVAILABLE
describepdf/ollama_client.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ollama client module for DescribePDF.
3
+
4
+ This module handles all interactions with local Ollama API for
5
+ VLM (Vision Language Model) image description and LLM text summarization.
6
+ """
7
+
8
+ import logging
9
+ import base64
10
+ import requests
11
+ from typing import Any, Dict, List
12
+
13
+ # Try to import Ollama, but handle gracefully if it's not available
14
+ try:
15
+ import ollama
16
+ from ollama import Client
17
+ OLLAMA_AVAILABLE = True
18
+ except ImportError:
19
+ OLLAMA_AVAILABLE = False
20
+ logging.warning("Ollama Python client not available. Install with 'pip install ollama'")
21
+
22
+ # Get logger
23
+ logger = logging.getLogger('describepdf')
24
+
25
def check_ollama_availability(endpoint: str) -> bool:
    """
    Probe whether an Ollama server is reachable at the given endpoint.

    A lightweight GET against the /api/version route is used instead of
    constructing a full Ollama client.

    Args:
        endpoint: URL of the Ollama endpoint

    Returns:
        bool: True if Ollama is available, False otherwise
    """
    if not OLLAMA_AVAILABLE:
        logger.error("Ollama Python client not installed.")
        return False

    # Normalize endpoint URL by removing trailing slashes
    base_url = endpoint.rstrip('/')

    try:
        response = requests.get(f"{base_url}/api/version", timeout=5)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logger.error(f"Could not connect to Ollama at {base_url}: {e}")
        return False
    except Exception as e:
        logger.error(f"Unexpected error checking Ollama availability: {e}")
        return False

    logger.info(f"Ollama is available at {base_url}. Response status: {response.status_code}")
    return True
55
+
56
def get_vlm_description(endpoint: str, model: str, prompt_text: str, image_bytes: bytes, mime_type: str) -> str:
    """
    Get a page description using a VLM through Ollama.

    Args:
        endpoint: URL of the Ollama endpoint
        model: Ollama VLM model name
        prompt_text: Text prompt
        image_bytes: Bytes of the page image
        mime_type: MIME type of the image ('image/png' or 'image/jpeg')

    Returns:
        str: Generated description

    Raises:
        ImportError: If Ollama Python client is not installed
        ConnectionError: If communication with Ollama fails
        ValueError: If there's an issue with the request parameters
    """
    if not OLLAMA_AVAILABLE:
        raise ImportError("Ollama Python client not installed. Install with 'pip install ollama'")

    try:
        client: Client = Client(host=endpoint.rstrip('/'))

        # Images are passed to the chat API base64-encoded.
        image_b64 = base64.b64encode(image_bytes).decode('utf-8')
        chat_messages: List[Dict[str, Any]] = [
            {
                'role': 'user',
                'content': prompt_text,
                'images': [image_b64]
            }
        ]

        logger.info(f"Calling Ollama VLM model: {model}")
        response: Dict[str, Any] = client.chat(
            model=model,
            messages=chat_messages
        )

        # Accept only a well-formed response with a message content field.
        if response and 'message' in response and 'content' in response['message']:
            logger.info(f"Received VLM description from Ollama (model: {model}).")
            return str(response['message']['content'])

        logger.warning(f"Ollama VLM response structure unexpected: {response}")
        raise ValueError("Ollama returned unexpected response structure or empty content")

    except ollama.ResponseError as e:
        logger.error(f"Ollama API error: {e}")
        raise ConnectionError(f"Ollama API error: {e}")
    except Exception as e:
        logger.error(f"Error getting VLM description from Ollama: {e}")
        raise
117
+
118
def get_llm_summary(endpoint: str, model: str, prompt_text: str) -> str:
    """
    Get a summary using an LLM through Ollama.

    Args:
        endpoint: URL of the Ollama endpoint
        model: Ollama LLM model for summary
        prompt_text: Prompt including the text to summarize

    Returns:
        str: Generated summary

    Raises:
        ImportError: If Ollama Python client is not installed
        ConnectionError: If communication with Ollama fails
        ValueError: If there's an issue with the request parameters
    """
    if not OLLAMA_AVAILABLE:
        raise ImportError("Ollama Python client not installed. Install with 'pip install ollama'")

    try:
        client: Client = Client(host=endpoint.rstrip('/'))

        chat_messages: List[Dict[str, Any]] = [
            {
                'role': 'user',
                'content': prompt_text
            }
        ]

        logger.info(f"Calling Ollama LLM model for summary: {model}")
        response: Dict[str, Any] = client.chat(
            model=model,
            messages=chat_messages
        )

        # Accept only a well-formed response with a message content field.
        if response and 'message' in response and 'content' in response['message']:
            logger.info(f"Received summary from Ollama (model: {model}).")
            return str(response['message']['content'])

        logger.warning(f"Ollama LLM summary response structure unexpected: {response}")
        raise ValueError("Ollama returned unexpected response structure or empty content")

    except ollama.ResponseError as e:
        logger.error(f"Ollama API error: {e}")
        raise ConnectionError(f"Ollama API error: {e}")
    except Exception as e:
        logger.error(f"Error getting LLM summary from Ollama: {e}")
        raise
describepdf/openrouter_client.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenRouter client module for DescribePDF.
3
+
4
+ This module handles all interactions with the OpenRouter API for
5
+ VLM (Vision Language Model) image description and LLM text summarization.
6
+ """
7
+
8
+ import requests
9
+ import base64
10
+ import json
11
+ import logging
12
+ from typing import Dict, Any, List
13
+
14
+ # Get logger from config module
15
+ logger = logging.getLogger('describepdf')
16
+
17
+ # Constants
18
+ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
19
+ DEFAULT_TIMEOUT = 300 # 5 minutes
20
+
21
def encode_image_to_base64(image_bytes: bytes, mime_type: str) -> str:
    """
    Encode raw image bytes as a base64 data URI for the API.

    Args:
        image_bytes: Raw image bytes
        mime_type: MIME type of the image ('image/png' or 'image/jpeg')

    Returns:
        str: Data URI of the form 'data:<mime>;base64,<payload>'

    Raises:
        ValueError: If image encoding fails
    """
    try:
        payload = base64.b64encode(image_bytes).decode('utf-8')
    except Exception as e:
        logger.error(f"Error encoding image to Base64: {e}")
        raise ValueError(f"Failed to encode image: {e}")
    return f"data:{mime_type};base64,{payload}"
41
+
42
def call_openrouter_api(api_key: str, model: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Make a call to the OpenRouter Chat Completions API.

    Args:
        api_key: OpenRouter API key
        model: Model name to use
        messages: List of messages in API format

    Returns:
        Dict: The JSON response from the API

    Raises:
        ValueError: If API key is missing
        ConnectionError: If API call fails with error response
        TimeoutError: If API call times out
    """
    if not api_key:
        logger.error("OpenRouter API Key is missing.")
        raise ValueError("OpenRouter API Key is missing.")

    headers: Dict[str, str] = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload: Dict[str, Any] = {
        "model": model,
        "messages": messages
    }

    try:
        # Log a truncated view of the messages (privacy and log size);
        # serialize once instead of twice.
        serialized = json.dumps(messages)
        msg_log = serialized[:200] + ("..." if len(serialized) > 200 else "")
        logger.debug(f"Calling OpenRouter API. Model: {model}. Messages: {msg_log}")

        # Make API request
        response = requests.post(
            OPENROUTER_API_URL,
            headers=headers,
            json=payload,
            timeout=DEFAULT_TIMEOUT
        )
        response.raise_for_status()

        logger.debug(f"API call successful. Status: {response.status_code}.")
        return response.json()

    except requests.exceptions.Timeout:
        logger.error(f"API call timed out for model {model}.")
        raise TimeoutError(f"API call timed out for model {model}.")

    except requests.exceptions.RequestException as e:
        # BUGFIX: requests.Response is falsy for 4xx/5xx statuses
        # (Response.__bool__ delegates to .ok), so the error body must be
        # checked with 'is not None' rather than truthiness -- otherwise the
        # detailed error message below would never be built for HTTP errors.
        err_response = getattr(e, 'response', None)
        status_code = err_response.status_code if err_response is not None else 'N/A'
        response_text = err_response.text if err_response is not None else 'No response'
        logger.error(f"API call failed for model {model}. Status: {status_code}. Response: {response_text}")

        # Extract a human-readable error message from the response if possible.
        error_message = f"API Error: {e}"
        if err_response is not None:
            try:
                error_details = err_response.json()
                if 'error' in error_details and 'message' in error_details['error']:
                    error_message = f"API Error ({err_response.status_code}): {error_details['error']['message']}"
                else:
                    error_message = f"API Error ({err_response.status_code}): {err_response.text[:200]}"
            except ValueError:
                # requests' JSON decode error subclasses ValueError; catching
                # ValueError also covers json.JSONDecodeError.
                error_message = f"API Error ({err_response.status_code}): {err_response.text[:200]}"

        raise ConnectionError(error_message)
113
+
114
def get_vlm_description(api_key: str, model: str, prompt_text: str, image_bytes: bytes, mime_type: str) -> str:
    """
    Get a page description using a VLM through OpenRouter.

    Args:
        api_key: OpenRouter API key
        model: VLM model name
        prompt_text: Text prompt
        image_bytes: Bytes of the page image
        mime_type: MIME type of the image ('image/png' or 'image/jpeg')

    Returns:
        str: Generated description

    Raises:
        ValueError: If API key is missing, image encoding fails, or the
            response has no usable content
        ConnectionError: If the API call fails with an error response
        TimeoutError: If the API call times out
    """
    base64_image = encode_image_to_base64(image_bytes, mime_type)

    messages: List[Dict[str, Any]] = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {
                    "type": "image_url",
                    "image_url": {"url": base64_image}
                }
            ]
        }
    ]

    response_json = call_openrouter_api(api_key, model, messages)

    # Guard clause: no choices at all means a malformed response.
    choices = response_json.get('choices') if response_json else None
    if not choices:
        logger.warning(f"VLM response JSON structure unexpected: {response_json}")
        raise ValueError("VLM returned unexpected response structure")

    message = choices[0].get('message', {})
    content = message.get('content') if message else None
    if content:
        logger.info(f"Received VLM description for page (model: {model}).")
        return str(content)

    logger.warning("VLM response structure unexpected or content empty.")
    raise ValueError("VLM returned no usable content")
168
+
169
def get_llm_summary(api_key: str, model: str, prompt_text: str) -> str:
    """
    Get a summary using an LLM through OpenRouter.

    Args:
        api_key: OpenRouter API key
        model: LLM model for summary
        prompt_text: Prompt including the text to summarize

    Returns:
        str: Generated summary

    Raises:
        ValueError: If API key is missing or the response has no usable content
        ConnectionError: If the API call fails with an error response
        TimeoutError: If the API call times out
    """
    messages: List[Dict[str, Any]] = [
        {"role": "user", "content": prompt_text}
    ]

    response_json = call_openrouter_api(api_key, model, messages)

    # Guard clause: no choices at all means a malformed response.
    choices = response_json.get('choices') if response_json else None
    if not choices:
        logger.warning(f"LLM summary response JSON structure unexpected: {response_json}")
        raise ValueError("LLM returned unexpected response structure")

    message = choices[0].get('message', {})
    content = message.get('content') if message else None
    if content:
        logger.info(f"Received summary (model: {model}).")
        return str(content)

    logger.warning("LLM summary response structure unexpected or content empty.")
    raise ValueError("LLM returned no usable content")
describepdf/pdf_processor.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PDF processor module for DescribePDF.
3
+
4
+ This module handles all PDF file operations using PyMuPDF,
5
+ including rendering, text extraction, and file manipulation.
6
+ """
7
+
8
+ import io
9
+ import os
10
+ import tempfile
11
+ from typing import Tuple, List, Optional
12
+
13
+ # Get logger from config module
14
+ from .config import logger
15
+
16
+ try:
17
+ import pymupdf
18
+ PYMUPDF_AVAILABLE = True
19
+ except ImportError:
20
+ PYMUPDF_AVAILABLE = False
21
+ logger.error("PyMuPDF not installed. Install with 'pip install pymupdf'")
22
+
23
+ # Import PIL for image processing
24
+ try:
25
+ from PIL import Image
26
+ PIL_AVAILABLE = True
27
+ except ImportError:
28
+ PIL_AVAILABLE = False
29
+ logger.error("Pillow not installed. Install with 'pip install pillow'")
30
+
31
def get_pdf_pages(pdf_path: str) -> Tuple[Optional["pymupdf.Document"], Optional[List["pymupdf.Page"]], int]:
    """
    Open a PDF and return a list of page objects and the total number of pages.

    NOTE: The caller is responsible for calling close() on the returned document
    when done with it; the Page objects are only valid while it stays open.

    Annotations referencing pymupdf are quoted (string annotations) so this
    `def` does not raise NameError at import time when PyMuPDF is missing —
    otherwise the PYMUPDF_AVAILABLE fallback below could never run.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        Tuple containing:
        - pymupdf.Document: The open PDF document (caller must close), or None on error
        - List[pymupdf.Page]: List of page objects, or None on error
        - int: Total number of pages (0 if error)
    """
    if not PYMUPDF_AVAILABLE:
        logger.error("PyMuPDF is required for PDF processing but is not installed.")
        return None, None, 0

    try:
        doc = pymupdf.open(pdf_path)
        # Load every page up front to keep the API simple for callers.
        pages = [doc.load_page(i) for i in range(len(doc))]
        total_pages = len(doc)
        logger.info(f"Opened PDF '{os.path.basename(pdf_path)}' with {total_pages} pages.")
        return doc, pages, total_pages
    except Exception as e:
        logger.error(f"Error opening or reading PDF {pdf_path}: {e}")
        return None, None, 0
60
+
61
def render_page_to_image_bytes(page: "pymupdf.Page", image_format: str = "jpeg", dpi: int = 150) -> Tuple[Optional[bytes], Optional[str]]:
    """
    Render a PDF page to image bytes in memory.

    The `page` annotation is quoted so defining this function does not raise
    NameError when PyMuPDF is not installed (the availability guard below
    must be reachable in that case).

    Args:
        page: PyMuPDF Page object
        image_format: Desired format ('png' or 'jpeg'), case-insensitive
        dpi: Image resolution

    Returns:
        Tuple containing:
        - bytes: Image bytes
        - str: MIME type ('image/png' or 'image/jpeg')
        Returns (None, None) on error
    """
    if not PYMUPDF_AVAILABLE or not PIL_AVAILABLE:
        logger.error("PyMuPDF and Pillow are required for image rendering but are not installed.")
        return None, None

    # Normalize once; validate before doing any rendering work.
    fmt = image_format.lower()
    if fmt not in ("png", "jpeg"):
        logger.error(f"Unsupported image format: {image_format}")
        return None, None

    try:
        pix = page.get_pixmap(dpi=dpi)

        if fmt == "png":
            # PyMuPDF encodes PNG directly; no intermediate buffer needed.
            img_bytes = pix.tobytes("png")
            mime_type = "image/png"
        else:
            # NOTE(review): assumes pix.samples is raw RGB — true for
            # get_pixmap() without an alpha channel; would need "RGBA"
            # handling if alpha were ever requested.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=85)
            img_bytes = buffer.getvalue()
            mime_type = "image/jpeg"

        logger.debug(f"Rendered page {page.number + 1} to {fmt.upper()} bytes.")
        return img_bytes, mime_type

    except Exception as e:
        logger.error(f"Error rendering page {page.number + 1} to image: {e}")
        return None, None
108
+
109
def extract_all_text(pdf_path: str) -> Optional[str]:
    """
    Extract all text from a PDF file.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        str: Concatenated text from all pages (each page followed by a blank
        line), or None if there was an error
    """
    if not PYMUPDF_AVAILABLE:
        logger.error("PyMuPDF is required for text extraction but is not installed.")
        return None

    doc = None
    try:
        doc = pymupdf.open(pdf_path)
        # Build per-page chunks and join once: repeated `+=` on a string is
        # quadratic in total length and noticeably slow on large PDFs.
        all_text = "".join(
            doc.load_page(page_num).get_text("text") + "\n\n"
            for page_num in range(len(doc))
        )
        logger.info(f"Extracted text from all pages of '{os.path.basename(pdf_path)}'.")
        return all_text
    except Exception as e:
        logger.error(f"Error extracting text from PDF {pdf_path}: {e}")
        return None
    finally:
        # Always close the document if we opened it
        if doc is not None:
            doc.close()
139
+
140
def save_page_as_temp_pdf(original_doc: "pymupdf.Document", page_num: int) -> Optional[str]:
    """
    Save a specific page as a temporary PDF file.

    The `original_doc` annotation is quoted so this `def` does not raise
    NameError at import time when PyMuPDF is missing.

    Args:
        original_doc: The open original PDF document
        page_num: The page number (zero-based)

    Returns:
        str: Path to the temporary PDF file (caller must delete it),
        or None if there was an error
    """
    if not PYMUPDF_AVAILABLE:
        logger.error("PyMuPDF is required for PDF processing but is not installed.")
        return None

    new_doc = None
    temp_pdf_path = None

    try:
        # Reserve a uniquely-named path; delete=False because the file must
        # outlive this function for the caller to use.
        with tempfile.NamedTemporaryFile(suffix=".pdf", prefix="describepdf_page_", delete=False) as tmp_file:
            temp_pdf_path = tmp_file.name

        # Create a new single-page document containing only the requested page
        new_doc = pymupdf.open()
        new_doc.insert_pdf(original_doc, from_page=page_num, to_page=page_num)
        new_doc.save(temp_pdf_path)

        logger.debug(f"Saved page {page_num + 1} to temporary PDF: {temp_pdf_path}")
        return temp_pdf_path

    except Exception as e:
        logger.error(f"Error saving page {page_num + 1} as temporary PDF: {e}")
        # Clean up the reserved temp file on error so we don't leak it
        if temp_pdf_path and os.path.exists(temp_pdf_path):
            try:
                os.remove(temp_pdf_path)
                logger.debug(f"Cleaned up temporary PDF due to error: {temp_pdf_path}")
            except OSError as os_err:
                logger.warning(f"Failed to remove temporary PDF after error: {os_err}")
        return None

    finally:
        # Always close the new document if we created it
        if new_doc is not None:
            new_doc.close()
describepdf/summarizer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Summarizer module for DescribePDF.
3
+
4
+ This module handles the generation of document summaries from PDF text content
5
+ using either OpenRouter or Ollama LLM models.
6
+ """
7
+
8
+ import logging
9
+ from typing import Optional
10
+
11
+ from . import pdf_processor
12
+ from . import openrouter_client
13
+ from . import ollama_client
14
+ from .config import get_prompts
15
+
16
+ # Get logger from config module
17
+ logger = logging.getLogger('describepdf')
18
+
19
+ # Constants
20
+ MAX_CHARS_FOR_PROMPT = 512000 # Maximum characters to include in prompt (128K tokens approx.)
21
+
22
def generate_summary(
    pdf_path: str,
    provider: str = "openrouter",
    api_key: Optional[str] = None,
    ollama_endpoint: Optional[str] = None,
    model: Optional[str] = None
) -> Optional[str]:
    """
    Generate a summary of the complete textual content of a PDF using specified provider.

    Args:
        pdf_path: Path to the PDF file
        provider: Provider to use ("openrouter" or "ollama")
        api_key: OpenRouter API key (required for openrouter provider)
        ollama_endpoint: Ollama endpoint URL (required for ollama provider)
        model: LLM model to use for the summary

    Returns:
        str: The generated summary, or None if any step fails
    """
    logger.info(f"Starting summary generation for '{pdf_path}' using provider {provider} with model {model}.")

    # Pull the full text out of the PDF first; everything else depends on it.
    logger.info("Extracting full text from PDF...")
    full_text = pdf_processor.extract_all_text(pdf_path)

    # Guard clauses: extraction failure vs. a genuinely empty document.
    if full_text is None:
        logger.error("Failed to extract text for summary.")
        return None
    if not full_text.strip():
        logger.warning("PDF contains no extractable text for summary.")
        return "Document contains no extractable text."

    logger.info(f"Text extracted ({len(full_text)} characters). Preparing summary prompt...")

    # Fetch the summary prompt template; bail out if it isn't configured.
    summary_prompt_template = get_prompts().get("summary")
    if not summary_prompt_template:
        logger.error("Summary prompt template not found.")
        return None

    # Clip over-long documents so the prompt stays within the model budget.
    if len(full_text) > MAX_CHARS_FOR_PROMPT:
        logger.warning(
            f"PDF text ({len(full_text)} chars) exceeds limit ({MAX_CHARS_FOR_PROMPT}), truncating for summary."
        )
        full_text = full_text[:MAX_CHARS_FOR_PROMPT] + "\n\n[... text truncated ...]"

    prompt_text = summary_prompt_template.replace("[FULL_PDF_TEXT]", full_text)

    try:
        if provider == "openrouter":
            # OpenRouter needs an API key before we can call it.
            if not api_key:
                logger.error("OpenRouter API key is required for OpenRouter provider.")
                return None

            logger.info(f"Calling OpenRouter LLM for summary (model: {model})...")
            summary = openrouter_client.get_llm_summary(api_key, model, prompt_text)
            if not summary:
                logger.error("OpenRouter LLM call for summary returned no content.")
                return None
            logger.info("Summary generated successfully via OpenRouter.")
            return summary

        if provider == "ollama":
            # Ollama needs a reachable endpoint URL.
            if not ollama_endpoint:
                logger.error("Ollama endpoint URL is required for Ollama provider.")
                return None

            logger.info(f"Calling Ollama LLM for summary (model: {model})...")
            summary = ollama_client.get_llm_summary(ollama_endpoint, model, prompt_text)
            if not summary:
                logger.error("Ollama LLM call for summary returned no content.")
                return None
            logger.info("Summary generated successfully via Ollama.")
            return summary

        # Any other provider string is a configuration error.
        logger.error(f"Unsupported provider: {provider}")
        return None

    except ValueError as e:
        logger.error(f"Value error during summary generation: {e}")
        return None
    except ConnectionError as e:
        logger.error(f"Connection error during summary generation: {e}")
        return None
    except TimeoutError as e:
        logger.error(f"Timeout error during summary generation: {e}")
        return None
    except ImportError as e:
        logger.error(f"Import error during summary generation: {e}")
        return None
    except Exception as e:
        # Unknown failures are logged loudly and re-raised for the caller.
        logger.critical(f"Critical unexpected error during summary generation: {e}", exc_info=True)
        raise
describepdf/ui.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web UI module for DescribePDF with OpenRouter.
3
+
4
+ This module implements the Gradio-based web interface for the OpenRouter
5
+ provider version of DescribePDF.
6
+ """
7
+
8
+ import gradio as gr
9
+ import os
10
+ import tempfile
11
+ import logging
12
+ import secrets
13
+ from typing import Tuple, Optional, Dict, Any, List
14
+
15
+ from . import config
16
+ from . import core
17
+
18
+ theme = gr.themes.Soft(
19
+ primary_hue="red",
20
+ secondary_hue="rose",
21
+ spacing_size="lg",
22
+ )
23
+
24
def generate(
    pdf_file_obj: Optional[gr.File],
    ui_api_key: str,
    ui_vlm_model: str,
    ui_lang: str,
    ui_use_md: bool,
    ui_use_sum: bool,
    ui_sum_model: str,
    progress: gr.Progress = gr.Progress(track_tqdm=True)
) -> Tuple[str, gr.update, Optional[str]]:
    """
    Wrapper function to call the core conversion process and handle the Gradio UI.

    NOTE(review): the input component uses type="filepath", so at runtime
    Gradio may pass a path-like value rather than a gr.File; this code relies
    on it exposing a `.name` attribute — confirm against the Gradio version
    in use.

    Args:
        pdf_file_obj: Gradio File object for the uploaded PDF
        ui_api_key: OpenRouter API key from UI (overrides .env when non-blank)
        ui_vlm_model: VLM model name from UI
        ui_lang: Output language from UI
        ui_use_md: Whether to use Markitdown from UI
        ui_use_sum: Whether to generate a summary from UI
        ui_sum_model: Summary model name from UI
        progress: Gradio progress tracker

    Returns:
        Tuple containing:
        - str: Status message
        - gr.update: Download button update
        - Optional[str]: Markdown result content ("" when there is no result)
    """
    # Validate input file
    if pdf_file_obj is None:
        return "Please upload a PDF file.", gr.update(value=None, visible=False), None

    # Load environment config
    env_config = config.get_config()

    # Prepare configuration for this run: a non-blank UI key takes precedence
    # over the key configured in the environment.
    api_key = ui_api_key.strip() if ui_api_key.strip() else env_config.get("openrouter_api_key")

    current_run_config: Dict[str, Any] = {
        "provider": "openrouter",
        "openrouter_api_key": api_key,
        "vlm_model": ui_vlm_model,
        "output_language": ui_lang,
        "use_markitdown": ui_use_md,
        "use_summary": ui_use_sum,
        # Fall back to the .env summary model when the UI field is empty
        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
    }

    # Validate API key
    if not current_run_config.get("openrouter_api_key"):
        error_msg = "Error: OpenRouter API Key is missing. Provide it in the UI or set OPENROUTER_API_KEY in the .env file."
        logging.error(error_msg)
        return error_msg, gr.update(value=None, visible=False), None

    # Create progress callback for Gradio
    def progress_callback_gradio(progress_value: float, status: str) -> None:
        """
        Update Gradio progress bar with current progress and status message.

        Args:
            progress_value (float): Progress value between 0.0 and 1.0
            status (str): Current status message to display
        """
        # Clamp to [0, 1] so out-of-range values from core can't break the bar
        clamped_progress = max(0.0, min(1.0, progress_value))
        progress(clamped_progress, desc=status)
        logging.info(f"Progress: {status} ({clamped_progress*100:.1f}%)")

    # Run the conversion (blocking call; progress is reported via callback)
    status_message, result_markdown = core.convert_pdf_to_markdown(
        pdf_file_obj.name,
        current_run_config,
        progress_callback_gradio
    )

    # Handle the download file
    if result_markdown:
        try:
            # Get base filename from the uploaded PDF
            base_name = os.path.splitext(os.path.basename(pdf_file_obj.name))[0]
            download_filename = f"{base_name}_description.md"

            # Create a temporary file with a random component to avoid collisions.
            # The on-disk name differs from download_filename, which is only
            # shown in the button label.
            random_suffix = secrets.token_hex(4)
            temp_dir = tempfile.gettempdir()
            download_filepath = os.path.join(temp_dir, f"{base_name}_{random_suffix}.md")

            # Write markdown result to the temporary file
            with open(download_filepath, "w", encoding="utf-8") as md_file:
                md_file.write(result_markdown)

            logging.info(f"Markdown result saved to temporary file for download: {download_filepath}")
            download_button_update = gr.update(value=download_filepath, visible=True, label=f"Download '{download_filename}'")

        except Exception as e:
            # Conversion succeeded but the download file couldn't be written;
            # keep the markdown result and just annotate the status.
            logging.error(f"Error creating temporary file for download: {e}")
            status_message += " (Error creating download file)"
            download_button_update = gr.update(value=None, visible=False)
    else:
        download_button_update = gr.update(value=None, visible=False)

    return (
        status_message,
        download_button_update,
        result_markdown if result_markdown else ""
    )
130
+
131
def create_ui() -> gr.Blocks:
    """
    Create and return the Gradio interface for OpenRouter.

    This function sets up a Gradio web interface with tabs for PDF conversion
    and configuration. It loads initial settings from the environment config
    and provides UI components for adjusting settings for each conversion run.
    Settings changed in the UI apply only to the next run; they are not
    persisted back to the environment.

    Returns:
        gr.Blocks: Configured Gradio interface ready to be launched
    """
    # Load initial config from environment
    initial_env_config = config.get_config()

    # Define suggested model lists and languages (dropdowns allow custom
    # values, so these are suggestions, not a closed set)
    suggested_vlms: List[str] = [
        "qwen/qwen2.5-vl-72b-instruct",
        "google/gemini-2.5-pro-preview-03-25",
        "openai/chatgpt-4o-latest"
    ]

    suggested_llms: List[str] = [
        "google/gemini-2.5-flash-preview",
        "openai/chatgpt-4o-latest",
        "anthropic/claude-3.5-sonnet"
    ]

    suggested_languages: List[str] = [
        "English", "Spanish", "French", "German",
        "Chinese", "Japanese", "Italian",
        "Portuguese", "Russian", "Korean"
    ]

    # Set initial values from config
    initial_vlm = initial_env_config.get("or_vlm_model")
    initial_llm = initial_env_config.get("or_summary_model")
    initial_lang = initial_env_config.get("output_language")
    initial_use_md = initial_env_config.get("use_markitdown")
    initial_use_sum = initial_env_config.get("use_summary")

    # Used only to adjust the API-key field's label/placeholder below
    has_env_api_key = bool(initial_env_config.get("openrouter_api_key"))

    # Create the Gradio interface
    with gr.Blocks(title="DescribePDF", theme=theme) as iface:
        gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
        gr.Markdown(
            """<div style="display: flex;align-items: center;justify-content: center">
            [<a href="https://davidlms.github.io/describepdf/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
            """
        )
        gr.Markdown(
            "DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations."
            "\n\n"
            "Upload a PDF, adjust settings, and click 'Describe'. "
        )

        with gr.Tabs():
            # Generate tab
            with gr.TabItem("Generate", id=0):
                with gr.Row():
                    with gr.Column(scale=1):
                        pdf_input = gr.File(
                            label="Upload PDF",
                            file_types=['.pdf'],
                            type="filepath"
                        )
                        convert_button = gr.Button(
                            "Describe",
                            variant="primary"
                        )
                        progress_output = gr.Textbox(
                            label="Progress",
                            interactive=False,
                            lines=2
                        )
                        # Hidden until a result file exists (see generate())
                        download_button = gr.File(
                            label="Download Markdown",
                            visible=False,
                            interactive=False
                        )

                    with gr.Column(scale=2):
                        markdown_output = gr.Markdown(label="Result (Markdown)")

            # Configuration tab
            with gr.TabItem("Settings", id=1):
                gr.Markdown(
                    "Adjust settings for the *next* generation. These settings are **not** saved. "
                    "Defaults are controlled by the `.env` file."
                )
                api_key_input = gr.Textbox(
                    label="OpenRouter API Key" + (" (set in .env)" if has_env_api_key else ""),
                    type="password",
                    placeholder="Enter an API key here to override the one in .env" if has_env_api_key else "Enter your OpenRouter API key",
                    value=""
                )
                vlm_model_input = gr.Dropdown(
                    label="VLM Model",
                    choices=suggested_vlms,
                    value=initial_vlm,
                    allow_custom_value=True,
                    info="Select or type the OpenRouter VLM model name"
                )
                output_language_input = gr.Dropdown(
                    label="Output Language",
                    choices=suggested_languages,
                    value=initial_lang,
                    allow_custom_value=True,
                    info="Select or type the desired output language (e.g., English, Spanish)"
                )
                with gr.Row():
                    use_markitdown_checkbox = gr.Checkbox(
                        label="Use Markitdown for extra text context",
                        value=initial_use_md
                    )
                    use_summary_checkbox = gr.Checkbox(
                        label="Use PDF summary for augmented context (requires extra LLM call)",
                        value=initial_use_sum
                    )
                summary_llm_model_input = gr.Dropdown(
                    label="LLM Model for Summary",
                    choices=suggested_llms,
                    value=initial_llm,
                    allow_custom_value=True,
                    info="Select or type the OpenRouter LLM model name for summaries"
                )

        # Connect UI components: the order of inputs/outputs must match the
        # parameter order and return tuple of generate()
        conversion_inputs = [
            pdf_input, api_key_input, vlm_model_input, output_language_input,
            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
        ]
        conversion_outputs = [
            progress_output, download_button, markdown_output
        ]
        convert_button.click(
            fn=generate,
            inputs=conversion_inputs,
            outputs=conversion_outputs
        )

    return iface
273
+
274
def launch_app() -> gr.Blocks:
    """
    Start the application from the command line.

    Creates the Gradio UI and launches it. On Hugging Face Spaces (detected
    via the SPACE_ID environment variable) the platform launches the server
    itself, so we only build and return the Blocks object; locally we call
    launch() directly (which blocks until the server stops).

    Returns:
        gr.Blocks: The constructed interface. (The original annotation said
        None, but every path returned the app — the annotation was wrong.)
    """
    app: gr.Blocks = create_ui()

    # Only launch ourselves when NOT running on Hugging Face Spaces
    if "SPACE_ID" not in os.environ:
        app.launch()

    return app
289
+
290
+ if __name__ == "__main__":
291
+ launch_app()
describepdf/ui_ollama.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web UI module for DescribePDF with Ollama.
3
+
4
+ This module implements the Gradio-based web interface for the Ollama
5
+ provider version of DescribePDF.
6
+ """
7
+
8
+ import gradio as gr
9
+ import os
10
+ import tempfile
11
+ import logging
12
+ import secrets
13
+ from typing import Tuple, Optional, Dict, Any, List
14
+
15
+ from . import config
16
+ from . import core
17
+ from . import ollama_client
18
+
19
+ theme = gr.themes.Soft(
20
+ primary_hue="red",
21
+ secondary_hue="rose",
22
+ spacing_size="lg",
23
+ )
24
+
25
def generate(
    pdf_file_obj: Optional[gr.File],
    ollama_endpoint: str,
    ui_vlm_model: str,
    ui_lang: str,
    ui_use_md: bool,
    ui_use_sum: bool,
    ui_sum_model: str,
    progress: gr.Progress = gr.Progress(track_tqdm=True)
) -> Tuple[str, gr.update, Optional[str]]:
    """
    Wrapper function to call the core conversion process and handle the Gradio UI for Ollama.

    NOTE(review): the input component uses type="filepath", so at runtime
    Gradio may pass a path-like value rather than a gr.File; this code relies
    on it exposing a `.name` attribute — confirm against the Gradio version
    in use.

    Args:
        pdf_file_obj: Gradio File object for the uploaded PDF
        ollama_endpoint: Ollama server endpoint URL
        ui_vlm_model: VLM model name from UI
        ui_lang: Output language from UI
        ui_use_md: Whether to use Markitdown from UI
        ui_use_sum: Whether to generate a summary from UI
        ui_sum_model: Summary model name from UI
        progress: Gradio progress tracker

    Returns:
        Tuple containing:
        - str: Status message
        - gr.update: Download button update
        - Optional[str]: Markdown result content ("" when there is no result)
    """
    # Validate input file
    if pdf_file_obj is None:
        return "Please upload a PDF file.", gr.update(value=None, visible=False), None

    # Check Ollama availability up front so the user gets a clear error
    # instead of a mid-conversion failure
    if not ollama_client.check_ollama_availability(ollama_endpoint):
        error_msg = f"Error: Could not connect to Ollama at {ollama_endpoint}. Make sure it is running."
        logging.error(error_msg)
        return error_msg, gr.update(value=None, visible=False), None

    # Prepare configuration for this run
    current_run_config: Dict[str, Any] = {
        "provider": "ollama",
        "ollama_endpoint": ollama_endpoint,
        "vlm_model": ui_vlm_model,
        "output_language": ui_lang,
        "use_markitdown": ui_use_md,
        "use_summary": ui_use_sum,
        "summary_llm_model": ui_sum_model
    }

    # Create progress callback for Gradio
    def progress_callback_gradio(progress_value: float, status: str) -> None:
        """
        Update Gradio progress bar with current progress and status message.

        Args:
            progress_value (float): Progress value between 0.0 and 1.0
            status (str): Current status message to display
        """
        # Clamp to [0, 1] so out-of-range values from core can't break the bar
        clamped_progress = max(0.0, min(1.0, progress_value))
        progress(clamped_progress, desc=status)
        logging.info(f"Progress: {status} ({clamped_progress*100:.1f}%)")

    # Run the conversion (blocking call; progress is reported via callback)
    status_message, result_markdown = core.convert_pdf_to_markdown(
        pdf_file_obj.name,
        current_run_config,
        progress_callback_gradio
    )

    # Handle the download file
    if result_markdown:
        try:
            # Get base filename from the uploaded PDF
            base_name = os.path.splitext(os.path.basename(pdf_file_obj.name))[0]
            download_filename = f"{base_name}_description.md"

            # Create a temporary file with a random component to avoid collisions.
            # The on-disk name differs from download_filename, which is only
            # shown in the button label.
            random_suffix = secrets.token_hex(4)
            temp_dir = tempfile.gettempdir()
            download_filepath = os.path.join(temp_dir, f"{base_name}_{random_suffix}.md")

            # Write markdown result to the temporary file
            with open(download_filepath, "w", encoding="utf-8") as md_file:
                md_file.write(result_markdown)

            logging.info(f"Markdown result saved to temporary file for download: {download_filepath}")
            download_button_update = gr.update(value=download_filepath, visible=True, label=f"Download '{download_filename}'")

        except Exception as e:
            # Conversion succeeded but the download file couldn't be written;
            # keep the markdown result and just annotate the status.
            logging.error(f"Error creating temporary file for download: {e}")
            status_message += " (Error creating download file)"
            download_button_update = gr.update(value=None, visible=False)
    else:
        download_button_update = gr.update(value=None, visible=False)

    return (
        status_message,
        download_button_update,
        result_markdown if result_markdown else ""
    )
126
+
127
def create_ui() -> gr.Blocks:
    """
    Create and return the Gradio interface for Ollama.

    This function sets up a Gradio web interface with tabs for PDF conversion
    and configuration. It loads initial settings from the environment config
    and provides UI components for adjusting settings for each conversion run.
    Settings changed in the UI apply only to the next run; they are not
    persisted back to the environment.

    Returns:
        gr.Blocks: Configured Gradio interface ready to be launched
    """
    # Load initial config from environment
    initial_env_config = config.get_config()

    # Define suggested model lists and languages (dropdowns allow custom
    # values, so these are suggestions, not a closed set)
    suggested_vlms: List[str] = ["llama3.2-vision"]
    suggested_llms: List[str] = ["qwen2.5", "llama3.2"]
    suggested_languages: List[str] = [
        "English", "Spanish", "French", "German",
        "Chinese", "Japanese", "Italian",
        "Portuguese", "Russian", "Korean"
    ]

    # Set initial values from config, with local fallbacks
    initial_endpoint = initial_env_config.get("ollama_endpoint", "http://localhost:11434")
    initial_vlm = initial_env_config.get("ollama_vlm_model", "llama3.2-vision")
    initial_llm = initial_env_config.get("ollama_summary_model", "qwen2.5")
    initial_lang = initial_env_config.get("output_language", "English")
    initial_use_md = initial_env_config.get("use_markitdown", False)
    initial_use_sum = initial_env_config.get("use_summary", False)

    # Create the Gradio interface
    with gr.Blocks(title="DescribePDF", theme=theme) as iface:
        gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
        gr.Markdown(
            """<div style="display: flex;align-items: center;justify-content: center">
            [<a href="https://davidlms.github.io/describepdf/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
            """
        )
        gr.Markdown(
            "DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations."
            "\n\n"
            "Upload a PDF, adjust settings, and click 'Describe'. "
        )

        with gr.Tabs():
            # Generate tab
            with gr.TabItem("Generate", id=0):
                with gr.Row():
                    with gr.Column(scale=1):
                        pdf_input = gr.File(
                            label="Upload PDF",
                            file_types=['.pdf'],
                            type="filepath"
                        )
                        convert_button = gr.Button(
                            "Describe",
                            variant="primary"
                        )
                        progress_output = gr.Textbox(
                            label="Progress",
                            interactive=False,
                            lines=2
                        )
                        # Hidden until a result file exists (see generate())
                        download_button = gr.File(
                            label="Download Markdown",
                            visible=False,
                            interactive=False
                        )

                    with gr.Column(scale=2):
                        markdown_output = gr.Markdown(label="Result (Markdown)")

            # Configuration tab
            with gr.TabItem("Settings", id=1):
                gr.Markdown(
                    "Adjust settings for the *next* generation. These settings are **not** saved. "
                    "Defaults are controlled by the `.env` file."
                )
                ollama_endpoint_input = gr.Textbox(
                    label="Ollama Endpoint",
                    value=initial_endpoint,
                    placeholder="http://localhost:11434",
                    info="URL of your Ollama server"
                )
                vlm_model_input = gr.Dropdown(
                    label="VLM Model",
                    choices=suggested_vlms,
                    value=initial_vlm,
                    allow_custom_value=True,
                    info="Select or type the Ollama vision model name"
                )
                output_language_input = gr.Dropdown(
                    label="Output Language",
                    choices=suggested_languages,
                    value=initial_lang,
                    allow_custom_value=True,
                    info="Select or type the desired output language (e.g., English, Spanish)"
                )
                with gr.Row():
                    use_markitdown_checkbox = gr.Checkbox(
                        label="Use Markitdown for extra text context",
                        value=initial_use_md
                    )
                    use_summary_checkbox = gr.Checkbox(
                        label="Use PDF summary for augmented context (requires extra LLM call)",
                        value=initial_use_sum
                    )
                summary_llm_model_input = gr.Dropdown(
                    label="LLM Model for Summary",
                    choices=suggested_llms,
                    value=initial_llm,
                    allow_custom_value=True,
                    info="Select or type the Ollama LLM model name for summaries"
                )

        # Connect UI components: the order of inputs/outputs must match the
        # parameter order and return tuple of generate()
        conversion_inputs = [
            pdf_input, ollama_endpoint_input, vlm_model_input, output_language_input,
            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
        ]
        conversion_outputs = [
            progress_output, download_button, markdown_output
        ]
        convert_button.click(
            fn=generate,
            inputs=conversion_inputs,
            outputs=conversion_outputs
        )

    return iface
258
+
259
def launch_app() -> None:
    """
    Start the application from the command line.

    Builds the Ollama-flavored Gradio UI and launches the server; launch()
    blocks until the server is stopped.
    """
    create_ui().launch()
267
+
268
+ if __name__ == "__main__":
269
+ launch_app()
main.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main entry point for DescribePDF application.
3
+
4
+ This module handles command-line argument parsing and routes to the appropriate
5
+ UI or CLI functionality based on the provided arguments.
6
+ """
7
+
8
+ import argparse
9
+ import sys
10
+ from typing import List, Optional
11
+
12
+ from describepdf.config import logger
13
+
14
def parse_arguments(args: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse the mode-selection command line flags.

    Args:
        args: List of command line arguments (default: sys.argv[1:])

    Returns:
        argparse.Namespace: Parsed arguments carrying the `web` and
        `web_ollama` boolean flags.
    """
    mode_parser = argparse.ArgumentParser(add_help=False)
    mode_parser.add_argument('--web', action='store_true', help='Start in web mode with Gradio (OpenRouter)')
    mode_parser.add_argument('--web-ollama', action='store_true', help='Start in web mode with Gradio (Ollama local)')

    # Consume only the flags we know about; anything left over is handled
    # later by the CLI parser.
    parsed, _remaining = mode_parser.parse_known_args(args)
    return parsed
31
+
32
def main(args: Optional[List[str]] = None) -> int:
    """
    Main function that starts the appropriate application mode.

    Args:
        args: List of command line arguments (default: sys.argv[1:])

    Returns:
        int: Exit code (0 for success, non-zero for error)
    """
    # Logging is already configured in config.py; just use the shared logger.
    logger.info("Starting DescribePDF...")

    parsed_args = parse_arguments(args)

    try:
        if parsed_args.web:
            # Web UI backed by OpenRouter.
            from describepdf import ui
            logger.info("Starting in WEB mode with Gradio interface for OpenRouter...")
            app_ui = ui.create_ui()
            app_ui.launch()
            logger.info("Web UI stopped.")
            return 0

        elif parsed_args.web_ollama:
            # Web UI backed by a local Ollama server.
            from describepdf import ui_ollama
            logger.info("Starting in WEB mode with Gradio interface for Ollama...")
            app_ui = ui_ollama.create_ui()
            app_ui.launch()
            logger.info("Web UI (Ollama) stopped.")
            return 0

        else:
            # Default: command-line interface.
            from describepdf import cli
            logger.info("Starting in CLI mode...")
            cli.run_cli()
            return 0

    except ImportError as e:
        # Log the failure once (the previous version emitted the same
        # exception in two consecutive log lines).
        logger.error(f"Failed to start, likely a missing dependency: {e}")
        return 1

    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the app, not an error.
        logger.info("Application stopped by user.")
        return 0

    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
        return 1
87
+
88
if __name__ == "__main__":
    # Propagate the application's exit status to the shell.
    sys.exit(main())
prompts/summary_prompt.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Please provide a concise summary of the following document content. The summary should capture the main topics and purpose of the document.
2
+
3
+ Document Text:
4
+ ```markdown
5
+ [FULL_PDF_TEXT]
6
+ ```
prompts/vlm_prompt_base.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ IMPORTANT: THE ENTIRE RESPONSE MUST BE WRITTEN IN [LANGUAGE]. DO NOT USE ANY OTHER LANGUAGE.
2
+
3
+ Describe the content of this page for a visually impaired person.
4
+ This is page [PAGE_NUM] of [TOTAL_PAGES].
5
+
6
+ YOUR TASK:
7
+ 1. Describe all visual elements (images, layout, charts, tables) in [LANGUAGE]
8
+ 2. Include all text content in [LANGUAGE]
9
+ 3. Organize the information in a structured way
10
+ 4. If text appears in another language in the document, translate it to [LANGUAGE]
11
+
12
+ RESPONSE FORMAT:
13
+ - Start directly with your description
14
+ - Write EVERYTHING in [LANGUAGE] only
15
+ - Be thorough but clear
16
+
17
+ Start your response now in [LANGUAGE]:
prompts/vlm_prompt_full.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ IMPORTANT: THE ENTIRE RESPONSE MUST BE WRITTEN IN [LANGUAGE]. DO NOT USE ANY OTHER LANGUAGE.
2
+
3
+ Describe the content of this page for a visually impaired person.
4
+ This is page [PAGE_NUM] of [TOTAL_PAGES].
5
+
6
+ YOUR TASK:
7
+ 1. Describe all visual elements (images, layout, charts, tables) in [LANGUAGE]
8
+ 2. Include all text content in [LANGUAGE]
9
+ 3. Organize the information in a structured way
10
+ 4. If text appears in another language in the document, translate it to [LANGUAGE]
11
+
12
+ RESPONSE FORMAT:
13
+ - Start directly with your description
14
+ - Write EVERYTHING in [LANGUAGE] only
15
+ - Be thorough but clear
16
+
17
+ Before writing your description, review the additional context provided below.
18
+
19
+ As additional context, here is a preliminary text extraction from the page:
20
+ ```markdown
21
+ [MARKDOWN_CONTEXT]
22
+ ```
23
+
24
+ This page is part of a document with the following summary:
25
+ [SUMMARY_CONTEXT]
26
+ Start your response directly with the description for page [PAGE_NUM] in [LANGUAGE]:
prompts/vlm_prompt_with_markdown.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Describe the content of this page for a visually impaired person.
2
+ This is page [PAGE_NUM] of [TOTAL_PAGES].
3
+ The description must be in [LANGUAGE].
4
+ Focus on describing visual elements (images, layout, charts, tables) and the text content in a structured way. All text content must be in the description too.
5
+
6
+ As additional context, here is a preliminary text extraction from the page:
7
+ ```markdown
8
+ [MARKDOWN_CONTEXT]
9
+ ```
10
+ Start your response directly with the description for page [PAGE_NUM]:
prompts/vlm_prompt_with_summary.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ IMPORTANT: THE ENTIRE RESPONSE MUST BE WRITTEN IN [LANGUAGE]. DO NOT USE ANY OTHER LANGUAGE.
2
+
3
+ Describe the content of this page for a visually impaired person.
4
+ This is page [PAGE_NUM] of [TOTAL_PAGES].
5
+
6
+ YOUR TASK:
7
+ 1. Describe all visual elements (images, layout, charts, tables) in [LANGUAGE]
8
+ 2. Include all text content in [LANGUAGE]
9
+ 3. Organize the information in a structured way
10
+ 4. If text appears in another language in the document, translate it to [LANGUAGE]
11
+
12
+ RESPONSE FORMAT:
13
+ - Start directly with your description
14
+ - Write EVERYTHING in [LANGUAGE] only
15
+ - Be thorough but clear
16
+
17
+ This page is part of a document with the following summary:
18
+ [SUMMARY_CONTEXT]
19
+
20
+ Start your response directly with the description for page [PAGE_NUM] in [LANGUAGE]:
pytest.ini ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = tests
3
+ python_files = test_*.py
4
+ python_classes = Test*
5
+ python_functions = test_*
6
+
7
+ markers =
8
+ unit: marks tests as unit tests
9
+ integration: marks tests as integration tests
10
+
11
+ # Configuration for pytest-cov
12
+ addopts = --cov=describepdf --cov-report=term-missing
13
+
14
+ # Logging configuration
15
+ log_cli = true
16
+ log_cli_level = INFO
17
+ log_cli_format = %(asctime)s - %(levelname)s - [%(module)s] - %(message)s
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=5.20.1
2
+ pymupdf>=1.24.10
3
+ requests>=2.32.3
4
+ python-dotenv>=1.1.0
5
+ markitdown[pdf]>=0.1.1
6
+ pillow>=10.4.0
7
+ ollama>=0.4.7
8
+ tqdm>=4.67.0
setup.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from setuptools import setup, find_packages

def _read_requirements(path: str = 'requirements.txt') -> list:
    """Read dependency specifiers, skipping blank lines and # comments.

    The previous version used ``f.read().splitlines()`` verbatim, which would
    feed comment and empty lines into ``install_requires`` and break the
    install if requirements.txt ever contained either.
    """
    with open(path) as f:
        return [
            line.strip()
            for line in f
            if line.strip() and not line.strip().startswith('#')
        ]

setup(
    name="describepdf",
    version="0.1.0",
    description="Convert PDFs to detailed Markdown descriptions using Vision-Language Models",
    author="David Romero",
    packages=find_packages(),
    include_package_data=True,
    install_requires=_read_requirements(),
    entry_points={
        'console_scripts': [
            'describepdf=describepdf.cli:run_cli',
            'describepdf-web=describepdf.ui:launch_app',
            'describepdf-web-ollama=describepdf.ui_ollama:launch_app',
        ],
    },
    python_requires='>=3.8',
)