David committed · Commit 2e375e5 · 1 parent: 5d46fa8
add pages selection

Files changed:
- describepdf/cli.py +10 -0
- describepdf/config.py +9 -35
- describepdf/core.py +82 -4
- describepdf/ui.py +26 -12
- describepdf/ui_ollama.py +32 -15
describepdf/cli.py
CHANGED
@@ -61,6 +61,10 @@ def setup_cli_parser() -> argparse.ArgumentParser:
         help="VLM model to use (default: configured in .env)"
     )
 
+    parser.add_argument(
+        "--pages",
+        help="Pages to process (e.g. '1,3,5-10,15'). Default: all pages."
+    )
     parser.add_argument(
         "-l", "--language",
         help="Output language (default: configured in .env)"
@@ -157,6 +161,7 @@ def run_cli() -> None:
         "output_language": args.language if args.language else env_config.get("output_language"),
         "use_markitdown": args.use_markitdown if args.use_markitdown is not None else env_config.get("use_markitdown"),
         "use_summary": args.use_summary if args.use_summary is not None else env_config.get("use_summary"),
+        "page_selection": args.pages if args.pages else env_config.get("page_selection")
     }
 
     # Configure provider-specific settings
@@ -219,6 +224,11 @@ def run_cli() -> None:
     logger.info(f"Summary: {'Yes' if run_config['use_summary'] else 'No'}")
     if run_config.get('use_summary') and run_config.get('summary_llm_model'):
         logger.info(f"Summary model: {run_config['summary_llm_model']}")
+
+    if run_config.get('page_selection'):
+        logger.info(f"Page selection: {run_config['page_selection']}")
+    else:
+        logger.info("Page selection: All pages")
 
     # Create progress callback
     progress_callback = create_progress_callback()
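For context, a minimal standalone sketch of how the new flag behaves once merged with the .env fallback (the parser and env_config below are simplified stand-ins, not the full describepdf CLI):

# Sketch of the --pages flag added above; env_config is a simplified stand-in for load_env_config().
import argparse

parser = argparse.ArgumentParser(prog="describepdf")
parser.add_argument("--pages", help="Pages to process (e.g. '1,3,5-10,15'). Default: all pages.")

env_config = {"page_selection": None}  # illustrative default, as if nothing were set in .env

args = parser.parse_args(["--pages", "1,3,5-10"])
run_config = {
    # the CLI value wins; otherwise fall back to the .env-derived default
    "page_selection": args.pages if args.pages else env_config.get("page_selection"),
}
print(run_config)  # {'page_selection': '1,3,5-10'}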
describepdf/config.py
CHANGED
@@ -6,7 +6,7 @@ and prompt templates from files.
 """
 import os
 import logging
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, List
 from dotenv import load_dotenv
 import pathlib
 
@@ -14,41 +14,11 @@ import pathlib
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(module)s] - %(message)s')
 logger = logging.getLogger('describepdf')
 
-def _resolve_prompts_directory() -> pathlib.Path:
-    """
-    Resolve the path to the prompts directory with multiple fallback strategies.
-    Returns:
-        pathlib.Path: Path to the prompts directory
-    """
-    # List of potential paths to check
-    potential_paths = [
-        # Current file's parent directory
-        pathlib.Path(__file__).parent.parent / "prompts",
-
-        # Relative to the current working directory
-        pathlib.Path.cwd() / "prompts",
-
-        # Absolute path fallback (useful in deployment)
-        pathlib.Path("/app/prompts"),
-        pathlib.Path("/workspace/prompts"),
-
-        # Hugging Face Spaces specific path
-        pathlib.Path("/home/user/app/prompts")
-    ]
-
-    # Try each path
-    for path in potential_paths:
-        if path.is_dir():
-            logger.info(f"Prompts directory found at: {path}")
-            return path
-
-    # If no path is found
-    logger.error("Could not locate prompts directory. Using a temporary fallback.")
-    return pathlib.Path(__file__).parent / "prompts"
-
 # Directory containing prompt templates (making path absolute by using current file location)
 SCRIPT_DIR = pathlib.Path(__file__).parent.parent.absolute()
-PROMPTS_DIR =
+PROMPTS_DIR = pathlib.Path(SCRIPT_DIR) / "prompts"
+
+# Default configuration values
 
 # Default configuration values
 DEFAULT_CONFIG: Dict[str, Any] = {
@@ -62,7 +32,8 @@ DEFAULT_CONFIG: Dict[str, Any] = {
 
     "output_language": "English",
     "use_markitdown": False,
-    "use_summary": False
+    "use_summary": False,
+    "page_selection": None
 }
 
 # Mapping of prompt template identifiers to their file names
@@ -122,6 +93,9 @@ def load_env_config() -> Dict[str, Any]:
 
     if os.getenv("DEFAULT_USE_SUMMARY"):
         loaded_config["use_summary"] = str(os.getenv("DEFAULT_USE_SUMMARY")).lower() == 'true'
+
+    if os.getenv("DEFAULT_PAGE_SELECTION"):
+        loaded_config["page_selection"] = os.getenv("DEFAULT_PAGE_SELECTION")
 
     logger.info("Configuration loaded from environment variables.")
 
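A small sketch of how the new DEFAULT_PAGE_SELECTION variable flows into the loaded configuration (illustrative value; assumes python-dotenv is available, as config.py already requires):

# Sketch: DEFAULT_PAGE_SELECTION overriding the new "page_selection": None default.
import os
from dotenv import load_dotenv

load_dotenv()  # would pick up DEFAULT_PAGE_SELECTION from a .env file if present
os.environ.setdefault("DEFAULT_PAGE_SELECTION", "1,3,5-10")  # illustrative value for this sketch

loaded_config = {"page_selection": None}  # mirrors the DEFAULT_CONFIG entry added above
if os.getenv("DEFAULT_PAGE_SELECTION"):
    loaded_config["page_selection"] = os.getenv("DEFAULT_PAGE_SELECTION")

print(loaded_config["page_selection"])  # 1,3,5-10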
describepdf/core.py
CHANGED
@@ -24,22 +24,87 @@ class ConversionError(Exception):
     """Error raised during PDF conversion process."""
     pass
 
-def format_markdown_output(descriptions: List[str], original_filename: str) -> str:
+def parse_page_selection(selection_string: Optional[str], total_pages: int) -> List[int]:
+    """
+    Parse a page selection string into a list of page indices.
+
+    Args:
+        selection_string: String with page selection (e.g. "1,3,5-10,15")
+        total_pages: Total number of pages in the document
+
+    Returns:
+        List[int]: List of zero-based page indices to process
+    """
+    if not selection_string:
+        # Return all pages if selection is empty
+        return list(range(total_pages))
+
+    page_indices = []
+
+    try:
+        sections = selection_string.split(',')
+        for section in sections:
+            section = section.strip()
+            if not section:
+                continue
+
+            if '-' in section:
+                # Handle page range
+                start, end = section.split('-', 1)
+                start_idx = int(start.strip()) - 1  # Convert to 0-based index
+                end_idx = int(end.strip()) - 1
+
+                # Validate range
+                if start_idx < 0 or end_idx >= total_pages or start_idx > end_idx:
+                    logger.warning(f"Invalid page range: {section}. Must be between 1 and {total_pages}.")
+                    continue
+
+                page_indices.extend(range(start_idx, end_idx + 1))
+            else:
+                # Handle single page
+                page_idx = int(section) - 1  # Convert to 0-based index
+
+                # Validate page number
+                if page_idx < 0 or page_idx >= total_pages:
+                    logger.warning(f"Invalid page number: {section}. Must be between 1 and {total_pages}.")
+                    continue
+
+                page_indices.append(page_idx)
+
+        # Remove duplicates and sort
+        page_indices = sorted(set(page_indices))
+
+        if not page_indices:
+            logger.warning("No valid pages specified. Processing all pages.")
+            return list(range(total_pages))
+
+        return page_indices
+
+    except ValueError as e:
+        logger.error(f"Error parsing page selection '{selection_string}': {e}. Processing all pages.")
+        return list(range(total_pages))
+
+def format_markdown_output(descriptions: List[str], original_filename: str, page_numbers: Optional[List[int]] = None) -> str:
     """
     Combine page descriptions into a single Markdown file.
 
     Args:
         descriptions: List of strings, each being a description of a page
         original_filename: Name of the original PDF file
+        page_numbers: List of actual page numbers corresponding to descriptions (1-based)
 
     Returns:
         str: Complete Markdown content
     """
     md_content = f"# Description of PDF: {original_filename}\n\n"
+
     for i, desc in enumerate(descriptions):
-        md_content += f"## Page {i + 1}\n\n"
+        # Use actual page number if provided, otherwise use sequential numbering
+        page_num = page_numbers[i] if page_numbers else (i + 1)
+        md_content += f"## Page {page_num}\n\n"
         md_content += desc if desc else "*No description generated for this page.*"
         md_content += "\n\n---\n\n"
+
     return md_content
 
 def convert_pdf_to_markdown(
@@ -175,7 +240,17 @@ def convert_pdf_to_markdown(
     page_processing_progress_start = pdf_load_progress
     total_page_progress_ratio = (0.98 - page_processing_progress_start) if total_pages > 0 else 0
 
-    for i, page in enumerate(pages):
+    # Parse page selection
+    page_selection = cfg.get("page_selection")
+    selected_indices = parse_page_selection(page_selection, total_pages)
+
+    if page_selection:
+        logger.info(f"Processing {len(selected_indices)} selected pages out of {total_pages} total pages.")
+    else:
+        logger.info(f"Processing all {total_pages} pages.")
+
+    for i in selected_indices:
+        page = pages[i]
         page_num = i + 1
         current_page_ratio = (page_num / total_pages) if total_pages > 0 else 1.0
 
@@ -306,7 +381,10 @@ def convert_pdf_to_markdown(
     # Generate final markdown
     final_progress = 0.99
     progress_callback(final_progress, "Combining page descriptions into final Markdown...")
-    final_markdown = format_markdown_output(all_descriptions, original_filename)
+
+    actual_page_numbers = [i + 1 for i in selected_indices] if 'selected_indices' in locals() else None
+
+    final_markdown = format_markdown_output(all_descriptions, original_filename, actual_page_numbers)
     logger.info("Final Markdown content assembled.")
 
     # Report completion
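To make the new helper's behavior concrete, a quick usage sketch (assumes the describepdf package is importable, e.g. after an editable install; the outputs follow from the parsing logic above):

# Usage sketch for the parse_page_selection helper added above.
from describepdf.core import parse_page_selection

# 1-based selection string over a 20-page document -> sorted, de-duplicated 0-based indices
print(parse_page_selection("1,3,5-10,15", total_pages=20))  # [0, 2, 4, 5, 6, 7, 8, 9, 14]

# Out-of-range entries are skipped with a warning; if nothing valid remains, all pages are processed
print(parse_page_selection("50", total_pages=5))  # [0, 1, 2, 3, 4]

# An empty selection means "all pages"
print(parse_page_selection(None, total_pages=3))  # [0, 1, 2]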
describepdf/ui.py
CHANGED
@@ -21,7 +21,7 @@ theme = gr.themes.Soft(
     spacing_size="lg",
 )
 
-def generate(
+def convert_pdf_to_descriptive_markdown(
     pdf_file_obj: Optional[gr.File],
     ui_api_key: str,
     ui_vlm_model: str,
@@ -29,25 +29,32 @@ def generate(
     ui_use_md: bool,
     ui_use_sum: bool,
     ui_sum_model: str,
+    ui_page_selection: str,
     progress: gr.Progress = gr.Progress(track_tqdm=True)
 ) -> Tuple[str, gr.update, Optional[str]]:
     """
-    [original one-line docstring summary not preserved in this view]
+    Convert a PDF file to detailed page-by-page Markdown descriptions using Vision-Language Models.
+
+    This function processes the uploaded PDF, analyzing the visual and textual content of each page
+    using OpenRouter's Vision-Language Models (VLMs). It generates rich, contextual descriptions in
+    Markdown format that capture both the visual elements and text content of the document, making
+    the PDF accessible and searchable in contexts where traditional text extraction would fail.
 
     Args:
         pdf_file_obj: Gradio File object for the uploaded PDF
         ui_api_key: OpenRouter API key from UI
-        ui_vlm_model: VLM model name from UI
-        ui_lang: Output language
-        ui_use_md: Whether to use Markitdown
-        ui_use_sum: Whether to generate a summary
-        ui_sum_model: Summary model name from UI
+        ui_vlm_model: VLM model name from UI (e.g., qwen/qwen2.5-vl-72b-instruct)
+        ui_lang: Output language for descriptions (e.g., English, Spanish)
+        ui_use_md: Whether to use Markitdown for enhanced text extraction
+        ui_use_sum: Whether to generate a document summary for context
+        ui_sum_model: Summary model name from UI (e.g., google/gemini-2.5-flash-preview)
+        ui_page_selection: Optional page selection string (e.g., "1,3,5-10")
         progress: Gradio progress tracker
 
     Returns:
         Tuple containing:
-        - str: Status message
-        - gr.update: Download button update
+        - str: Status message indicating success or failure
+        - gr.update: Download button update with the result file
         - Optional[str]: Markdown result content
     """
     # Validate input file
@@ -67,7 +74,8 @@ def generate(
         "output_language": ui_lang,
         "use_markitdown": ui_use_md,
         "use_summary": ui_use_sum,
-        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
+        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model"),
+        "page_selection": ui_page_selection.strip() if ui_page_selection.strip() else None
     }
 
     # Validate API key
@@ -238,6 +246,12 @@ def create_ui() -> gr.Blocks:
            allow_custom_value=True,
            info="Select or type the desired output language (e.g., English, Spanish)"
        )
+       page_selection_input = gr.Textbox(
+           label="Page Selection (Optional)",
+           value="",
+           placeholder="Example: 1,3,5-10,15 (leave empty for all pages)",
+           info="Specify individual pages or ranges to process"
+       )
        with gr.Row():
            use_markitdown_checkbox = gr.Checkbox(
                label="Use Markitdown for extra text context",
@@ -258,13 +272,13 @@
        # Connect UI components
        conversion_inputs = [
            pdf_input, api_key_input, vlm_model_input, output_language_input,
-           use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
+           use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input, page_selection_input
        ]
        conversion_outputs = [
            progress_output, download_button, markdown_output
        ]
        convert_button.click(
-           fn=generate,
+           fn=convert_pdf_to_descriptive_markdown,
            inputs=conversion_inputs,
            outputs=conversion_outputs
        )
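The same empty-string-to-None normalization, shown in a minimal standalone Gradio sketch (hypothetical component names and handler; not the actual DescribePDF UI):

# Minimal Gradio sketch mirroring the page-selection textbox wiring above (hypothetical, standalone).
import gradio as gr

def describe(pdf_path: str, pages: str) -> str:
    # Empty or whitespace-only input means "all pages", matching the change above.
    page_selection = pages.strip() if pages.strip() else None
    return f"Would process: {page_selection or 'all pages'} of {pdf_path or '(no file)'}"

with gr.Blocks() as demo:
    pdf_input = gr.Textbox(label="PDF path")
    page_selection_input = gr.Textbox(
        label="Page Selection (Optional)",
        placeholder="Example: 1,3,5-10,15 (leave empty for all pages)",
    )
    result = gr.Markdown()
    gr.Button("Convert").click(fn=describe, inputs=[pdf_input, page_selection_input], outputs=[result])

# demo.launch()  # uncomment to run locally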
describepdf/ui_ollama.py
CHANGED
@@ -22,7 +22,7 @@ theme = gr.themes.Soft(
     spacing_size="lg",
 )
 
-def generate(
+def convert_pdf_to_descriptive_markdown(
     pdf_file_obj: Optional[gr.File],
     ollama_endpoint: str,
     ui_vlm_model: str,
@@ -30,25 +30,36 @@ def generate(
     ui_use_md: bool,
     ui_use_sum: bool,
     ui_sum_model: str,
+    ui_page_selection: str,
     progress: gr.Progress = gr.Progress(track_tqdm=True)
 ) -> Tuple[str, gr.update, Optional[str]]:
     """
-    [original one-line docstring summary not preserved in this view]
+    Convert a PDF file to detailed page-by-page Markdown descriptions using local Ollama Vision-Language Models.
+
+    This function processes the uploaded PDF, analyzing the visual and textual content of each page
+    using locally hosted Vision-Language Models (VLMs) through Ollama. It generates rich, contextual
+    descriptions in Markdown format that capture both the visual elements and text content of the document,
+    making the PDF accessible and searchable in contexts where traditional text extraction would fail.
+
+    Unlike the OpenRouter version, this function utilizes local models running through Ollama,
+    providing privacy and eliminating the need for API keys, but potentially with different model options
+    and performance characteristics.
 
     Args:
         pdf_file_obj: Gradio File object for the uploaded PDF
-        ollama_endpoint: Ollama server endpoint URL
-        ui_vlm_model: VLM model name from UI
-        ui_lang: Output language
-        ui_use_md: Whether to use Markitdown
-        ui_use_sum: Whether to generate a summary
-        ui_sum_model: Summary model name from UI
+        ollama_endpoint: Ollama server endpoint URL (e.g., http://localhost:11434)
+        ui_vlm_model: VLM model name from UI (e.g., llama3.2-vision)
+        ui_lang: Output language for descriptions (e.g., English, Spanish)
+        ui_use_md: Whether to use Markitdown for enhanced text extraction
+        ui_use_sum: Whether to generate a document summary for context
+        ui_sum_model: Summary model name from UI (e.g., qwen2.5)
+        ui_page_selection: Optional page selection string (e.g., "1,3,5-10")
         progress: Gradio progress tracker
 
     Returns:
         Tuple containing:
-        - str: Status message
-        - gr.update: Download button update
+        - str: Status message indicating success or failure
+        - gr.update: Download button update with the result file
        - Optional[str]: Markdown result content
     """
     # Validate input file
@@ -69,7 +80,8 @@ def generate(
         "output_language": ui_lang,
         "use_markitdown": ui_use_md,
         "use_summary": ui_use_sum,
-        "summary_llm_model": ui_sum_model
+        "summary_llm_model": ui_sum_model,
+        "page_selection": ui_page_selection.strip() if ui_page_selection.strip() else None
     }
 
     # Create progress callback for Gradio
@@ -160,7 +172,7 @@ def create_ui() -> gr.Blocks:
        gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
        gr.Markdown(
            """<div style="display: flex;align-items: center;justify-content: center">
-           [<a href="https://davidlms.github.io/
+           [<a href="https://davidlms.github.io/DescribePDF/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
            """
        )
        gr.Markdown(
@@ -223,6 +235,12 @@
            allow_custom_value=True,
            info="Select or type the desired output language (e.g., English, Spanish)"
        )
+       page_selection_input = gr.Textbox(
+           label="Page Selection (Optional)",
+           value="",
+           placeholder="Example: 1,3,5-10,15 (leave empty for all pages)",
+           info="Specify individual pages or ranges to process"
+       )
        with gr.Row():
            use_markitdown_checkbox = gr.Checkbox(
                label="Use Markitdown for extra text context",
@@ -239,17 +257,16 @@
            allow_custom_value=True,
            info="Select or type the Ollama LLM model name for summaries"
        )
-
        # Connect UI components
        conversion_inputs = [
            pdf_input, ollama_endpoint_input, vlm_model_input, output_language_input,
-           use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
+           use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input, page_selection_input
        ]
        conversion_outputs = [
            progress_output, download_button, markdown_output
        ]
        convert_button.click(
-           fn=generate,
+           fn=convert_pdf_to_descriptive_markdown,
            inputs=conversion_inputs,
            outputs=conversion_outputs
        )