Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,205 +9,18 @@ import sys
|
|
9 |
# Asegurarnos de que el directorio actual esté en el path
|
10 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
#
|
23 |
-
|
24 |
-
"qwen/qwen2.5-vl-72b-instruct",
|
25 |
-
"google/gemini-2.5-pro-preview-03-25",
|
26 |
-
"openai/chatgpt-4o-latest"
|
27 |
-
]
|
28 |
-
|
29 |
-
# Choices offered by the "LLM Model for Summary" dropdown; its first entry is
# the dropdown's initial value.
SUGGESTED_LLMS = [
    "google/gemini-2.5-flash-preview",
    "openai/chatgpt-4o-latest",
    "anthropic/claude-3.5-sonnet",
]

# Choices offered by the "Output Language" dropdown.
SUGGESTED_LANGUAGES = [
    "English",
    "Spanish",
    "French",
    "German",
    "Chinese",
    "Japanese",
    "Italian",
    "Portuguese",
    "Russian",
    "Korean",
]
|
40 |
-
|
41 |
-
def generate(
    pdf_file_obj,
    ui_api_key,
    ui_vlm_model,
    ui_lang,
    ui_use_md,
    ui_use_sum,
    ui_sum_model,
    progress=gr.Progress()
):
    """Convert the uploaded PDF to Markdown using the settings chosen in the UI.

    Builds a per-run configuration from the UI values (falling back to
    ``config.get_config()`` for the API key and the summary model), calls
    ``core.convert_pdf_to_markdown`` and, when Markdown was produced, writes
    it to a temporary ``.md`` file offered for download.

    Args:
        pdf_file_obj: The uploaded PDF, either an object whose ``name``
            attribute holds the file path or the path string itself;
            ``None`` when nothing has been uploaded.
        ui_api_key: OpenRouter API key typed in the UI. When it is blank or
            ``None``, the ``"openrouter_api_key"`` entry of the environment
            configuration is used instead.
        ui_vlm_model: Value stored under ``"vlm_model"`` in the run config.
        ui_lang: Value stored under ``"output_language"``.
        ui_use_md: Value stored under ``"use_markitdown"``.
        ui_use_sum: Value stored under ``"use_summary"``.
        ui_sum_model: Value stored under ``"summary_llm_model"``; when falsy,
            the ``"or_summary_model"`` entry of the environment configuration
            is used.
        progress: Gradio progress tracker; it is called with a fraction
            clamped to ``[0.0, 1.0]`` and a status description.

    Returns:
        A 3-tuple ``(status_message, download_update, markdown)`` where
        ``download_update`` is a ``gr.update(...)`` that either reveals the
        generated file or hides the download component, and ``markdown`` is
        the conversion result (``None`` on the early-exit error paths).
    """
    import tempfile

    if pdf_file_obj is None:
        return "Please upload a PDF file.", gr.update(value=None, visible=False), None

    # Accept both an upload object exposing ``.name`` and a bare path string.
    pdf_path = getattr(pdf_file_obj, "name", pdf_file_obj)

    # Load environment config
    env_config = config.get_config()

    # A missing (None) key is treated like an empty one instead of crashing
    # on ``.strip()``; the UI value wins over the environment when non-blank.
    typed_key = (ui_api_key or "").strip()
    api_key = typed_key if typed_key else env_config.get("openrouter_api_key")

    # Prepare configuration for this run
    current_run_config = {
        "provider": "openrouter",
        "openrouter_api_key": api_key,
        "vlm_model": ui_vlm_model,
        "output_language": ui_lang,
        "use_markitdown": ui_use_md,
        "use_summary": ui_use_sum,
        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model"),
    }

    # Validate API key
    if not current_run_config.get("openrouter_api_key"):
        error_msg = "Error: OpenRouter API Key is missing. Provide it in the UI."
        return error_msg, gr.update(value=None, visible=False), None

    def progress_callback(progress_value, status):
        # Keep the reported fraction inside [0, 1] before handing it to Gradio.
        clamped_progress = max(0.0, min(1.0, progress_value))
        progress(clamped_progress, desc=status)

    # Run the conversion
    status_message, result_markdown = core.convert_pdf_to_markdown(
        pdf_path,
        current_run_config,
        progress_callback,
    )

    # Hidden unless a download file is successfully produced below.
    download_button_update = gr.update(value=None, visible=False)

    if result_markdown:
        try:
            # Name the download after the uploaded PDF.
            base_name = os.path.splitext(os.path.basename(pdf_path))[0]
            download_filename = f"{base_name}_description.md"

            # Let tempfile pick a unique name and create the file atomically
            # in the default temp directory; delete=False keeps it on disk so
            # it can be served after this function returns.
            with tempfile.NamedTemporaryFile(
                mode="w",
                encoding="utf-8",
                prefix=f"{base_name}_",
                suffix=".md",
                delete=False,
            ) as md_file:
                md_file.write(result_markdown)
                download_filepath = md_file.name

            download_button_update = gr.update(
                value=download_filepath,
                visible=True,
                label=f"Download '{download_filename}'",
            )
        except Exception as e:
            # Deliberate best-effort: a failed download file must not discard
            # the conversion result, so the error is only appended to the status.
            status_message += f" (Error creating download file: {str(e)})"

    return status_message, download_button_update, result_markdown
|
115 |
-
|
116 |
-
# Build the user interface by hand instead of calling create_ui().
with gr.Blocks(title="DescribePDF", theme=theme) as app:
    # Header: poster image, project links and a short description.
    gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
    gr.Markdown(
        '<div style="display: flex;align-items: center;justify-content: center">\n'
        '[<a href="https://davidlms.github.io/describepdf/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>\n'
    )
    gr.Markdown("DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations.")

    with gr.Tabs():
        # "Generate" tab: upload and controls in the narrow column, result in the wide one.
        with gr.TabItem("Generate"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_input = gr.File(label="Upload PDF", file_types=['.pdf'], type="filepath")
                    convert_button = gr.Button("Describe", variant="primary")
                    progress_output = gr.Textbox(label="Progress", interactive=False, lines=2)
                    # Hidden at start; the click handler's second output updates it.
                    download_button = gr.File(label="Download Markdown", visible=False, interactive=False)

                with gr.Column(scale=2):
                    markdown_output = gr.Markdown(label="Result (Markdown)")

        # "Settings" tab: values read by the next click on "Describe".
        with gr.TabItem("Settings"):
            gr.Markdown("Adjust settings for the *next* generation.")
            api_key_input = gr.Textbox(
                label="OpenRouter API Key",
                type="password",
                placeholder="Enter your OpenRouter API key",
                value="",
            )
            vlm_model_input = gr.Dropdown(
                label="VLM Model",
                choices=SUGGESTED_VLMS,
                value=SUGGESTED_VLMS[0],
                allow_custom_value=True,
                info="Select or type the OpenRouter VLM model name",
            )
            output_language_input = gr.Dropdown(
                label="Output Language",
                choices=SUGGESTED_LANGUAGES,
                value="English",
                allow_custom_value=True,
                info="Select or type the desired output language",
            )
            with gr.Row():
                use_markitdown_checkbox = gr.Checkbox(label="Use Markitdown for extra text context", value=False)
                use_summary_checkbox = gr.Checkbox(label="Use PDF summary for augmented context", value=False)
            summary_llm_model_input = gr.Dropdown(
                label="LLM Model for Summary",
                choices=SUGGESTED_LLMS,
                value=SUGGESTED_LLMS[0],
                allow_custom_value=True,
                info="Select or type the OpenRouter LLM model name for summaries",
            )

    # Wire the button to generate(): inputs are listed in the order of its
    # positional parameters, outputs in the order of its returned 3-tuple.
    conversion_inputs = [
        pdf_input,
        api_key_input,
        vlm_model_input,
        output_language_input,
        use_markitdown_checkbox,
        use_summary_checkbox,
        summary_llm_model_input,
    ]
    conversion_outputs = [progress_output, download_button, markdown_output]
    convert_button.click(fn=generate, inputs=conversion_inputs, outputs=conversion_outputs)
|
211 |
|
212 |
# Para Hugging Face Spaces
|
213 |
if __name__ == "__main__":
|
|
|
9 |
# Asegurarnos de que el directorio actual esté en el path
|
10 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
11 |
|
12 |
+
# Small factory used to start the interface.
def create_interface():
    """Build the UI by delegating to ``describepdf.ui.create_ui``.

    The import happens inside the function, so ``describepdf.ui`` is only
    loaded when the function is called.

    Returns:
        Whatever ``create_ui()`` returns.
    """
    from describepdf.ui import create_ui

    return create_ui()


# Create the interface.
app = create_interface()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Para Hugging Face Spaces
|
26 |
if __name__ == "__main__":
|