davidlms committed on
Commit
19d1867
·
verified ·
1 Parent(s): ff97258

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -199
app.py CHANGED
@@ -9,205 +9,18 @@ import sys
9
  # Asegurarnos de que el directorio actual esté en el path
10
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
11
 
12
- # Importar solo las partes necesarias
13
- from describepdf import config, core
14
-
15
- # Definir tema
16
- theme = gr.themes.Soft(
17
- primary_hue="red",
18
- secondary_hue="rose",
19
- spacing_size="lg",
20
- )
21
-
22
- # Variables globales para configuración predeterminada
23
- SUGGESTED_VLMS = [
24
- "qwen/qwen2.5-vl-72b-instruct",
25
- "google/gemini-2.5-pro-preview-03-25",
26
- "openai/chatgpt-4o-latest"
27
- ]
28
-
29
- SUGGESTED_LLMS = [
30
- "google/gemini-2.5-flash-preview",
31
- "openai/chatgpt-4o-latest",
32
- "anthropic/claude-3.5-sonnet"
33
- ]
34
-
35
- SUGGESTED_LANGUAGES = [
36
- "English", "Spanish", "French", "German",
37
- "Chinese", "Japanese", "Italian",
38
- "Portuguese", "Russian", "Korean"
39
- ]
40
-
41
- def generate(
42
- pdf_file_obj,
43
- ui_api_key,
44
- ui_vlm_model,
45
- ui_lang,
46
- ui_use_md,
47
- ui_use_sum,
48
- ui_sum_model,
49
- progress=gr.Progress()
50
- ):
51
- """Wrapper function to call the core conversion process"""
52
- if pdf_file_obj is None:
53
- return "Please upload a PDF file.", gr.update(value=None, visible=False), None
54
-
55
- # Load environment config
56
- env_config = config.get_config()
57
-
58
- # Prepare configuration for this run
59
- api_key = ui_api_key.strip() if ui_api_key.strip() else env_config.get("openrouter_api_key")
60
-
61
- current_run_config = {
62
- "provider": "openrouter",
63
- "openrouter_api_key": api_key,
64
- "vlm_model": ui_vlm_model,
65
- "output_language": ui_lang,
66
- "use_markitdown": ui_use_md,
67
- "use_summary": ui_use_sum,
68
- "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
69
- }
70
-
71
- # Validate API key
72
- if not current_run_config.get("openrouter_api_key"):
73
- error_msg = "Error: OpenRouter API Key is missing. Provide it in the UI."
74
- return error_msg, gr.update(value=None, visible=False), None
75
-
76
- # Create progress callback for Gradio
77
- def progress_callback(progress_value, status):
78
- clamped_progress = max(0.0, min(1.0, progress_value))
79
- progress(clamped_progress, desc=status)
80
-
81
- # Run the conversion
82
- status_message, result_markdown = core.convert_pdf_to_markdown(
83
- pdf_file_obj.name,
84
- current_run_config,
85
- progress_callback
86
- )
87
-
88
- # Handle the download file
89
- if result_markdown:
90
- try:
91
- import tempfile
92
- import secrets
93
-
94
- # Get base filename from the uploaded PDF
95
- base_name = os.path.splitext(os.path.basename(pdf_file_obj.name))[0]
96
- download_filename = f"{base_name}_description.md"
97
-
98
- # Create a temporary file
99
- random_suffix = secrets.token_hex(4)
100
- temp_dir = tempfile.gettempdir()
101
- download_filepath = os.path.join(temp_dir, f"{base_name}_{random_suffix}.md")
102
-
103
- # Write markdown result to the temporary file
104
- with open(download_filepath, "w", encoding="utf-8") as md_file:
105
- md_file.write(result_markdown)
106
-
107
- download_button_update = gr.update(value=download_filepath, visible=True, label=f"Download '{download_filename}'")
108
- except Exception as e:
109
- status_message += f" (Error creating download file: {str(e)})"
110
- download_button_update = gr.update(value=None, visible=False)
111
- else:
112
- download_button_update = gr.update(value=None, visible=False)
113
-
114
- return status_message, download_button_update, result_markdown
115
-
116
- # Crear interfaz de usuario manualmente en lugar de usar la función create_ui()
117
- with gr.Blocks(title="DescribePDF", theme=theme) as app:
118
- gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
119
- gr.Markdown(
120
- """<div style="display: flex;align-items: center;justify-content: center">
121
- [<a href="https://davidlms.github.io/describepdf/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
122
- """
123
- )
124
- gr.Markdown(
125
- "DescribePDF is an open-source tool designed to convert PDF files into detailed page-by-page descriptions in Markdown format using Vision-Language Models (VLMs). Unlike traditional PDF extraction tools that focus on replicating the text layout, DescribePDF generates rich, contextual descriptions of each page's content, making it perfect for visually complex documents like catalogs, scanned documents, and presentations."
126
- )
127
-
128
- with gr.Tabs():
129
- # Generate tab
130
- with gr.TabItem("Generate"):
131
- with gr.Row():
132
- with gr.Column(scale=1):
133
- pdf_input = gr.File(
134
- label="Upload PDF",
135
- file_types=['.pdf'],
136
- type="filepath"
137
- )
138
- convert_button = gr.Button(
139
- "Describe",
140
- variant="primary"
141
- )
142
- progress_output = gr.Textbox(
143
- label="Progress",
144
- interactive=False,
145
- lines=2
146
- )
147
- download_button = gr.File(
148
- label="Download Markdown",
149
- visible=False,
150
- interactive=False
151
- )
152
-
153
- with gr.Column(scale=2):
154
- markdown_output = gr.Markdown(label="Result (Markdown)")
155
-
156
- # Configuration tab
157
- with gr.TabItem("Settings"):
158
- gr.Markdown(
159
- "Adjust settings for the *next* generation."
160
- )
161
- api_key_input = gr.Textbox(
162
- label="OpenRouter API Key",
163
- type="password",
164
- placeholder="Enter your OpenRouter API key",
165
- value=""
166
- )
167
- vlm_model_input = gr.Dropdown(
168
- label="VLM Model",
169
- choices=SUGGESTED_VLMS,
170
- value=SUGGESTED_VLMS[0],
171
- allow_custom_value=True,
172
- info="Select or type the OpenRouter VLM model name"
173
- )
174
- output_language_input = gr.Dropdown(
175
- label="Output Language",
176
- choices=SUGGESTED_LANGUAGES,
177
- value="English",
178
- allow_custom_value=True,
179
- info="Select or type the desired output language"
180
- )
181
- with gr.Row():
182
- use_markitdown_checkbox = gr.Checkbox(
183
- label="Use Markitdown for extra text context",
184
- value=False
185
- )
186
- use_summary_checkbox = gr.Checkbox(
187
- label="Use PDF summary for augmented context",
188
- value=False
189
- )
190
- summary_llm_model_input = gr.Dropdown(
191
- label="LLM Model for Summary",
192
- choices=SUGGESTED_LLMS,
193
- value=SUGGESTED_LLMS[0],
194
- allow_custom_value=True,
195
- info="Select or type the OpenRouter LLM model name for summaries"
196
- )
197
-
198
- # Connect UI components
199
- conversion_inputs = [
200
- pdf_input, api_key_input, vlm_model_input, output_language_input,
201
- use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
202
- ]
203
- conversion_outputs = [
204
- progress_output, download_button, markdown_output
205
- ]
206
- convert_button.click(
207
- fn=generate,
208
- inputs=conversion_inputs,
209
- outputs=conversion_outputs
210
- )
211
 
212
  # Para Hugging Face Spaces
213
  if __name__ == "__main__":
 
9
  # Asegurarnos de que el directorio actual esté en el path
10
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
11
 
12
+ # Crear una función simple para iniciar la interfaz
13
+ def create_interface():
14
+ # Importar la función de creación de UI desde el módulo
15
+ from describepdf.ui import create_ui
16
+
17
+ # Crear la interfaz
18
+ interface = create_ui()
19
+
20
+ return interface
21
+
22
+ # Crear la interfaz
23
+ app = create_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Para Hugging Face Spaces
26
  if __name__ == "__main__":