Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# app.py — HTR Space (GPU-
|
2 |
|
3 |
import os
|
4 |
from threading import Thread
|
@@ -9,10 +9,9 @@ from transformers import AutoProcessor, AutoModelForImageTextToText, Qwen2_5_VLF
|
|
9 |
from reportlab.platypus import SimpleDocTemplate, Paragraph
|
10 |
from reportlab.lib.styles import getSampleStyleSheet
|
11 |
from docx import Document
|
12 |
-
from spaces import GPU # <-- required for Spaces GPU
|
13 |
|
14 |
MAX_NEW_TOKENS_DEFAULT = 512
|
15 |
-
DEVICE = "cuda"
|
16 |
|
17 |
# ---------------------------
|
18 |
# Models config
|
@@ -88,14 +87,16 @@ def _decode_text(model, processor, tokenizer, output_ids):
|
|
88 |
return str(output_ids)
|
89 |
|
90 |
# ---------------------------
|
91 |
-
# OCR function
|
92 |
# ---------------------------
|
|
|
|
|
93 |
@GPU
|
94 |
def ocr_image_gpu(image: Image.Image, model_choice: str, query: str = None,
|
95 |
max_new_tokens: int = MAX_NEW_TOKENS_DEFAULT, temperature: float = 0.1,
|
96 |
top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0):
|
97 |
if image is None:
|
98 |
-
return "Please upload
|
99 |
|
100 |
if model_choice not in MODEL_PATHS:
|
101 |
return f"Invalid model: {model_choice}"
|
@@ -179,9 +180,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
179 |
placeholder="Leave empty for RAW structured output",
|
180 |
)
|
181 |
|
182 |
-
image_input = gr.Image(type="pil", label="Upload Image
|
183 |
-
webcam_input = gr.Image(type="pil", label="Take Photo (mobile/desktop)")
|
184 |
|
|
|
185 |
with gr.Accordion("βοΈ Advanced Options", open=False):
|
186 |
max_new_tokens = gr.Slider(1, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=1, label="Max new tokens")
|
187 |
temperature = gr.Slider(0.1, 2.0, value=0.1, step=0.05, label="Temperature")
|
@@ -189,41 +190,40 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
189 |
top_k = gr.Slider(0, 1000, value=0, step=1, label="Top-k")
|
190 |
repetition_penalty = gr.Slider(0.8, 2.0, value=1.0, step=0.05, label="Repetition penalty")
|
191 |
|
|
|
|
|
|
|
192 |
raw_output = gr.Textbox(
|
193 |
label="π RAW Structured Output (exact as written)",
|
194 |
lines=18,
|
195 |
show_copy_button=True,
|
196 |
)
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
204 |
|
205 |
-
extract_btn = gr.Button("π€ Extract RAW Text", variant="primary")
|
206 |
extract_btn.click(
|
207 |
-
fn=
|
208 |
-
|
209 |
-
inputs=[image_input, webcam_input, model_choice, query_input,
|
210 |
max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
211 |
outputs=[raw_output]
|
212 |
)
|
213 |
|
214 |
-
pdf_btn
|
215 |
-
|
216 |
-
|
217 |
-
word_btn = gr.Button("β¬οΈ Download as Word")
|
218 |
-
word_btn.click(fn=save_as_word, inputs=[raw_output], outputs=[word_file])
|
219 |
-
|
220 |
-
audio_btn = gr.Button("π Download as Audio")
|
221 |
-
audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_file])
|
222 |
|
223 |
clear_btn = gr.Button("π§Ή Clear")
|
224 |
clear_btn.click(
|
225 |
-
fn=lambda: ("", None,
|
226 |
-
outputs=[raw_output, image_input,
|
227 |
max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
228 |
)
|
229 |
|
|
|
1 |
+
# app.py — HTR Space (GPU-only, no webcam, mobile-ready)
|
2 |
|
3 |
import os
|
4 |
from threading import Thread
|
|
|
9 |
from reportlab.platypus import SimpleDocTemplate, Paragraph
|
10 |
from reportlab.lib.styles import getSampleStyleSheet
|
11 |
from docx import Document
|
|
|
12 |
|
13 |
MAX_NEW_TOKENS_DEFAULT = 512
|
14 |
+
DEVICE = "cuda" # GPU-only
|
15 |
|
16 |
# ---------------------------
|
17 |
# Models config
|
|
|
87 |
return str(output_ids)
|
88 |
|
89 |
# ---------------------------
|
90 |
+
# GPU OCR function
|
91 |
# ---------------------------
|
92 |
+
from spaces import GPU
|
93 |
+
|
94 |
@GPU
|
95 |
def ocr_image_gpu(image: Image.Image, model_choice: str, query: str = None,
|
96 |
max_new_tokens: int = MAX_NEW_TOKENS_DEFAULT, temperature: float = 0.1,
|
97 |
top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0):
|
98 |
if image is None:
|
99 |
+
return "Please upload an image."
|
100 |
|
101 |
if model_choice not in MODEL_PATHS:
|
102 |
return f"Invalid model: {model_choice}"
|
|
|
180 |
placeholder="Leave empty for RAW structured output",
|
181 |
)
|
182 |
|
183 |
+
image_input = gr.Image(type="pil", label="Upload Image")
|
|
|
184 |
|
185 |
+
# Advanced Options
|
186 |
with gr.Accordion("βοΈ Advanced Options", open=False):
|
187 |
max_new_tokens = gr.Slider(1, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=1, label="Max new tokens")
|
188 |
temperature = gr.Slider(0.1, 2.0, value=0.1, step=0.05, label="Temperature")
|
|
|
190 |
top_k = gr.Slider(0, 1000, value=0, step=1, label="Top-k")
|
191 |
repetition_penalty = gr.Slider(0.8, 2.0, value=1.0, step=0.05, label="Repetition penalty")
|
192 |
|
193 |
+
# ✅ Extract Button ABOVE output
|
194 |
+
extract_btn = gr.Button("π€ Extract RAW Text", variant="primary")
|
195 |
+
|
196 |
raw_output = gr.Textbox(
|
197 |
label="π RAW Structured Output (exact as written)",
|
198 |
lines=18,
|
199 |
show_copy_button=True,
|
200 |
)
|
201 |
|
202 |
+
pdf_btn = gr.Button("β¬οΈ Download as PDF")
|
203 |
+
word_btn = gr.Button("β¬οΈ Download as Word")
|
204 |
+
audio_btn = gr.Button("π Download as Audio")
|
205 |
|
206 |
+
# ---------------------------
|
207 |
+
# Button Callbacks
|
208 |
+
# ---------------------------
|
209 |
+
def on_extract(uploaded, model, query, max_tokens, temp, top_p, top_k, rep):
|
210 |
+
return ocr_image_gpu(uploaded, model, query, max_tokens, temp, top_p, top_k, rep)
|
211 |
|
|
|
212 |
extract_btn.click(
|
213 |
+
fn=on_extract,
|
214 |
+
inputs=[image_input, model_choice, query_input,
|
|
|
215 |
max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
216 |
outputs=[raw_output]
|
217 |
)
|
218 |
|
219 |
+
pdf_btn.click(fn=save_as_pdf, inputs=[raw_output], outputs=[pdf_btn])
|
220 |
+
word_btn.click(fn=save_as_word, inputs=[raw_output], outputs=[word_btn])
|
221 |
+
audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_btn])
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
clear_btn = gr.Button("π§Ή Clear")
|
224 |
clear_btn.click(
|
225 |
+
fn=lambda: ("", None, "", MAX_NEW_TOKENS_DEFAULT, 0.1, 1.0, 0, 1.0),
|
226 |
+
outputs=[raw_output, image_input, query_input,
|
227 |
max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
228 |
)
|
229 |
|