Emeritus-21 committed on
Commit
b49181f
·
verified ·
1 Parent(s): 37e07da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -26
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — HTR Space (GPU-ready, single-model load, mobile-ready)
2
 
3
  import os
4
  from threading import Thread
@@ -9,10 +9,9 @@ from transformers import AutoProcessor, AutoModelForImageTextToText, Qwen2_5_VLF
9
  from reportlab.platypus import SimpleDocTemplate, Paragraph
10
  from reportlab.lib.styles import getSampleStyleSheet
11
  from docx import Document
12
- from spaces import GPU # <-- required for Spaces GPU
13
 
14
  MAX_NEW_TOKENS_DEFAULT = 512
15
- DEVICE = "cuda"
16
 
17
  # ---------------------------
18
  # Models config
@@ -88,14 +87,16 @@ def _decode_text(model, processor, tokenizer, output_ids):
88
  return str(output_ids)
89
 
90
  # ---------------------------
91
- # OCR function for GPU
92
  # ---------------------------
 
 
93
  @GPU
94
  def ocr_image_gpu(image: Image.Image, model_choice: str, query: str = None,
95
  max_new_tokens: int = MAX_NEW_TOKENS_DEFAULT, temperature: float = 0.1,
96
  top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0):
97
  if image is None:
98
- return "Please upload or capture an image."
99
 
100
  if model_choice not in MODEL_PATHS:
101
  return f"Invalid model: {model_choice}"
@@ -179,9 +180,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
179
  placeholder="Leave empty for RAW structured output",
180
  )
181
 
182
- image_input = gr.Image(type="pil", label="Upload Image (desktop/mobile)")
183
- webcam_input = gr.Image(type="pil", label="Take Photo (mobile/desktop)")
184
 
 
185
  with gr.Accordion("⚙️ Advanced Options", open=False):
186
  max_new_tokens = gr.Slider(1, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=1, label="Max new tokens")
187
  temperature = gr.Slider(0.1, 2.0, value=0.1, step=0.05, label="Temperature")
@@ -189,41 +190,40 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
189
  top_k = gr.Slider(0, 1000, value=0, step=1, label="Top-k")
190
  repetition_penalty = gr.Slider(0.8, 2.0, value=1.0, step=0.05, label="Repetition penalty")
191
 
 
 
 
192
  raw_output = gr.Textbox(
193
  label="📜 RAW Structured Output (exact as written)",
194
  lines=18,
195
  show_copy_button=True,
196
  )
197
 
198
- pdf_file = gr.File(label="PDF File")
199
- word_file = gr.File(label="Word File")
200
- audio_file = gr.File(label="Audio File")
201
 
202
- def choose_image(uploaded, webcam):
203
- return webcam if webcam is not None else uploaded
 
 
 
204
 
205
- extract_btn = gr.Button("📀 Extract RAW Text", variant="primary")
206
  extract_btn.click(
207
- fn=lambda uploaded, webcam, model, query, max_tokens, temp, top_p, top_k, rep:
208
- ocr_image_gpu(choose_image(uploaded, webcam), model, query, max_tokens, temp, top_p, top_k, rep),
209
- inputs=[image_input, webcam_input, model_choice, query_input,
210
  max_new_tokens, temperature, top_p, top_k, repetition_penalty],
211
  outputs=[raw_output]
212
  )
213
 
214
- pdf_btn = gr.Button("⬇️ Download as PDF")
215
- pdf_btn.click(fn=save_as_pdf, inputs=[raw_output], outputs=[pdf_file])
216
-
217
- word_btn = gr.Button("⬇️ Download as Word")
218
- word_btn.click(fn=save_as_word, inputs=[raw_output], outputs=[word_file])
219
-
220
- audio_btn = gr.Button("🔊 Download as Audio")
221
- audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_file])
222
 
223
  clear_btn = gr.Button("🧹 Clear")
224
  clear_btn.click(
225
- fn=lambda: ("", None, None, "", MAX_NEW_TOKENS_DEFAULT, 0.1, 1.0, 0, 1.0),
226
- outputs=[raw_output, image_input, webcam_input, query_input,
227
  max_new_tokens, temperature, top_p, top_k, repetition_penalty],
228
  )
229
 
 
1
+ # app.py — HTR Space (GPU-only, no webcam, mobile-ready)
2
 
3
  import os
4
  from threading import Thread
 
9
  from reportlab.platypus import SimpleDocTemplate, Paragraph
10
  from reportlab.lib.styles import getSampleStyleSheet
11
  from docx import Document
 
12
 
13
  MAX_NEW_TOKENS_DEFAULT = 512
14
+ DEVICE = "cuda" # GPU-only
15
 
16
  # ---------------------------
17
  # Models config
 
87
  return str(output_ids)
88
 
89
  # ---------------------------
90
+ # GPU OCR function
91
  # ---------------------------
92
+ from spaces import GPU
93
+
94
  @GPU
95
  def ocr_image_gpu(image: Image.Image, model_choice: str, query: str = None,
96
  max_new_tokens: int = MAX_NEW_TOKENS_DEFAULT, temperature: float = 0.1,
97
  top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0):
98
  if image is None:
99
+ return "Please upload an image."
100
 
101
  if model_choice not in MODEL_PATHS:
102
  return f"Invalid model: {model_choice}"
 
180
  placeholder="Leave empty for RAW structured output",
181
  )
182
 
183
+ image_input = gr.Image(type="pil", label="Upload Image")
 
184
 
185
+ # Advanced Options
186
  with gr.Accordion("⚙️ Advanced Options", open=False):
187
  max_new_tokens = gr.Slider(1, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=1, label="Max new tokens")
188
  temperature = gr.Slider(0.1, 2.0, value=0.1, step=0.05, label="Temperature")
 
190
  top_k = gr.Slider(0, 1000, value=0, step=1, label="Top-k")
191
  repetition_penalty = gr.Slider(0.8, 2.0, value=1.0, step=0.05, label="Repetition penalty")
192
 
193
+ # ✅ Extract Button ABOVE output
194
+ extract_btn = gr.Button("📀 Extract RAW Text", variant="primary")
195
+
196
  raw_output = gr.Textbox(
197
  label="📜 RAW Structured Output (exact as written)",
198
  lines=18,
199
  show_copy_button=True,
200
  )
201
 
202
+ pdf_btn = gr.Button("⬇️ Download as PDF")
203
+ word_btn = gr.Button("⬇️ Download as Word")
204
+ audio_btn = gr.Button("🔊 Download as Audio")
205
 
206
+ # ---------------------------
207
+ # Button Callbacks
208
+ # ---------------------------
209
+ def on_extract(uploaded, model, query, max_tokens, temp, top_p, top_k, rep):
210
+ return ocr_image_gpu(uploaded, model, query, max_tokens, temp, top_p, top_k, rep)
211
 
 
212
  extract_btn.click(
213
+ fn=on_extract,
214
+ inputs=[image_input, model_choice, query_input,
 
215
  max_new_tokens, temperature, top_p, top_k, repetition_penalty],
216
  outputs=[raw_output]
217
  )
218
 
219
+ pdf_btn.click(fn=save_as_pdf, inputs=[raw_output], outputs=[pdf_btn])
220
+ word_btn.click(fn=save_as_word, inputs=[raw_output], outputs=[word_btn])
221
+ audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_btn])
 
 
 
 
 
222
 
223
  clear_btn = gr.Button("🧹 Clear")
224
  clear_btn.click(
225
+ fn=lambda: ("", None, "", MAX_NEW_TOKENS_DEFAULT, 0.1, 1.0, 0, 1.0),
226
+ outputs=[raw_output, image_input, query_input,
227
  max_new_tokens, temperature, top_p, top_k, repetition_penalty],
228
  )
229