Files changed (1)
  1. app.py +29 -2
app.py CHANGED
@@ -129,7 +129,8 @@ def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: i
129
  @spaces.GPU
130
  def process_document_stream(
131
  image: Image.Image,
132
- prompt_input: str,
 
133
  max_new_tokens: int,
134
  temperature: float,
135
  top_p: float,
@@ -146,6 +147,21 @@ def process_document_stream(
146
  yield "Please enter a prompt.", ""
147
  return
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  temp_image_path = None
150
  try:
151
  # --- FIX: Save the PIL Image to a temporary file ---
@@ -230,6 +246,16 @@ def create_gradio_interface():
230
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
231
 
232
  with gr.Accordion("Advanced Settings", open=False):
 
 
 
 
 
 
 
 
 
 
233
  max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
234
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
235
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
@@ -276,7 +302,8 @@ def create_gradio_interface():
276
 
277
  process_btn.click(
278
  fn=process_document_stream,
279
- inputs=[image_input, prompt_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
 
280
  outputs=[raw_output_stream, markdown_output]
281
  )
282
 
 
129
  @spaces.GPU
130
  def process_document_stream(
131
  image: Image.Image,
132
+ prompt_input: str,
133
+ image_scale_factor: float, # New parameter for image scaling
134
  max_new_tokens: int,
135
  temperature: float,
136
  top_p: float,
 
147
  yield "Please enter a prompt.", ""
148
  return
149
 
150
+ # --- IMPLEMENTATION: Upscale the image by the user-selected factor (skipped unless the factor is > 1.0) ---
151
+ if image_scale_factor > 1.0:
152
+ try:
153
+ original_width, original_height = image.size
154
+ new_width = int(original_width * image_scale_factor)
155
+ new_height = int(original_height * image_scale_factor)
156
+ print(f"Scaling image from {image.size} to ({new_width}, {new_height}) with factor {image_scale_factor}.")
157
+ # Use a high-quality resampling filter for better results
158
+ image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
159
+ except Exception as e:
160
+ print(f"Error during image scaling: {e}")
161
+ # Continue with the original image if scaling fails
162
+ pass
163
+ # --- END IMPLEMENTATION ---
164
+
165
  temp_image_path = None
166
  try:
167
  # --- FIX: Save the PIL Image to a temporary file ---
 
246
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
247
 
248
  with gr.Accordion("Advanced Settings", open=False):
249
+ # --- NEW UI ELEMENT: Image Scaling Slider ---
250
+ image_scale_factor = gr.Slider(
251
+ minimum=1.0,
252
+ maximum=3.0,
253
+ value=1.0,
254
+ step=0.1,
255
+ label="Image Upscale Factor",
256
+ info="Increases image size before processing. Can improve OCR on small text. Default: 1.0 (no change)."
257
+ )
258
+ # --- END NEW UI ELEMENT ---
259
  max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
260
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
261
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
 
302
 
303
  process_btn.click(
304
  fn=process_document_stream,
305
+ # --- UPDATE: image_scale_factor goes right after prompt_input so the inputs list matches the parameter order of process_document_stream ---
306
+ inputs=[image_input, prompt_input, image_scale_factor, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
307
  outputs=[raw_output_stream, markdown_output]
308
  )
309