Spaces:
Running
on
Zero
Running
on
Zero
update app
#5
by
prithivMLmods
- opened
app.py
CHANGED
@@ -129,7 +129,8 @@ def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: i
|
|
129 |
@spaces.GPU
|
130 |
def process_document_stream(
|
131 |
image: Image.Image,
|
132 |
-
prompt_input: str,
|
|
|
133 |
max_new_tokens: int,
|
134 |
temperature: float,
|
135 |
top_p: float,
|
@@ -146,6 +147,21 @@ def process_document_stream(
|
|
146 |
yield "Please enter a prompt.", ""
|
147 |
return
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
temp_image_path = None
|
150 |
try:
|
151 |
# --- FIX: Save the PIL Image to a temporary file ---
|
@@ -230,6 +246,16 @@ def create_gradio_interface():
|
|
230 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
231 |
|
232 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
234 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
|
235 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
|
@@ -276,7 +302,8 @@ def create_gradio_interface():
|
|
276 |
|
277 |
process_btn.click(
|
278 |
fn=process_document_stream,
|
279 |
-
|
|
|
280 |
outputs=[raw_output_stream, markdown_output]
|
281 |
)
|
282 |
|
|
|
129 |
@spaces.GPU
|
130 |
def process_document_stream(
|
131 |
image: Image.Image,
|
132 |
+
prompt_input: str,
|
133 |
+
image_scale_factor: float, # New parameter for image scaling
|
134 |
max_new_tokens: int,
|
135 |
temperature: float,
|
136 |
top_p: float,
|
|
|
147 |
yield "Please enter a prompt.", ""
|
148 |
return
|
149 |
|
150 |
+
# --- IMPLEMENTATION: Image Scaling based on user input ---
|
151 |
+
if image_scale_factor > 1.0:
|
152 |
+
try:
|
153 |
+
original_width, original_height = image.size
|
154 |
+
new_width = int(original_width * image_scale_factor)
|
155 |
+
new_height = int(original_height * image_scale_factor)
|
156 |
+
print(f"Scaling image from {image.size} to ({new_width}, {new_height}) with factor {image_scale_factor}.")
|
157 |
+
# Use a high-quality resampling filter for better results
|
158 |
+
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
159 |
+
except Exception as e:
|
160 |
+
print(f"Error during image scaling: {e}")
|
161 |
+
# Continue with the original image if scaling fails
|
162 |
+
pass
|
163 |
+
# --- END IMPLEMENTATION ---
|
164 |
+
|
165 |
temp_image_path = None
|
166 |
try:
|
167 |
# --- FIX: Save the PIL Image to a temporary file ---
|
|
|
246 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
247 |
|
248 |
with gr.Accordion("Advanced Settings", open=False):
|
249 |
+
# --- NEW UI ELEMENT: Image Scaling Slider ---
|
250 |
+
image_scale_factor = gr.Slider(
|
251 |
+
minimum=1.0,
|
252 |
+
maximum=3.0,
|
253 |
+
value=1.0,
|
254 |
+
step=0.1,
|
255 |
+
label="Image Upscale Factor",
|
256 |
+
info="Increases image size before processing. Can improve OCR on small text. Default: 1.0 (no change)."
|
257 |
+
)
|
258 |
+
# --- END NEW UI ELEMENT ---
|
259 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
260 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
|
261 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
|
|
|
302 |
|
303 |
process_btn.click(
|
304 |
fn=process_document_stream,
|
305 |
+
# --- UPDATE: Add the new slider to the inputs list ---
|
306 |
+
inputs=[image_input, prompt_input, image_scale_factor, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
307 |
outputs=[raw_output_stream, markdown_output]
|
308 |
)
|
309 |
|