Update app.py
Browse files
app.py
CHANGED
@@ -1,87 +1,84 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
-
|
28 |
-
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
status(0, desc='\nStep
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
img_gen
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
demo_data
|
65 |
-
'../src/demo_data/
|
66 |
-
'../src/demo_data/
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
<li>
|
78 |
-
|
79 |
-
<li>
|
80 |
-
<li>
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
inputs=[gr.Image(type='filepath', label='Input image')],
|
85 |
-
outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
|
86 |
-
examples=demo_data)
|
87 |
iface.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from helpers import OCRD
|
3 |
+
|
4 |
+
|
5 |
+
def run_ocrd_pipeline(
    img_path,
    status=gr.Progress(),
    binarize_mode='detailed',
    min_pixel_sum=30,
    median_bounds=(None, None),
    font_size=30,
):
    """
    Run the complete OCRD pipeline on one image and return the text overlay.

    The image is loaded through ``OCRD(img_path)`` and then passed through the
    OCRD methods in a fixed order: binarization, text line segmentation,
    text line extraction/filtering/deskewing, OCR on the extracted lines and
    finally creation of an overlay image containing the recognized text.

    Parameters:
        img_path (str): Path to the image file.
        status (gr.Progress): Progress tracker. It is called with a description
            before every step (progress value 0) and once with value 1 after
            the last step.
        binarize_mode (str): Mode to be used for image binarization. Can be
            'detailed', 'fast', or 'no'.
        min_pixel_sum (int or 'default', optional): Minimum sum of pixels to
            consider a text line segmentation for extraction. Defaults to 30.
            Pass the string 'default' to omit the argument so that the value
            built into ``OCRD.extract_filter_and_deskew_textlines`` is used.
        median_bounds (tuple or 'default', optional): Bounds to filter text
            line segmentations based on size relative to the median. Defaults
            to (None, None). Pass 'default' to omit the argument.
        font_size (int or 'default', optional): Font size to be used in the
            text overlay. Defaults to 30. Pass 'default' to omit the argument
            so that ``OCRD.create_text_overlay_image`` decides on its own.

    Returns:
        The overlay image returned by ``OCRD.create_text_overlay_image``.
    """
    # Forward only the options that were not set to the 'default' sentinel;
    # omitted options fall back to whatever the OCRD methods define.
    extraction_options = {
        name: value
        for name, value in (
            ('min_pixel_sum', min_pixel_sum),
            ('median_bounds', median_bounds),
        )
        if value != 'default'
    }
    overlay_options = {} if font_size == 'default' else {'font_size': font_size}

    ocrd = OCRD(img_path)

    status(0, desc='\nStep 1/5: Binarizing image...\n')
    binarized = ocrd.binarize_image(ocrd.image, binarize_mode)

    status(0, desc='\nStep 2/5: Segmenting textlines...\n')
    segments = ocrd.segment_textlines(binarized)

    status(0, desc='\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
    # The segmentation was predicted on the rescaled image, so the lines have
    # to be cut out of an identically rescaled copy of the input.
    scaled = ocrd.scale_image(ocrd.image)
    line_images, _ = ocrd.extract_filter_and_deskew_textlines(
        scaled, segments[..., 0], **extraction_options
    )

    status(0, desc='\nStep 4/5: OCR on textlines...\n')
    line_texts = ocrd.ocr_on_textlines(line_images)

    status(0, desc='\nStep 5/5: Creating output overlay image...')
    overlay_size = (scaled.shape[0], scaled.shape[1])
    overlay = ocrd.create_text_overlay_image(
        line_images, line_texts, overlay_size, **overlay_options
    )
    status(1, desc='\nJOB COMPLETED\n')

    return overlay
|
59 |
+
|
60 |
+
|
61 |
+
# Example images for the demo; the paths are relative ('../src/demo_data/').
demo_data = [
    f'../src/demo_data/{stem}_image.jpg'
    for stem in ('act', 'newjersey1', 'newjersey2', 'notes', 'washington')
]
|
68 |
+
|
69 |
+
|
70 |
+
# Gradio UI: one image upload as input, the generated overlay image as output.
# The description is an HTML list assembled from adjacent string literals.
iface = gr.Interface(
    fn=run_ocrd_pipeline,
    title="OCRD Pipeline",
    description=(
        "<ul><li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes "
        "images to recognize text.</li> "
        "<li>Steps include binarization, text line segmentation, extraction, filtering and deskewing as well as OCR. "
        "Results are displayed as a generated overlay image.</li> "
        "<li>Optimized for English; other languages (e.g. German) may require OCR model fine-tuning.</li> "
        "<li>Uses free CPU-based compute, which is rather slow. A pipeline run will take up to 10 minutes. "
        "For lengthy waits, pre-computed demo results are available for download: https://github.com/pluniak/ocrd/tree/main/src/demo_data.</li> "
        "<li>Note: The demo is just a first version! OCR performance and computation speed can be optimized.</li> "
        "<li>The demo is based on code from my GitHub repository: https://github.com/pluniak/ocrd/tree/main</li></ul>"
    ),
    inputs=[gr.Image(type='filepath', label='Input image')],
    outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
    examples=demo_data,
)

# Start the web app.
iface.launch()
|