pluniak committed on
Commit
1ff6dd2
·
verified ·
1 Parent(s): 494fc94

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ sys.path.append('../src/')
4
+ from utils.helpers import OCRD
5
+
6
+
7
+
8
def run_ocrd_pipeline(img_path, status=gr.Progress(), binarize_mode='detailed', min_pixel_sum=30, median_bounds=(None, None), font_size=30):
    """
    Run the OCRD pipeline on one image, from file loading to text overlay creation.

    The function chains the OCRD class methods: binarization, text line
    segmentation, text line extraction/filtering/deskewing, OCR on the
    extracted lines, and rendering of the recognized text as an overlay image.

    Parameters:
        img_path (str): Path to the image file.
        status (gr.Progress): Progress tracker; called once per pipeline step with the
            fraction of steps already completed and a description of the step starting.
        binarize_mode (str): Mode to be used for image binarization. Can be 'detailed', 'fast', or 'no'.
        min_pixel_sum (int, optional): Minimum sum of pixels to consider a text line segmentation
            for extraction. If 'default', the argument is not forwarded and the OCRD default applies.
        median_bounds (tuple, optional): Bounds to filter text line segmentations based on size
            relative to the median. If 'default', the argument is not forwarded and the OCRD
            default applies.
        font_size (int, optional): Font size to be used in the text overlay. If 'default', the
            argument is not forwarded and the OCRD default applies.

    Returns:
        The object returned by OCRD.create_text_overlay_image: an image with the recognized
        text overlaid (presumably a PIL image, matching the 'pil' output component -- confirm
        against OCRD).

    Raises:
        gr.Error: If no image path was supplied (e.g. the form was submitted without an upload).
    """
    # Gradio passes None when the user submits without selecting an image;
    # fail with a readable message instead of an error from inside OCRD.
    if not img_path:
        raise gr.Error('Please provide an input image.')

    # The string 'default' is a sentinel meaning "do not override the OCRD default",
    # so such arguments are simply left out of the forwarded keyword arguments.
    efadt_kwargs = {}
    if min_pixel_sum != 'default':
        efadt_kwargs['min_pixel_sum'] = min_pixel_sum
    if median_bounds != 'default':
        efadt_kwargs['median_bounds'] = median_bounds

    ctoi_kwargs = {}
    if font_size != 'default':
        ctoi_kwargs['font_size'] = font_size

    total_steps = 5

    def report(step, desc):
        # Reported when a step *starts*, so the fraction counts the steps already finished.
        status((step - 1) / total_steps, desc=desc)

    # run pipeline
    ocrd = OCRD(img_path)
    report(1, '\nStep 1/5: Binarizing image...\n')
    binarized = ocrd.binarize_image(ocrd.image, binarize_mode)
    report(2, '\nStep 2/5: Segmenting textlines...\n')
    textline_segments = ocrd.segment_textlines(binarized)
    report(3, '\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
    image_scaled = ocrd.scale_image(ocrd.image)  # textline_segments were predicted on rescaled image
    textline_images, _ = ocrd.extract_filter_and_deskew_textlines(image_scaled, textline_segments[..., 0], **efadt_kwargs)
    report(4, '\nStep 4/5: OCR on textlines...\n')
    textline_preds = ocrd.ocr_on_textlines(textline_images)
    report(5, '\nStep 5/5: Creating output overlay image...')
    # The overlay canvas gets the height and width of the rescaled image.
    overlay_size = (image_scaled.shape[0], image_scaled.shape[1])
    img_gen = ocrd.create_text_overlay_image(textline_images, textline_preds, overlay_size, **ctoi_kwargs)
    status(1, desc='\nJOB COMPLETED\n')

    return img_gen
62
+
63
+
64
# Example images offered in the demo UI; paths are relative to the working directory.
demo_data = [
    f'../src/demo_data/{stem}_image.jpg'
    for stem in ('act', 'newjersey1', 'newjersey2', 'notes', 'washington')
]
71
+
72
+
73
# Build the Gradio interface around run_ocrd_pipeline and start serving it.
# NOTE(review): the description text below is reproduced from a diff view that dropped
# leading indentation; if the original continuation lines were indented, the original
# string also contained that whitespace -- confirm against the real app.py.
iface = gr.Interface(
    fn=run_ocrd_pipeline,
    inputs=[gr.Image(type='filepath', label='Input image')],
    outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
    examples=demo_data,
    title="OCRD Pipeline",
    description=(
        "<ul><li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes "
        "images to recognize text.</li> "
        "<li>Steps include binarization, text line segmentation, extraction, filtering and deskewing as well as OCR. "
        "Results are displayed as a generated overlay image.</li> "
        "<li>Optimized for English; other languages (e.g. German) may require OCR model fine-tuning.</li> "
        "<li>Uses free CPU-based compute, which is rather slow. A pipeline run will take up to 10 minutes. "
        "For lengthy waits, pre-computed demo results are available for download: https://github.com/pluniak/ocrd/tree/main/src/demo_data.</li> "
        "<li>Note: The demo is just a first version! OCR performance and computation speed can be optimized.</li> "
        "<li>The demo is based on code from my GitHub repository: https://github.com/pluniak/ocrd/tree/main</li></ul>"
    ),
)
iface.launch()