pluniak committed on
Commit
f576ba7
·
verified ·
1 Parent(s): a27c0f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -86
app.py CHANGED
@@ -1,87 +1,84 @@
1
- import gradio as gr
2
- import sys
3
- sys.path.append('../src/')
4
- from utils.helpers import OCRD
5
-
6
-
7
-
8
- def run_ocrd_pipeline(img_path, status=gr.Progress(), binarize_mode='detailed', min_pixel_sum=30, median_bounds=(None, None), font_size=30):
9
- """
10
- Executes the OCRD pipeline on an image from file loading to text overlay creation. This function orchestrates
11
- the calling of various OCRD class methods to process the image, extract and recognize text, and then overlay
12
- this text on the original image.
13
-
14
- Parameters:
15
- img_path (str): Path to the image file.
16
- binarize_mode (str): Mode to be used for image binarization. Can be 'detailed', 'fast', or 'no'.
17
- min_pixel_sum (int, optional): Minimum sum of pixels to consider a text line segmentation for extraction.
18
- If 'default', default values are applied.
19
- median_bounds (tuple, optional): Bounds to filter text line segmentations based on size relative to the median.
20
- If 'default', default values are applied.
21
- font_size (int, optional): Font size to be used in text overlay. If 'default', a default size or scaling logic is applied.
22
-
23
- Returns:
24
- Image: An image with overlay text, where text is extracted and recognized from the original image.
25
-
26
- This function handles:
27
- - Image binarization.
28
- - Text line segmentation.
29
- - Text line extraction and deskewing.
30
- - Optical character recognition on text lines.
31
- - Creating an image overlay with recognized text.
32
- """
33
-
34
- # prepare kwargs
35
- efadt_kwargs = {}
36
- if min_pixel_sum != 'default':
37
- efadt_kwargs['min_pixel_sum'] = min_pixel_sum
38
- if median_bounds != 'default':
39
- efadt_kwargs['median_bounds'] = median_bounds
40
-
41
- ctoi_kwargs = {}
42
- if font_size != 'default':
43
- ctoi_kwargs['font_size'] = font_size
44
-
45
- # run pipeline
46
- #status(0, desc="\nReading image...\n")
47
- ocrd = OCRD(img_path)
48
- status(0, desc='\nStep 1/5: Binarizing image...\n')
49
- binarized = ocrd.binarize_image(ocrd.image, binarize_mode)
50
- status(0, desc='\nStep 2/5: Segmenting textlines...\n')
51
- textline_segments = ocrd.segment_textlines(binarized)
52
- status(0, desc='\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
53
- image_scaled = ocrd.scale_image(ocrd.image) # textline_segments were predicted on rescaled image
54
- textline_images, _ = ocrd.extract_filter_and_deskew_textlines(image_scaled, textline_segments[...,0], **efadt_kwargs)
55
- status(0, desc='\nStep 4/5: OCR on textlines...\n')
56
- textline_preds = ocrd.ocr_on_textlines(textline_images)
57
- status(0, desc='\nStep 5/5: Creating output overlay image...')
58
- img_gen = ocrd.create_text_overlay_image(textline_images, textline_preds, (image_scaled.shape[0], image_scaled.shape[1]), **ctoi_kwargs)
59
- status(1, desc='\nJOB COMPLETED\n')
60
-
61
- return img_gen
62
-
63
-
64
- demo_data = [
65
- '../src/demo_data/act_image.jpg',
66
- '../src/demo_data/newjersey1_image.jpg',
67
- '../src/demo_data/newjersey2_image.jpg',
68
- '../src/demo_data/notes_image.jpg',
69
- '../src/demo_data/washington_image.jpg'
70
- ]
71
-
72
-
73
- iface = gr.Interface(run_ocrd_pipeline,
74
- title="OCRD Pipeline",
75
- description="<ul><li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes \
76
- images to recognize text.</li> \
77
- <li>Steps include binarization, text line segmentation, extraction, filtering and deskewing as well as OCR. \
78
- Results are displayed as a generated overlay image.</li> \
79
- <li>Optimized for English; other languages (e.g. German) may require OCR model fine-tuning.</li> \
80
- <li>Uses free CPU-based compute, which is rather slow. A pipeline run will take up to 10 minutes. \
81
- For lengthy waits, pre-computed demo results are available for download: https://github.com/pluniak/ocrd/tree/main/src/demo_data.</li> \
82
- <li>Note: The demo is just a first version! OCR performance and computation speed can be optimized.</li> \
83
- <li>The demo is based on code from my GitHub repository: https://github.com/pluniak/ocrd/tree/main</li></ul>",
84
- inputs=[gr.Image(type='filepath', label='Input image')],
85
- outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
86
- examples=demo_data)
87
  iface.launch()
 
1
+ import gradio as gr
2
+ from helpers import OCRD
3
+
4
+
5
+ def run_ocrd_pipeline(img_path, status=gr.Progress(), binarize_mode='detailed', min_pixel_sum=30, median_bounds=(None, None), font_size=30):
6
+ """
7
+ Executes the OCRD pipeline on an image from file loading to text overlay creation. This function orchestrates
8
+ the calling of various OCRD class methods to process the image, extract and recognize text, and then overlay
9
+ this text on the original image.
10
+
11
+ Parameters:
12
+ img_path (str): Path to the image file.
13
+ binarize_mode (str): Mode to be used for image binarization. Can be 'detailed', 'fast', or 'no'.
14
+ min_pixel_sum (int, optional): Minimum sum of pixels to consider a text line segmentation for extraction.
15
+ If 'default', default values are applied.
16
+ median_bounds (tuple, optional): Bounds to filter text line segmentations based on size relative to the median.
17
+ If 'default', default values are applied.
18
+ font_size (int, optional): Font size to be used in text overlay. If 'default', a default size or scaling logic is applied.
19
+
20
+ Returns:
21
+ Image: An image with overlay text, where text is extracted and recognized from the original image.
22
+
23
+ This function handles:
24
+ - Image binarization.
25
+ - Text line segmentation.
26
+ - Text line extraction and deskewing.
27
+ - Optical character recognition on text lines.
28
+ - Creating an image overlay with recognized text.
29
+ """
30
+
31
+ # prepare kwargs
32
+ efadt_kwargs = {}
33
+ if min_pixel_sum != 'default':
34
+ efadt_kwargs['min_pixel_sum'] = min_pixel_sum
35
+ if median_bounds != 'default':
36
+ efadt_kwargs['median_bounds'] = median_bounds
37
+
38
+ ctoi_kwargs = {}
39
+ if font_size != 'default':
40
+ ctoi_kwargs['font_size'] = font_size
41
+
42
+ # run pipeline
43
+ #status(0, desc="\nReading image...\n")
44
+ ocrd = OCRD(img_path)
45
+ status(0, desc='\nStep 1/5: Binarizing image...\n')
46
+ binarized = ocrd.binarize_image(ocrd.image, binarize_mode)
47
+ status(0, desc='\nStep 2/5: Segmenting textlines...\n')
48
+ textline_segments = ocrd.segment_textlines(binarized)
49
+ status(0, desc='\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
50
+ image_scaled = ocrd.scale_image(ocrd.image) # textline_segments were predicted on rescaled image
51
+ textline_images, _ = ocrd.extract_filter_and_deskew_textlines(image_scaled, textline_segments[...,0], **efadt_kwargs)
52
+ status(0, desc='\nStep 4/5: OCR on textlines...\n')
53
+ textline_preds = ocrd.ocr_on_textlines(textline_images)
54
+ status(0, desc='\nStep 5/5: Creating output overlay image...')
55
+ img_gen = ocrd.create_text_overlay_image(textline_images, textline_preds, (image_scaled.shape[0], image_scaled.shape[1]), **ctoi_kwargs)
56
+ status(1, desc='\nJOB COMPLETED\n')
57
+
58
+ return img_gen
59
+
60
+
61
+ demo_data = [
62
+ '../src/demo_data/act_image.jpg',
63
+ '../src/demo_data/newjersey1_image.jpg',
64
+ '../src/demo_data/newjersey2_image.jpg',
65
+ '../src/demo_data/notes_image.jpg',
66
+ '../src/demo_data/washington_image.jpg'
67
+ ]
68
+
69
+
70
+ iface = gr.Interface(run_ocrd_pipeline,
71
+ title="OCRD Pipeline",
72
+ description="<ul><li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes \
73
+ images to recognize text.</li> \
74
+ <li>Steps include binarization, text line segmentation, extraction, filtering and deskewing as well as OCR. \
75
+ Results are displayed as a generated overlay image.</li> \
76
+ <li>Optimized for English; other languages (e.g. German) may require OCR model fine-tuning.</li> \
77
+ <li>Uses free CPU-based compute, which is rather slow. A pipeline run will take up to 10 minutes. \
78
+ For lengthy waits, pre-computed demo results are available for download: https://github.com/pluniak/ocrd/tree/main/src/demo_data.</li> \
79
+ <li>Note: The demo is just a first version! OCR performance and computation speed can be optimized.</li> \
80
+ <li>The demo is based on code from my GitHub repository: https://github.com/pluniak/ocrd/tree/main</li></ul>",
81
+ inputs=[gr.Image(type='filepath', label='Input image')],
82
+ outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
83
+ examples=demo_data)
 
 
 
84
  iface.launch()