Spaces:
Runtime error
Runtime error
Commit
·
1f583e7
1
Parent(s):
315e1bd
upgrade
Browse files
- .gitignore +0 -1
- __pycache__/gardio.cpython-310.pyc +0 -0
- doc.md +0 -1
- gardio.py +0 -32
- main.py +0 -16
- test.ipynb +0 -0
- test.py +0 -37
.gitignore
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
**/__pycache__/
|
|
|
|
__pycache__/gardio.cpython-310.pyc
ADDED
Binary file (793 Bytes). View file
|
|
doc.md
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
# Extract Text from photo
|
|
|
|
gardio.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from PIL import Image
|
3 |
-
import pytesseract
|
4 |
-
|
5 |
-
custom_css = """
|
6 |
-
.big-font textarea {
|
7 |
-
font-size: 20px !important;
|
8 |
-
}
|
9 |
-
"""
|
10 |
-
|
11 |
-
def extract_text(image):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image: The PIL image delivered by the ``gr.Image(type="pil")``
            input, or ``None`` when the form is submitted without an image.

    Returns:
        The string produced by ``pytesseract.image_to_string``; an empty
        string when no image was supplied.
    """
    # Gradio passes None for an empty image input; forwarding that to
    # pytesseract raises instead of producing an empty result.
    if image is None:
        return ""
    return pytesseract.image_to_string(image)
|
15 |
-
|
16 |
-
# Create the Gradio interface: one image input, one read-only text output.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="pil"),  # Accept PIL images directly
    outputs=gr.Textbox(
        lines=20,
        # max_lines is the upper bound on visible rows and must be at
        # least `lines`; a smaller value contradicts the minimum above.
        max_lines=20,
        label='Extracted Text',
        elem_classes=["big-font"],  # matched by the .big-font rule in custom_css
    ),
    title="Text Extraction",
    description="Upload an image to extract text",
    allow_flagging='never',
    css=custom_css,
)

# Launch the app
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
import pytesseract
|
2 |
-
from PIL import Image
|
3 |
-
|
4 |
-
|
5 |
-
def extract_text(image_path):
    """Open the image at ``image_path`` and return its OCR text.

    Args:
        image_path: Path of the image file, passed to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Image.open keeps the file handle open; the context manager releases
    # it as soon as OCR has finished, even if pytesseract raises.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
# if __name__ == "__main__":
|
14 |
-
# text = extract_text(image_path="/home/manish/Desktop/projects/etfp/data/WhatsApp Image 2025-09-13 at 19.01.52.jpeg")
|
15 |
-
# print(text)
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
test.py
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
import cv2
|
2 |
-
import pytesseract
|
3 |
-
|
4 |
-
# Tesseract command-line options forwarded through pytesseract's `config`
# argument: page segmentation mode 11 and OCR engine mode 3 (per the
# Tesseract docs these are "sparse text" and "default engine" -- confirm
# against the installed Tesseract version).
myconfig = r"--psm 11 --oem 3"
|
5 |
-
|
6 |
-
def extract_text(image_path):
    """Preprocess an image with OpenCV, mark candidate regions, and OCR it.

    The grayscale, blurred and box-annotated intermediates are written to
    ``temp/gray.png``, ``temp/blur.png`` and ``temp/boxes.png``.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        A ``(img, text)`` tuple: the loaded image with rectangles drawn on
        it, and the string pytesseract returns for the grayscale image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    import os  # local import: only needed to prepare the debug directory

    # Loading image in CV2 format
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque cvtColor error below.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # cv2.imwrite does not create missing directories, so make sure the
    # debug output folder exists before writing intermediates.
    os.makedirs("temp", exist_ok=True)

    # Image preprocessing
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.imwrite("temp/gray.png", gray)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    cv2.imwrite("temp/blur.png", blur)

    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 13))
    dilate = cv2.dilate(thresh, kernel, iterations=1)

    # Finding contours for creating boundaries
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    # Order contours left to right by the x coordinate of their bounding box.
    cnts = sorted(cnts, key=lambda x: cv2.boundingRect(x)[0])

    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        # Only outline regions taller than 50 px and wider than 10 px.
        if h > 50 and w > 10:
            cv2.rectangle(img, (x, y), (x + w, y + h), (36, 200, 12), 2)

    cv2.imwrite("temp/boxes.png", img)

    # Text extraction runs on the grayscale image, not the annotated one.
    text = pytesseract.image_to_string(gray, config=myconfig)

    print("Extracted Text:\n", text)
    return img, text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|