ProgramerSalar committed on
Commit
1f583e7
·
1 Parent(s): 315e1bd
Files changed (7) hide show
  1. .gitignore +0 -1
  2. __pycache__/gardio.cpython-310.pyc +0 -0
  3. doc.md +0 -1
  4. gardio.py +0 -32
  5. main.py +0 -16
  6. test.ipynb +0 -0
  7. test.py +0 -37
.gitignore DELETED
@@ -1 +0,0 @@
1
- **/__pycache__/
 
 
__pycache__/gardio.cpython-310.pyc ADDED
Binary file (793 Bytes). View file
 
doc.md DELETED
@@ -1 +0,0 @@
1
- # Extract Text from photo
 
 
gardio.py DELETED
@@ -1,32 +0,0 @@
1
- import gradio as gr
2
- from PIL import Image
3
- import pytesseract
4
-
5
- custom_css = """
6
- .big-font textarea {
7
- font-size: 20px !important;
8
- }
9
- """
10
-
11
- def extract_text(image):
12
- # Convert the image to text using pytesseract
13
- text = pytesseract.image_to_string(image)
14
- return text
15
-
16
- # Create the Gradio interface
17
- iface = gr.Interface(
18
- fn=extract_text,
19
- inputs=gr.Image(type="pil"), # Accept PIL images directly
20
- outputs=gr.Textbox(lines=20,
21
- max_lines=10,
22
- label='Extracted Text',
23
- elem_classes=["big-font"]
24
- ),
25
- title="Text Extraction",
26
- description="Upload an image to extract text",
27
- allow_flagging='never',
28
- css=custom_css
29
- )
30
-
31
- # Launch the app
32
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py DELETED
@@ -1,16 +0,0 @@
1
- import pytesseract
2
- from PIL import Image
3
-
4
-
5
- def extract_text(image_path):
6
- img = Image.open(image_path)
7
- et = pytesseract.image_to_string(img)
8
-
9
- return et
10
-
11
-
12
-
13
- # if __name__ == "__main__":
14
- # text = extract_text(image_path="/home/manish/Desktop/projects/etfp/data/WhatsApp Image 2025-09-13 at 19.01.52.jpeg")
15
- # print(text)
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
test.py DELETED
@@ -1,37 +0,0 @@
1
- import cv2
2
- import pytesseract
3
-
4
- myconfig = r"--psm 11 --oem 3"
5
-
6
- def extract_text(image_path):
7
-
8
- #Loading image in CV2 format
9
- img = cv2.imread(image_path)
10
-
11
- #Image preprocessing
12
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
13
- cv2.imwrite("temp/gray.png", gray)
14
- blur = cv2.GaussianBlur(gray, (7,7), 0)
15
- cv2.imwrite("temp/blur.png", blur)
16
-
17
- thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
18
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,13))
19
- dilate = cv2.dilate(thresh, kernel, iterations=1)
20
-
21
- #Finding Contours for creating boundaries
22
- cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
23
- cnts = cnts[0] if len(cnts) == 2 else cnts[1]
24
- cnts = sorted(cnts, key=lambda x: cv2.boundingRect(x)[0])
25
-
26
- for c in cnts:
27
- x, y, w, h = cv2.boundingRect(c)
28
- if h > 50 and w > 10:
29
- cv2.rectangle(img, (x,y), (x+w, y+h), (36,200,12), 2)
30
-
31
- cv2.imwrite("temp/boxes.png", img)
32
-
33
- # Text extraction
34
- text = pytesseract.image_to_string(gray,config=myconfig)
35
-
36
- print("Extracted Text:\n", text)
37
- return img, text