Spaces:

ProgramerSalar
/

ETFP

Runtime error

ProgramerSalar committed on Sep 14

Commit

315e1bd

1 Parent(s): b4a89a6

init

Files changed (7) hide show

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ **/__pycache__/

.vscode/settings.json ADDED Viewed

+{
+    "python-envs.defaultEnvManager": "ms-python.python:system",
+    "python-envs.pythonProjects": []
+}

doc.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Extract Text from Photo

gardio.py ADDED Viewed

+import gradio as gr
+from PIL import Image
+import pytesseract
+custom_css = """
+    .big-font textarea {
+        font-size: 20px !important;
+    }
+"""  # CSS passed to gr.Interface(css=...); enlarges the font of textareas inside elements classed "big-font"
+def extract_text(image):
+    # Convert the image to text using pytesseract
+    text = pytesseract.image_to_string(image)
+    return text
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=extract_text,
+    inputs=gr.Image(type="pil"),  # Accept PIL images directly
+    outputs=gr.Textbox(lines=20,
+                       max_lines=10,
+                       label='Extracted Text',
+                       elem_classes=["big-font"]
+                       ),
+    title="Text Extraction",
+    description="Upload an image to extract text",
+    allow_flagging='never',
+    css=custom_css
+)
+# Launch the app
+iface.launch()

main.py ADDED Viewed

+import pytesseract
+from PIL import Image
+def extract_text(image_path):
+    img = Image.open(image_path)
+    et = pytesseract.image_to_string(img)
+    return et
+# if __name__ == "__main__":
+#     text = extract_text(image_path="/home/manish/Desktop/projects/etfp/data/WhatsApp Image 2025-09-13 at 19.01.52.jpeg")
+#     print(text)

test.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

test.py ADDED Viewed

+import cv2
+import pytesseract
+myconfig = r"--psm 11 --oem 3"
+def extract_text(image_path):
+  #Loading image in CV2 format
+  img = cv2.imread(image_path)
+  #Image preprocessing
+  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+  cv2.imwrite("temp/gray.png", gray)
+  blur = cv2.GaussianBlur(gray, (7,7), 0)
+  cv2.imwrite("temp/blur.png", blur)
+  thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,13))
+  dilate = cv2.dilate(thresh, kernel, iterations=1)
+  #Finding Contours for creating boundaries
+  cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+  cnts = cnts[0] if len(cnts) == 2 else cnts[1]
+  cnts = sorted(cnts, key=lambda x: cv2.boundingRect(x)[0])
+  for c in cnts:
+    x, y, w, h = cv2.boundingRect(c)
+    if h > 50 and w > 10:
+      cv2.rectangle(img, (x,y), (x+w, y+h), (36,200,12), 2)
+  cv2.imwrite("temp/boxes.png", img)
+  # Text extraction
+  text = pytesseract.image_to_string(gray,config=myconfig)
+  print("Extracted Text:\n", text)
+  return img, text