Commit 7d7f295 · Parent: f825473
Added handwritten

Files changed:
- app.py (+50, -49)
- requirements.txt (+2, -0)
- temp_uploaded_image_paddle.jpg (+3, -0)
app.py
CHANGED
@@ -25,7 +25,7 @@ import matplotlib
 import boto3
 from decimal import Decimal
 import uuid
-
+from paddleocr import PaddleOCR
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -193,6 +193,27 @@ def merge_extractions(regex_fields, llm_fields):
     merged["products"] = llm_fields.get("products") or regex_fields.get("products")
     return merged
 
+def extract_handwritten_text(image):
+    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
+    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values
+    generated_ids = model.generate(pixel_values)
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_text
+
+@st.cache_resource
+def get_paddle_ocr():
+    return PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
+
+def extract_handwritten_text_paddle(image):
+    ocr = get_paddle_ocr()
+    # Save PIL image to a temporary file
+    temp_path = 'temp_uploaded_image_paddle.jpg'
+    image.save(temp_path)
+    result = ocr.ocr(temp_path, cls=True)
+    lines = [line[1][0] for line in result[0]]
+    return '\n'.join(lines)
+
 def main():
     st.set_page_config(
         page_title="FormIQ - Intelligent Document Parser",
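The list comprehension in extract_handwritten_text_paddle assumes the PaddleOCR 2.x result layout: for a single image, ocr.ocr(...) returns a one-element list whose first entry is a list of [bounding_box, (text, confidence)] pairs. A minimal sketch of that parsing step, with an optional confidence filter added for illustration (the 0.5 threshold and the guard for empty results are assumptions, not part of this commit):

from paddleocr import PaddleOCR

# Assumes the PaddleOCR 2.x API used in app.py: one result entry per input image,
# each entry a list of [box, (text, score)] pairs (or None when nothing is detected).
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
result = ocr.ocr('temp_uploaded_image_paddle.jpg', cls=True)

lines = []
for box, (text, score) in (result[0] or []):
    if score >= 0.5:  # illustrative threshold, not part of the commit
        lines.append(text)

handwritten_text = '\n'.join(lines)
print(handwritten_text)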
@@ -246,49 +267,43 @@ def main():
     )
 
     if uploaded_file is not None:
-
-        if uploaded_file.type == "application/pdf":
-            images = convert_from_bytes(uploaded_file.read())
-            image = images[0]  # Use the first page
-        else:
-            image = Image.open(uploaded_file)
+        image = Image.open(uploaded_file).convert("RGB")
         st.image(image, caption="Uploaded Document", width=600)
 
+        handwritten_text = None
+        # Option to extract handwritten text with PaddleOCR
+        if st.checkbox("Extract handwritten text (PaddleOCR)?"):
+            with st.spinner("Extracting handwritten text with PaddleOCR..."):
+                handwritten_text = extract_handwritten_text_paddle(image)
+            st.subheader("Handwritten Text Extracted (PaddleOCR)")
+            st.write(handwritten_text)
+
         # Process button
         if st.button("Process Document"):
            with st.spinner("Processing document..."):
                try:
-                    # Save the uploaded file to a temporary location
                    temp_path = "temp_uploaded_image.jpg"
                    image.save(temp_path)
 
- … (old lines 265-283, also removed, are not shown in this view)
-                            st.error(f"Failed to parse LLM output as JSON: {e}")
-                    else:
-                        st.warning("No valid JSON found in LLM output.")
-
-                    # Display extracted products if present
-                    if "products" in llm_data and llm_data["products"]:
-                        st.subheader("Products (LLM Extracted)")
-                        st.dataframe(pd.DataFrame(llm_data["products"]))
+                    # Use handwritten text if available, else fallback to pytesseract
+                    if handwritten_text:
+                        llm_input_text = handwritten_text
+                    else:
+                        llm_input_text = pytesseract.image_to_string(Image.open(temp_path))
+
+                    llm_result = extract_with_perplexity_llm(llm_input_text)
+                    llm_json = extract_json_from_llm_output(llm_result)
+                    st.subheader("Structured Data (Perplexity LLM)")
+                    if llm_json:
+                        try:
+                            llm_data = json.loads(llm_json)
+                            st.json(llm_data)
+                            save_to_dynamodb(llm_data)
+                            st.success("Saved to DynamoDB!")
+                        except Exception as e:
+                            st.error(f"Failed to parse LLM output as JSON: {e}")
+                    else:
+                        st.warning("No valid JSON found in LLM output.")
 
                 except Exception as e:
                     logger.error(f"Error processing document: {str(e)}")
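Both OCR paths in the new flow round-trip through a file on disk (temp_uploaded_image.jpg for pytesseract, temp_uploaded_image_paddle.jpg for PaddleOCR). Not part of this commit, but as a possible refinement: pytesseract.image_to_string accepts a PIL image directly, and PaddleOCR accepts a NumPy array, so the same selection logic could run in memory. A sketch under those assumptions (ocr_text_in_memory is a hypothetical helper, not a function in app.py):

import numpy as np
import pytesseract
from PIL import Image
from paddleocr import PaddleOCR

_ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)  # same settings as get_paddle_ocr()

def ocr_text_in_memory(image: Image.Image, use_paddle: bool) -> str:
    """Hypothetical helper: run either OCR engine without writing a temp file."""
    if use_paddle:
        # PaddleOCR works on NumPy arrays and expects BGR channel order,
        # so reverse the channels of the RGB PIL image.
        bgr = np.array(image.convert("RGB"))[:, :, ::-1].copy()
        result = _ocr.ocr(bgr, cls=True)
        return '\n'.join(line[1][0] for line in (result[0] or []))
    # pytesseract accepts a PIL image directly.
    return pytesseract.image_to_string(image)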
@@ -351,19 +366,5 @@ def main():
     else:
         st.info("Confusion matrix not found.")
 
-# Load model and processor
-processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
-model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
-
-# Load your image (crop to handwritten region if possible)
-image = Image.open('handwritten_sample.jpg').convert("RGB")
-
-# Preprocess and predict
-pixel_values = processor(images=image, return_tensors="pt").pixel_values
-generated_ids = model.generate(pixel_values)
-generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-print("Handwritten text:", generated_text)
-
 if __name__ == "__main__":
     main()
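The script-level TrOCR snippet removed here survives as the extract_handwritten_text() helper added earlier in this commit. As the removed comment noted, the handwritten TrOCR checkpoint is trained on single text lines, so it works best on a crop of the handwritten region. A standalone sketch of that usage; the file name and crop box below are placeholders:

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load the handwritten TrOCR checkpoint (downloaded from the Hub on first use).
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')

# Placeholder path and crop box: crop to the handwritten line before running the model.
image = Image.open('handwritten_sample.jpg').convert("RGB")
line_crop = image.crop((0, 0, image.width, image.height // 4))

pixel_values = processor(images=line_crop, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print("Handwritten text:", generated_text)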
requirements.txt
CHANGED
@@ -34,3 +34,5 @@ plotly==5.18.0
 matplotlib
 scikit-learn
 pdf2image
+paddleocr
+paddlepaddle
temp_uploaded_image_paddle.jpg
ADDED
(binary image file, stored with Git LFS)