chandini2595 committed on
Commit
7d7f295
·
1 Parent(s): f825473

Added handwritten

Browse files
Files changed (3) hide show
  1. app.py +50 -49
  2. requirements.txt +2 -0
  3. temp_uploaded_image_paddle.jpg +3 -0
app.py CHANGED
@@ -25,7 +25,7 @@ import matplotlib
25
  import boto3
26
  from decimal import Decimal
27
  import uuid
28
- matplotlib.use('Agg')
29
 
30
  # Configure logging
31
  logging.basicConfig(level=logging.INFO)
@@ -193,6 +193,27 @@ def merge_extractions(regex_fields, llm_fields):
193
  merged["products"] = llm_fields.get("products") or regex_fields.get("products")
194
  return merged
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def main():
197
  st.set_page_config(
198
  page_title="FormIQ - Intelligent Document Parser",
@@ -246,49 +267,43 @@ def main():
246
  )
247
 
248
  if uploaded_file is not None:
249
- # Display uploaded image
250
- if uploaded_file.type == "application/pdf":
251
- images = convert_from_bytes(uploaded_file.read())
252
- image = images[0] # Use the first page
253
- else:
254
- image = Image.open(uploaded_file)
255
  st.image(image, caption="Uploaded Document", width=600)
256
 
 
 
 
 
 
 
 
 
257
  # Process button
258
  if st.button("Process Document"):
259
  with st.spinner("Processing document..."):
260
  try:
261
- # Save the uploaded file to a temporary location
262
  temp_path = "temp_uploaded_image.jpg"
263
  image.save(temp_path)
264
 
265
- # Extract fields using OCR + regex
266
- fields = extract_fields(temp_path)
267
-
268
- # Extract with Perplexity LLM
269
- with st.spinner("Extracting structured data with Perplexity LLM..."):
270
- llm_result = extract_with_perplexity_llm(pytesseract.image_to_string(Image.open(temp_path)))
271
- llm_json = extract_json_from_llm_output(llm_result)
272
- st.subheader("Structured Data (Perplexity LLM)")
273
- if llm_json:
274
- try:
275
- llm_data = json.loads(llm_json)
276
- st.json(llm_data)
277
- # Save to DynamoDB
278
- try:
279
- save_to_dynamodb(llm_data)
280
- st.success("Saved to DynamoDB!")
281
- except Exception as e:
282
- st.error(f"Failed to save to DynamoDB: {e}")
283
- except Exception as e:
284
- st.error(f"Failed to parse LLM output as JSON: {e}")
285
- else:
286
- st.warning("No valid JSON found in LLM output.")
287
-
288
- # Display extracted products if present
289
- if "products" in llm_data and llm_data["products"]:
290
- st.subheader("Products (LLM Extracted)")
291
- st.dataframe(pd.DataFrame(llm_data["products"]))
292
 
293
  except Exception as e:
294
  logger.error(f"Error processing document: {str(e)}")
@@ -351,19 +366,5 @@ def main():
351
  else:
352
  st.info("Confusion matrix not found.")
353
 
354
- # Load model and processor
355
- processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
356
- model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
357
-
358
- # Load your image (crop to handwritten region if possible)
359
- image = Image.open('handwritten_sample.jpg').convert("RGB")
360
-
361
- # Preprocess and predict
362
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
363
- generated_ids = model.generate(pixel_values)
364
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
365
-
366
- print("Handwritten text:", generated_text)
367
-
368
  if __name__ == "__main__":
369
  main()
 
25
  import boto3
26
  from decimal import Decimal
27
  import uuid
28
+ from paddleocr import PaddleOCR
29
 
30
  # Configure logging
31
  logging.basicConfig(level=logging.INFO)
 
193
  merged["products"] = llm_fields.get("products") or regex_fields.get("products")
194
  return merged
195
 
196
+ def extract_handwritten_text(image):
197
+ processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
198
+ model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
199
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
200
+ generated_ids = model.generate(pixel_values)
201
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
202
+ return generated_text
203
+
204
+ @st.cache_resource
205
+ def get_paddle_ocr():
206
+ return PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
207
+
208
+ def extract_handwritten_text_paddle(image):
209
+ ocr = get_paddle_ocr()
210
+ # Save PIL image to a temporary file
211
+ temp_path = 'temp_uploaded_image_paddle.jpg'
212
+ image.save(temp_path)
213
+ result = ocr.ocr(temp_path, cls=True)
214
+ lines = [line[1][0] for line in result[0]]
215
+ return '\n'.join(lines)
216
+
217
  def main():
218
  st.set_page_config(
219
  page_title="FormIQ - Intelligent Document Parser",
 
267
  )
268
 
269
  if uploaded_file is not None:
270
+ image = Image.open(uploaded_file).convert("RGB")
 
 
 
 
 
271
  st.image(image, caption="Uploaded Document", width=600)
272
 
273
+ handwritten_text = None
274
+ # Option to extract handwritten text with PaddleOCR
275
+ if st.checkbox("Extract handwritten text (PaddleOCR)?"):
276
+ with st.spinner("Extracting handwritten text with PaddleOCR..."):
277
+ handwritten_text = extract_handwritten_text_paddle(image)
278
+ st.subheader("Handwritten Text Extracted (PaddleOCR)")
279
+ st.write(handwritten_text)
280
+
281
  # Process button
282
  if st.button("Process Document"):
283
  with st.spinner("Processing document..."):
284
  try:
 
285
  temp_path = "temp_uploaded_image.jpg"
286
  image.save(temp_path)
287
 
288
+ # Use handwritten text if available, else fallback to pytesseract
289
+ if handwritten_text:
290
+ llm_input_text = handwritten_text
291
+ else:
292
+ llm_input_text = pytesseract.image_to_string(Image.open(temp_path))
293
+
294
+ llm_result = extract_with_perplexity_llm(llm_input_text)
295
+ llm_json = extract_json_from_llm_output(llm_result)
296
+ st.subheader("Structured Data (Perplexity LLM)")
297
+ if llm_json:
298
+ try:
299
+ llm_data = json.loads(llm_json)
300
+ st.json(llm_data)
301
+ save_to_dynamodb(llm_data)
302
+ st.success("Saved to DynamoDB!")
303
+ except Exception as e:
304
+ st.error(f"Failed to parse LLM output as JSON: {e}")
305
+ else:
306
+ st.warning("No valid JSON found in LLM output.")
 
 
 
 
 
 
 
 
307
 
308
  except Exception as e:
309
  logger.error(f"Error processing document: {str(e)}")
 
366
  else:
367
  st.info("Confusion matrix not found.")
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  if __name__ == "__main__":
370
  main()
requirements.txt CHANGED
@@ -34,3 +34,5 @@ plotly==5.18.0
34
  matplotlib
35
  scikit-learn
36
  pdf2image
 
 
 
34
  matplotlib
35
  scikit-learn
36
  pdf2image
37
+ paddleocr
38
+ paddlepaddle
temp_uploaded_image_paddle.jpg ADDED

Git LFS Details

  • SHA256: d904a26ba3ba067260100a2fa32b0626993765ba6075b832eec06ff0fc07c7c3
  • Pointer size: 131 Bytes
  • Size of remote file: 261 kB