Spaces:
Sleeping
Sleeping
Update convert.py
Browse files
- convert.py +8 -16
convert.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
import
|
| 2 |
-
from io import BytesIO
|
| 3 |
import streamlit as st
|
| 4 |
|
| 5 |
def ExtractPDFText(pdf):
|
|
@@ -7,21 +6,14 @@ def ExtractPDFText(pdf):
|
|
| 7 |
pdf_bytes = pdf.read()
|
| 8 |
|
| 9 |
try:
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
content += text
|
| 17 |
|
| 18 |
except Exception as e:
|
| 19 |
st.error(f"Error extracting text from PDF: {e}")
|
| 20 |
-
|
| 21 |
-
finally:
|
| 22 |
-
if "pdf_document" in locals():
|
| 23 |
-
pdf_document.close()
|
| 24 |
-
|
| 25 |
-
return content
|
| 26 |
-
|
| 27 |
|
|
|
|
|
|
| 1 |
+
import pdfplumber
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
|
| 4 |
def ExtractPDFText(pdf):
|
|
|
|
| 6 |
pdf_bytes = pdf.read()
|
| 7 |
|
| 8 |
try:
|
| 9 |
+
# Using pdfplumber to read the PDF bytes
|
| 10 |
+
with pdfplumber.open(BytesIO(pdf_bytes)) as pdf_document:
|
| 11 |
+
# Iterate through pages and extract text
|
| 12 |
+
for page in pdf_document.pages:
|
| 13 |
+
text = page.extract_text()
|
| 14 |
+
content += text if text else ""
|
|
|
|
| 15 |
|
| 16 |
except Exception as e:
|
| 17 |
st.error(f"Error extracting text from PDF: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
return content
|