Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,8 @@ from utils import (
|
|
| 12 |
extract_csv_from_response,
|
| 13 |
pdf_to_images,
|
| 14 |
analyze_single_document,
|
| 15 |
-
process_local_pdf
|
|
|
|
| 16 |
)
|
| 17 |
import base64
|
| 18 |
from datetime import datetime
|
|
@@ -62,27 +63,7 @@ st.markdown("""
|
|
| 62 |
st.title("📄 PDF Document Analyzer")
|
| 63 |
st.markdown("Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing")
|
| 64 |
|
| 65 |
-
# Load prompt
|
| 66 |
-
PROMPT ="""Please analyze the provided images of the real estate document set and perform the following actions:
|
| 67 |
|
| 68 |
-
1. *Identify Parties:* Determine and list Seller 1, Seller 2 (if applicable), Buyer 1, and Buyer 2.
|
| 69 |
-
2. *Identify Missing Items:* Locate and list all instances of missing signatures and missing initials for all parties across all documents.
|
| 70 |
-
3. *Identify Checked Boxes:* Locate and list all checkboxes that have been marked or checked.
|
| 71 |
-
4. *Generate Secondary Questions:* For checkboxes that indicate significant waivers (e.g., home warranty, inspection rights, lead paint assessment), specific conditions (e.g., cash sale, contingency status), potential conflicts, or reference other documents, formulate a relevant 'Secondary Question' designed to prompt confirmation or clarification from the user/parties involved.
|
| 72 |
-
5. *Check for Required Paperwork:* Based only on the checkboxes identified in step 3 that explicitly state or strongly imply a specific addendum or disclosure document should be attached (e.g., "Lead Based Paint Disclosure Addendum attached", "See Counter Offer Addendum", "Seller's Disclosure...Addendum attached", "Retainer Addendum attached", etc.), check if a document matching that description appears to be present within the provided image set. Note whether this implied paperwork is 'Found', 'Missing', or 'Potentially Missing/Ambiguous' within the provided images.
|
| 73 |
-
6. *Identify Conflicts:* Specifically look for and note any directly contradictory information or conflicting checked boxes (like the conflicting inspection clauses found previously).
|
| 74 |
-
7. *Provide Location:* For every identified item (missing signature/initial, checked box, required paperwork status, party identification, conflict), specify the approximate line number(s) or clear location on the page (e.g., Bottom Right Initials, Seller Signature Block).
|
| 75 |
-
8. *Format Output:* Present all findings comprehensively in CSV format. The CSV columns should be:
|
| 76 |
-
* Category (e.g., Parties, Missing Item, Checked Box, Required Paperwork, Conflict)
|
| 77 |
-
* Location (Document Name/Page, e.g., Sale Contract Pg 2)
|
| 78 |
-
* Line Item(s) (Approximate line number or location description)
|
| 79 |
-
* Item Type (e.g., Seller Initials, Home Warranty Waiver, Lead Paint Addendum Check, Lead Paint Addendum Document)
|
| 80 |
-
* Status (e.g., Identified, Missing, Checked, Found, Potentially Missing, Conflict Detected)
|
| 81 |
-
* Details (Specifics like names, text of the checkbox, description of the issue or document status)
|
| 82 |
-
* Secondary Question (if applicable) (The question generated in step 4)
|
| 83 |
-
|
| 84 |
-
Please apply this analysis to the entire set of documents provided.
|
| 85 |
-
"""
|
| 86 |
|
| 87 |
# Sidebar Configuration
|
| 88 |
with st.sidebar:
|
|
@@ -134,7 +115,7 @@ if uploaded_files and api_key:
|
|
| 134 |
# Analyze PDF directly
|
| 135 |
# Convert PDF to images
|
| 136 |
|
| 137 |
-
df =
|
| 138 |
|
| 139 |
# Display results in expandable section
|
| 140 |
with st.expander("View Analysis Results", expanded=True):
|
|
|
|
| 12 |
extract_csv_from_response,
|
| 13 |
pdf_to_images,
|
| 14 |
analyze_single_document,
|
| 15 |
+
process_local_pdf,
|
| 16 |
+
analyze_pdf_images_with_gemini
|
| 17 |
)
|
| 18 |
import base64
|
| 19 |
from datetime import datetime
|
|
|
|
| 63 |
st.title("📄 PDF Document Analyzer")
|
| 64 |
st.markdown("Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing")
|
| 65 |
|
|
|
|
|
|
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Sidebar Configuration
|
| 69 |
with st.sidebar:
|
|
|
|
| 115 |
# Analyze PDF directly
|
| 116 |
# Convert PDF to images
|
| 117 |
|
| 118 |
+
df = analyze_pdf_images_with_gemini(uploaded_file.getvalue())
|
| 119 |
|
| 120 |
# Display results in expandable section
|
| 121 |
with st.expander("View Analysis Results", expanded=True):
|