chandini2595 committed
Commit 83dd2a8 · 0 Parent(s)

Initial commit without binary files
.gitattributes ADDED
@@ -0,0 +1,4 @@
+ *.pdf filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/ci-cd.yml ADDED
@@ -0,0 +1,117 @@
+ name: FormIQ CI/CD
+
+ on:
+   push:
+     branches: [ main ]
+   pull_request:
+     branches: [ main ]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     strategy:
+       matrix:
+         python-version: [3.8, 3.9]
+
+     steps:
+       - uses: actions/checkout@v2
+
+       - name: Set up Python ${{ matrix.python-version }}
+         uses: actions/setup-python@v2
+         with:
+           python-version: ${{ matrix.python-version }}
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -r requirements.txt
+           pip install pytest pytest-cov
+
+       - name: Run tests
+         run: |
+           pytest tests/ --cov=src/ --cov-report=xml
+
+       - name: Upload coverage to Codecov
+         uses: codecov/codecov-action@v2
+         with:
+           file: ./coverage.xml
+           fail_ci_if_error: true
+
+   lint:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v2
+
+       - name: Set up Python
+         uses: actions/setup-python@v2
+         with:
+           python-version: 3.9
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install flake8 black isort
+
+       - name: Run linters
+         run: |
+           flake8 src/ tests/
+           black --check src/ tests/
+           isort --check-only src/ tests/
+
+   build-and-push:
+     needs: [test, lint]
+     runs-on: ubuntu-latest
+     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+
+     steps:
+       - uses: actions/checkout@v2
+
+       - name: Set up Docker Buildx
+         uses: docker/setup-buildx-action@v1
+
+       - name: Login to DockerHub
+         uses: docker/login-action@v1
+         with:
+           username: ${{ secrets.DOCKERHUB_USERNAME }}
+           password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+       - name: Build and push
+         uses: docker/build-push-action@v2
+         with:
+           context: .
+           push: true
+           tags: |
+             ${{ secrets.DOCKERHUB_USERNAME }}/formiq:latest
+             ${{ secrets.DOCKERHUB_USERNAME }}/formiq:${{ github.sha }}
+           cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/formiq:buildcache
+           cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/formiq:buildcache,mode=max
+
+   deploy:
+     needs: build-and-push
+     runs-on: ubuntu-latest
+     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+
+     steps:
+       - uses: actions/checkout@v2
+
+       - name: Configure AWS credentials
+         uses: aws-actions/configure-aws-credentials@v1
+         with:
+           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+           aws-region: us-east-1
+
+       - name: Deploy to SageMaker
+         run: |
+           # Update SageMaker endpoint with new model
+           aws sagemaker update-endpoint \
+             --endpoint-name formiq-endpoint \
+             --endpoint-config-name formiq-config-${{ github.sha }}
+
+       - name: Deploy to ECS
+         run: |
+           # Update ECS service with new container
+           aws ecs update-service \
+             --cluster formiq-cluster \
+             --service formiq-service \
+             --force-new-deployment
.gitignore ADDED
@@ -0,0 +1,89 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ ENV/
+ env/
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Project specific
+ temp_uploaded_image.jpg
+ .env
+ *.log
+ .DS_Store
+
+ # Model files
+ *.pt
+ *.pth
+ *.onnx
+ *.h5
+ *.model
+
+ # Data
+ *.csv
+ *.json
+ *.xlsx
+ *.db
+ *.sqlite3
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ *.ipynb
+
+ # Logs
+ logs/
+ *.log
+
+ # MLflow
+ mlruns/
+ mlflow.db
+
+ # DVC
+ .dvc/
+ .dvc/cache/
+
+ # Testing
+ .coverage
+ coverage.xml
+ htmlcov/
+ .pytest_cache/
+
+ # Docker
+ .docker/
+
+ # AWS
+ .aws/
+ *.pem
+
+ # Environment variables
+ .env.*
+
+ # Distribution
+ dist/
+ build/
+ *.egg-info/
Dockerfile ADDED
@@ -0,0 +1,46 @@
+ # Use NVIDIA CUDA base image for GPU support
+ FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1 \
+     DEBIAN_FRONTEND=noninteractive \
+     PYTHON_VERSION=3.9
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     python${PYTHON_VERSION} \
+     python3-pip \
+     python${PYTHON_VERSION}-dev \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements first to leverage Docker cache
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip3 install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Create necessary directories
+ RUN mkdir -p data/train data/val data/test logs
+
+ # Set environment variables for the application
+ ENV MODEL_SAVE_DIR=/app/models \
+     DATA_DIR=/app/data \
+     LOG_DIR=/app/logs
+
+ # Expose ports
+ EXPOSE 8000 8501
+
+ # Create a non-root user
+ RUN useradd -m -u 1000 appuser
+ RUN chown -R appuser:appuser /app
+ USER appuser
+
+ # Start the application
+ CMD ["sh", "-c", "uvicorn src.api.main:app --host 0.0.0.0 --port 8000 & streamlit run src/frontend/app.py --server.port 8501 --server.address 0.0.0.0"]
README.md ADDED
@@ -0,0 +1,54 @@
+ # FormIQ - Intelligent Document Parser
+
+ FormIQ is an intelligent document parser that uses advanced AI models to extract and validate information from various types of documents.
+
+ ## Features
+
+ - Document image upload and processing
+ - OCR text extraction using Tesseract
+ - Advanced document understanding using LayoutLMv3
+ - Structured information extraction using Perplexity AI
+ - Interactive web interface built with Streamlit
+
+ ## Technologies Used
+
+ - **Frontend**: Streamlit
+ - **OCR**: Tesseract
+ - **Document Understanding**: LayoutLMv3
+ - **Text Processing**: Perplexity AI
+ - **Data Processing**: Pandas, NumPy
+ - **Visualization**: Plotly
+
+ ## Setup
+
+ 1. Clone the repository
+ 2. Install dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+ 3. Set up environment variables:
+    ```bash
+    PERPLEXITY_API_KEY=your_api_key_here
+    ```
+
+ ## Usage
+
+ 1. Run the Streamlit app:
+    ```bash
+    streamlit run app.py
+    ```
+ 2. Open your browser and navigate to the provided URL
+ 3. Upload a document image
+ 4. Click "Process Document" to extract information
+
+ ## Hugging Face Spaces Deployment
+
+ This project is deployed on Hugging Face Spaces. You can access the live demo at: [Your Spaces URL]
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ ## License
+
+ This project is licensed under the MIT License - see the LICENSE file for details.
app.py ADDED
@@ -0,0 +1,181 @@
+ import streamlit as st
+ import torch
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+ from PIL import Image
+ import io
+ import json
+ import pandas as pd
+ import plotly.express as px
+ import numpy as np
+ from typing import Dict, Any
+ import logging
+ import pytesseract
+ import re
+ from openai import OpenAI
+ import os
+ from dotenv import load_dotenv
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize OpenAI client for Perplexity
+ client = OpenAI(
+     api_key=os.getenv('PERPLEXITY_API_KEY'),
+     base_url="https://api.perplexity.ai"
+ )
+
+ # Initialize LayoutLM model
+ @st.cache_resource
+ def load_model():
+     model_name = "microsoft/layoutlmv3-base"
+     processor = LayoutLMv3Processor.from_pretrained(model_name)
+     model = LayoutLMv3ForTokenClassification.from_pretrained(model_name)
+     return processor, model
+
+ def extract_json_from_llm_output(llm_result):
+     match = re.search(r'\{.*\}', llm_result, re.DOTALL)
+     if match:
+         return match.group(0)
+     return None
+
+ def extract_fields(image_path):
+     # OCR
+     text = pytesseract.image_to_string(Image.open(image_path))
+
+     # Display OCR output for debugging
+     st.subheader("Raw OCR Output")
+     st.code(text)
+
+     # Improved Regex patterns for fields
+     patterns = {
+         "name": r"Mrs\s+\w+\s+\w+",
+         "date": r"Date[:\s]+([\d/]+)",
+         "product": r"\d+\s+\w+.*Style\s+\d+",
+         "amount_paid": r"Total Paid\s+\$?([\d.,]+)",
+         "receipt_no": r"Receipt No\.?\s*:?\s*(\d+)"
+     }
+
+     results = {}
+     for field, pattern in patterns.items():
+         match = re.search(pattern, text, re.IGNORECASE)
+         if match:
+             results[field] = match.group(1) if match.groups() else match.group(0)
+         else:
+             results[field] = None
+
+     return results
+
+ def extract_with_perplexity_llm(ocr_text):
+     prompt = f"""
+     Extract the following fields from this receipt text:
+     - name
+     - date
+     - product
+     - amount_paid
+     - receipt_no
+
+     Text:
+     \"\"\"{ocr_text}\"\"\"
+
+     Return the result as a JSON object with those fields.
+     """
+     messages = [
+         {
+             "role": "system",
+             "content": "You are an AI assistant that extracts structured information from text."
+         },
+         {
+             "role": "user",
+             "content": prompt
+         }
+     ]
+
+     response = client.chat.completions.create(
+         model="sonar-pro",
+         messages=messages
+     )
+     return response.choices[0].message.content
+
+ def main():
+     st.set_page_config(
+         page_title="FormIQ - Intelligent Document Parser",
+         page_icon="📄",
+         layout="wide"
+     )
+
+     st.title("FormIQ: Intelligent Document Parser")
+     st.markdown("""
+     Upload your documents to extract and validate information using advanced AI models.
+     """)
+
+     # Sidebar
+     with st.sidebar:
+         st.header("Settings")
+         document_type = st.selectbox(
+             "Document Type",
+             options=["invoice", "receipt", "form"],
+             index=0
+         )
+
+         confidence_threshold = st.slider(
+             "Confidence Threshold",
+             min_value=0.0,
+             max_value=1.0,
+             value=0.5,
+             step=0.05
+         )
+
+         st.markdown("---")
+         st.markdown("### About")
+         st.markdown("""
+         FormIQ uses LayoutLMv3 and Perplexity AI to extract and validate information from documents.
+         """)
+
+     # Main content
+     uploaded_file = st.file_uploader(
+         "Upload Document",
+         type=["png", "jpg", "jpeg", "pdf"],
+         help="Upload a document image to process"
+     )
+
+     if uploaded_file is not None:
+         # Display uploaded image
+         image = Image.open(uploaded_file)
+         st.image(image, caption="Uploaded Document", width=600)
+
+         # Process button
+         if st.button("Process Document"):
+             with st.spinner("Processing document..."):
+                 try:
+                     # Save the uploaded file to a temporary location
+                     temp_path = "temp_uploaded_image.jpg"
+                     image.save(temp_path)
+
+                     # Extract fields using OCR + regex
+                     fields = extract_fields(temp_path)
+
+                     # Extract with Perplexity LLM
+                     with st.spinner("Extracting structured data with Perplexity LLM..."):
+                         try:
+                             llm_result = extract_with_perplexity_llm(pytesseract.image_to_string(Image.open(temp_path)))
+                             st.subheader("Structured Data (Perplexity LLM)")
+                             st.code(llm_result, language="json")
+
+                             # Display extracted fields
+                             st.subheader("Extracted Fields")
+                             fields_df = pd.DataFrame([fields])
+                             st.dataframe(fields_df)
+
+                         except Exception as e:
+                             st.error(f"LLM extraction failed: {e}")
+
+                 except Exception as e:
+                     logger.error(f"Error processing document: {str(e)}")
+                     st.error(f"Error processing document: {str(e)}")
+
+ if __name__ == "__main__":
+     main()
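
The regex patterns in `extract_fields` are tuned to one specific receipt layout. A small standalone sketch of how they behave, using a hypothetical OCR string (the sample text and values below are made up for illustration):

```python
import re

# Hypothetical OCR output, shaped like the receipts these patterns expect.
sample_text = """Mrs Jane Smith
Receipt No: 48213
Date: 12/05/2024
1 Jacket Style 882
Total Paid $129.99"""

patterns = {
    "name": r"Mrs\s+\w+\s+\w+",
    "date": r"Date[:\s]+([\d/]+)",
    "product": r"\d+\s+\w+.*Style\s+\d+",
    "amount_paid": r"Total Paid\s+\$?([\d.,]+)",
    "receipt_no": r"Receipt No\.?\s*:?\s*(\d+)",
}

for field, pattern in patterns.items():
    match = re.search(pattern, sample_text, re.IGNORECASE)
    if match:
        # Mirror extract_fields(): prefer the capture group when one exists
        value = match.group(1) if match.groups() else match.group(0)
    else:
        value = None
    print(f"{field}: {value}")
```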
chatbot_server.py ADDED
@@ -0,0 +1,68 @@
+ import os
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from openai import OpenAI
+ from dotenv import load_dotenv
+ import boto3
+
+ # Load environment variables from .env
+ load_dotenv()
+
+ # Initialize OpenAI client for Perplexity
+ client = OpenAI(
+     api_key=os.getenv('PERPLEXITY_API_KEY'),
+     base_url="https://api.perplexity.ai"
+ )
+
+ app = FastAPI()
+
+ class ChatRequest(BaseModel):
+     question: str
+
+ @app.post("/chat")
+ def chat_endpoint(chat_request: ChatRequest):
+     # Connect to DynamoDB
+     dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
+     table = dynamodb.Table('Receipts')
+
+     # Get question and search DynamoDB
+     question = chat_request.question
+     response = table.scan()
+     items = response.get('Items', [])
+
+     # Format items for context with all receipt details
+     context = "\n".join([
+         f"Receipt {item['receipt_no']}:\n"
+         f" Name: {item['name']}\n"
+         f" Date: {item['date']}\n"
+         f" Product: {item['product']}\n"
+         f" Amount Paid: {item['amount_paid']}\n"
+         for item in items
+     ])
+     question = f"Based on these receipts:\n{context}\n\nQuestion: {question}\nPlease provide a 2-3 line answer."
+
+     # Prepare messages for the chat
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an artificial intelligence assistant and you need to "
+                 "engage in a helpful, detailed, polite conversation with a user. "
+                 "Give a 2-3 line answer."
+             )
+         },
+         {
+             "role": "user",
+             "content": question
+         }
+     ]
+
+     try:
+         # Get response from Perplexity
+         response = client.chat.completions.create(
+             model="sonar",
+             messages=messages
+         )
+         return {"answer": response.choices[0].message.content}
+     except Exception as e:
+         return {"error": f"Error from LLM: {str(e)}"}
insert_dummy_data.py ADDED
@@ -0,0 +1,51 @@
+ import boto3
+
+ # Initialize DynamoDB resource (ensure AWS credentials and region are set)
+ dynamodb = boto3.resource('dynamodb', region_name='us-east-1')  # Change region if needed
+ table = dynamodb.Table('Receipts')  # Replace with your table name
+
+ # List of dummy items to insert with meaningful receipt numbers
+ dummy_items = [
+     {
+         'receipt_no': 'RCPT-2024-0001',
+         'amount_paid': '100.00',
+         'date': '2024-01-01',
+         'name': 'John Doe',
+         'product': 'Widget A'
+     },
+     {
+         'receipt_no': 'RCPT-2024-0002',
+         'amount_paid': '250.50',
+         'date': '2024-02-15',
+         'name': 'Jane Smith',
+         'product': 'Gadget B'
+     },
+     {
+         'receipt_no': 'RCPT-2024-0003',
+         'amount_paid': '75.25',
+         'date': '2024-03-10',
+         'name': 'Alice Johnson',
+         'product': 'Thingamajig C'
+     },
+     {
+         'receipt_no': 'RCPT-2024-0004',
+         'amount_paid': '180.00',
+         'date': '2024-04-05',
+         'name': 'Bob Lee',
+         'product': 'Gizmo D'
+     },
+     {
+         'receipt_no': 'RCPT-2024-0005',
+         'amount_paid': '320.75',
+         'date': '2024-05-20',
+         'name': 'Carol King',
+         'product': 'Device E'
+     }
+ ]
+
+ # Insert each item
+ for item in dummy_items:
+     table.put_item(Item=item)
+     print(f"Inserted: {item['receipt_no']}")
+
+ print("Dummy data inserted successfully.")
requirements.txt ADDED
@@ -0,0 +1,33 @@
+ # Core ML dependencies
+ torch==2.2.0
+ transformers==4.37.2
+ datasets>=2.12.0
+ pytesseract==0.3.10
+ Pillow==10.2.0
+
+ # API and Web Framework
+ fastapi==0.109.2
+ uvicorn==0.27.1
+ streamlit==1.32.0
+ python-multipart==0.0.9
+
+ # MLOps and Monitoring
+ wandb>=0.15.0
+ mlflow>=2.4.0
+ dvc>=3.0.0
+ hydra-core>=1.3.2
+ evidently>=0.2.0
+ tensorboard>=2.12.0
+
+ # Cloud and Deployment
+ boto3==1.34.34
+ sagemaker>=2.160.0
+
+ # Utilities
+ numpy==1.26.3
+ pandas==2.2.0
+ python-dotenv==1.0.1
+ pydantic>=2.0.0
+ openai
+ streamlit
+ plotly==5.18.0
src/api/main.py ADDED
@@ -0,0 +1,104 @@
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from PIL import Image
+ import io
+ import logging
+ from typing import Dict, Any
+ import json
+
+ from src.models.layoutlm import FormIQModel
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Initialize FastAPI app
+ app = FastAPI(
+     title="FormIQ API",
+     description="Intelligent Document Parser API",
+     version="1.0.0"
+ )
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Initialize model
+ model = FormIQModel()
+
+ @app.get("/")
+ async def root():
+     """Health check endpoint."""
+     return {"status": "healthy", "service": "FormIQ API"}
+
+ @app.post("/extract")
+ async def extract_information(
+     file: UploadFile = File(...),
+     confidence_threshold: float = 0.5,
+     document_type: str = "invoice"
+ ) -> Dict[str, Any]:
+     """Extract information from uploaded document.
+
+     Args:
+         file: Uploaded document image
+         confidence_threshold: Minimum confidence score for predictions
+         document_type: Type of document being processed
+
+     Returns:
+         Dictionary containing extracted fields and metadata
+     """
+     try:
+         # Read and validate image
+         contents = await file.read()
+         image = Image.open(io.BytesIO(contents))
+         if image.mode != "RGB":
+             image = image.convert("RGB")
+
+         # Process image
+         extraction_results = model.predict(
+             image=image,
+             confidence_threshold=confidence_threshold
+         )
+
+         # Validate extraction
+         validation_results = model.validate_extraction(
+             extracted_fields=extraction_results,
+             document_type=document_type
+         )
+
+         # Combine results
+         response = {
+             "extraction": extraction_results,
+             "validation": validation_results,
+             "metadata": {
+                 "document_type": document_type,
+                 "confidence_threshold": confidence_threshold
+             }
+         }
+
+         return response
+
+     except Exception as e:
+         logger.error(f"Error processing document: {str(e)}")
+         raise HTTPException(
+             status_code=500,
+             detail=f"Error processing document: {str(e)}"
+         )
+
+ @app.get("/model-info")
+ async def get_model_info() -> Dict[str, Any]:
+     """Get information about the current model."""
+     return {
+         "model_name": model.model.config.model_type,
+         "device": model.device,
+         "version": "1.0.0"
+     }
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
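
A hedged example of calling the `/extract` endpoint from Python with `requests`, assuming the API is running locally on port 8000 and that a test image exists at the (hypothetical) path used below:

```python
import requests

API_URL = "http://localhost:8000"  # assumes: uvicorn src.api.main:app --port 8000

# "sample_receipt.jpg" is a hypothetical test image path.
with open("sample_receipt.jpg", "rb") as f:
    response = requests.post(
        f"{API_URL}/extract",
        files={"file": ("sample_receipt.jpg", f, "image/jpeg")},
        params={"confidence_threshold": 0.5, "document_type": "receipt"},
        timeout=120,
    )

response.raise_for_status()
payload = response.json()
print(payload["metadata"])        # document_type and confidence_threshold echoed back
print(len(payload["extraction"]["fields"]), "fields above threshold")
print(payload["validation"])      # is_valid, validation_errors, confidence_score
```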
src/config/config.yaml ADDED
@@ -0,0 +1,66 @@
+ # Model Configuration
+ model:
+   name: "microsoft/layoutlmv3-base"
+   device: "cuda"  # or "cpu"
+   confidence_threshold: 0.5
+   max_length: 512
+
+ # Training Configuration
+ training:
+   batch_size: 8
+   learning_rate: 2e-5
+   num_epochs: 10
+   warmup_steps: 100
+   weight_decay: 0.01
+   gradient_accumulation_steps: 4
+
+ # Dataset Configuration
+ dataset:
+   train_path: "data/train"
+   val_path: "data/val"
+   test_path: "data/test"
+   max_samples: null  # Set to null for all samples
+   augmentation:
+     enabled: true
+     rotation_range: 10
+     width_shift_range: 0.1
+     height_shift_range: 0.1
+     zoom_range: 0.1
+     fill_mode: "nearest"
+
+ # Logging Configuration
+ logging:
+   level: "INFO"
+   wandb:
+     enabled: true
+     project: "formiq"
+     entity: null  # Set your W&B username
+   tensorboard:
+     enabled: true
+     log_dir: "logs"
+
+ # API Configuration
+ api:
+   host: "0.0.0.0"
+   port: 8000
+   workers: 4
+   timeout: 60
+
+ # Frontend Configuration
+ frontend:
+   host: "localhost"
+   port: 8501
+   debug: false
+
+ # MLOps Configuration
+ mlops:
+   dvc:
+     remote: "s3://formiq-data"
+     cache_dir: ".dvc/cache"
+   mlflow:
+     tracking_uri: "http://localhost:5000"
+     experiment_name: "formiq"
+   evidently:
+     enabled: true
+     drift_threshold: 0.1
+     window_size: 1000
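
For a quick sanity check outside of Hydra, the same file can be read directly with OmegaConf (a sketch; the path is relative to the repo root, and `src/scripts/train.py` normally loads it via `@hydra.main` instead):

```python
from omegaconf import OmegaConf

cfg = OmegaConf.load("src/config/config.yaml")

print(cfg.model.name)              # microsoft/layoutlmv3-base
print(cfg.training.batch_size)     # 8
print(cfg.logging.wandb.project)   # formiq
print(list(OmegaConf.to_container(cfg, resolve=True)))  # top-level sections
```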
src/frontend/app.py ADDED
@@ -0,0 +1,299 @@
+ import streamlit as st
+ import requests
+ from PIL import Image
+ import io
+ import json
+ import pandas as pd
+ import plotly.express as px
+ import numpy as np
+ from typing import Dict, Any
+ import logging
+ import pytesseract
+ import re
+ from openai import OpenAI, OpenAIError
+ import boto3
+ from botocore.exceptions import ClientError
+ import os
+ from dotenv import load_dotenv
+ load_dotenv()
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Constants
+ API_URL = "http://localhost:8000"
+ SUPPORTED_DOCUMENT_TYPES = ["invoice", "receipt", "form"]
+
+ api_key = os.getenv("PERPLEXITY_API_KEY")
+ client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
+
+ REGION = "us-east-1"
+ dynamodb = boto3.resource('dynamodb', region_name=REGION)
+
+ def extract_json_from_llm_output(llm_result):
+     match = re.search(r'\{.*\}', llm_result, re.DOTALL)
+     if match:
+         return match.group(0)
+     return None
+
+ def save_to_dynamodb(data, table_name="Receipts"):
+     dynamodb = boto3.resource("dynamodb")
+     table = dynamodb.Table(table_name)
+     try:
+         table.put_item(Item=data)
+         return True
+     except ClientError as e:
+         st.error(f"Failed to save to DynamoDB: {e}")
+         return False
+
+ def main():
+     st.set_page_config(
+         page_title="FormIQ - Intelligent Document Parser",
+         page_icon="📄",
+         layout="wide"
+     )
+
+     st.title("FormIQ: Intelligent Document Parser")
+     st.markdown("""
+     Upload your documents to extract and validate information using advanced AI models.
+     """)
+
+     # Sidebar
+     with st.sidebar:
+         st.header("Settings")
+         document_type = st.selectbox(
+             "Document Type",
+             options=SUPPORTED_DOCUMENT_TYPES,
+             index=0
+         )
+
+         confidence_threshold = st.slider(
+             "Confidence Threshold",
+             min_value=0.0,
+             max_value=1.0,
+             value=0.5,
+             step=0.05
+         )
+
+         st.markdown("---")
+         st.markdown("### About")
+         st.markdown("""
+         FormIQ uses LayoutLMv3 and Perplexity AI to extract and validate information from documents.
+         """)
+
+     # Main content
+     uploaded_file = st.file_uploader(
+         "Upload Document",
+         type=["png", "jpg", "jpeg", "pdf"],
+         help="Upload a document image to process"
+     )
+
+     if uploaded_file is not None:
+         # Display uploaded image
+         image = Image.open(uploaded_file)
+         st.image(image, caption="Uploaded Document", width=600)
+
+         # Process button
+         if st.button("Process Document"):
+             with st.spinner("Processing document..."):
+                 try:
+                     # Save the uploaded file to a temporary location
+                     temp_path = "temp_uploaded_image.jpg"
+                     image.save(temp_path)
+
+                     # Extract fields using OCR + regex
+                     fields = extract_fields(temp_path)
+
+                     # Extract with Perplexity LLM using the provided API key
+                     with st.spinner("Extracting structured data with Perplexity LLM..."):
+                         try:
+                             llm_result = extract_with_perplexity_llm(pytesseract.image_to_string(Image.open(temp_path)))
+                             st.subheader("Structured Data (Perplexity LLM)")
+                             st.code(llm_result, language="json")
+
+                             # Extract and save JSON to DynamoDB
+                             raw_json = extract_json_from_llm_output(llm_result)
+                             if raw_json:
+                                 try:
+                                     llm_data = json.loads(raw_json)
+                                     if save_to_dynamodb(llm_data):
+                                         st.success("Data saved to DynamoDB!")
+                                 except Exception as e:
+                                     st.error(f"Failed to parse/save JSON: {e}")
+                             else:
+                                 st.error("No valid JSON found in LLM output.")
+                         except Exception as e:
+                             st.error(f"LLM extraction failed: {e}")
+
+                 except Exception as e:
+                     logger.error(f"Error processing document: {str(e)}")
+                     st.error(f"Error processing document: {str(e)}")
+
+ def display_results(results: Dict[str, Any]):
+     """Display extraction and validation results."""
+
+     # Create tabs for different views
+     tab1, tab2, tab3 = st.tabs(["Extracted Fields", "Validation", "Visualization"])
+
+     with tab1:
+         st.subheader("Extracted Fields")
+         if "fields" in results["extraction"]:
+             fields_df = pd.DataFrame(results["extraction"]["fields"])
+             st.dataframe(fields_df)
+         else:
+             st.info("No fields extracted")
+
+     with tab2:
+         st.subheader("Validation Results")
+         validation = results["validation"]
+
+         # Display validation status
+         status_color = "green" if validation["is_valid"] else "red"
+         st.markdown(f"### Status: :{status_color}[{validation['is_valid']}]")
+
+         # Display validation errors if any
+         if validation["validation_errors"]:
+             st.error("Validation Errors:")
+             for error in validation["validation_errors"]:
+                 st.markdown(f"- {error}")
+
+         # Display confidence score
+         st.metric(
+             "Overall Confidence",
+             f"{validation['confidence_score']:.2%}"
+         )
+
+     with tab3:
+         st.subheader("Confidence Visualization")
+         if "confidence_scores" in results["extraction"]["metadata"]:
+             scores = results["extraction"]["metadata"]["confidence_scores"]
+
+             # Create confidence distribution plot
+             fig = px.histogram(
+                 x=scores,
+                 nbins=20,
+                 title="Confidence Score Distribution",
+                 labels={"x": "Confidence Score", "y": "Count"}
+             )
+             st.plotly_chart(fig)
+
+             # Display heatmap if available
+             if "bbox" in results["extraction"]["fields"][0]:
+                 st.subheader("Field Location Heatmap")
+                 # TODO: Implement heatmap visualization
+                 st.info("Heatmap visualization coming soon!")
+
+ def group_tokens_by_label(tokens, labels):
+     structured = {}
+     current_label = None
+     current_tokens = []
+     for token, label in zip(tokens, labels):
+         if label != current_label:
+             if current_label is not None:
+                 structured.setdefault(current_label, []).append(' '.join(current_tokens))
+             current_label = label
+             current_tokens = [token]
+         else:
+             current_tokens.append(token)
+     if current_label is not None:
+         structured.setdefault(current_label, []).append(' '.join(current_tokens))
+     return structured
+
+ def extract_fields(image_path):
+     # OCR
+     text = pytesseract.image_to_string(Image.open(image_path))
+
+     # Display OCR output for debugging
+     st.subheader("Raw OCR Output (for debugging)")
+     st.code(text)
+
+     # Improved Regex patterns for fields
+     patterns = {
+         "name": r"Mrs\s+\w+\s+\w+",
+         "date": r"Date[:\s]+([\d/]+)",
+         "product": r"\d+\s+\w+.*Style\s+\d+",
+         "amount_paid": r"Total Paid\s+\$?([\d.,]+)",
+         # Improved pattern for receipt number (handles optional dot, colon, spaces)
+         "receipt_no": r"Receipt No\.?\s*:?\s*(\d+)"
+     }
+
+     results = {}
+     for field, pattern in patterns.items():
+         match = re.search(pattern, text, re.IGNORECASE)
+         if match:
+             results[field] = match.group(1) if match.groups() else match.group(0)
+         else:
+             results[field] = None
+
+     return results
+
+ def extract_with_perplexity_llm(ocr_text):
+     prompt = f"""
+     Extract the following fields from this receipt text:
+     - name
+     - date
+     - product
+     - amount_paid
+     - receipt_no
+
+     Text:
+     \"\"\"{ocr_text}\"\"\"
+
+     Return the result as a JSON object with those fields.
+     """
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an artificial intelligence assistant. "
+                 "Answer user questions as concisely and directly as possible. "
+                 "Limit your responses to 2-3 sentences unless the user asks for more detail."
+             )
+         },
+         {
+             "role": "user",
+             "content": prompt
+         }
+     ]
+     response = client.chat.completions.create(
+         model="sonar-pro",  # Use a valid model name for your account
+         messages=messages,
+     )
+     return response.choices[0].message.content
+
+ def interactive_chatbot_ui():
+     st.header("🤖 Chatbot")
+     if "chat_history" not in st.session_state:
+         st.session_state.chat_history = []
+
+     # Display chat history as chat bubbles
+     for sender, msg in st.session_state.chat_history:
+         if sender == "You":
+             st.markdown(f"<div style='text-align: right; background: #262730; color: #fff; padding: 8px 12px; border-radius: 12px; margin: 4px 0 4px 40px;'><b>You:</b> {msg}</div>", unsafe_allow_html=True)
+         else:
+             st.markdown(f"<div style='text-align: left; background: #31333F; color: #fff; padding: 8px 12px; border-radius: 12px; margin: 4px 40px 4px 0;'><b>Bot:</b> {msg}</div>", unsafe_allow_html=True)
+
+     # Input at the bottom
+     with st.form(key="chat_form", clear_on_submit=True):
+         user_input = st.text_input("Type your message...", key="chat_input_main", placeholder="Ask me anything...")
+         submitted = st.form_submit_button("Send")
+         if submitted and user_input:
+             st.session_state.chat_history.append(("You", user_input))
+             try:
+                 response = requests.post(
+                     f"{API_URL}/chat",
+                     json={"question": user_input}
+                 )
+                 if response.status_code == 200:
+                     bot_reply = response.json()["answer"]
+                 else:
+                     bot_reply = f"Error: Server returned status code {response.status_code}"
+             except Exception as e:
+                 bot_reply = f"Error: {e}"
+             st.session_state.chat_history.append(("Bot", bot_reply))
+
+ if __name__ == "__main__":
+
+     main()
+     st.markdown("---")
+     interactive_chatbot_ui()
src/frontend/temp_uploaded_image.jpg ADDED
src/models/layoutlm.py ADDED
@@ -0,0 +1,144 @@
+ import torch
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+ from PIL import Image
+ import numpy as np
+ from typing import Dict, List, Tuple, Optional
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ class FormIQModel:
+     def __init__(
+         self,
+         model_name: str = "microsoft/layoutlmv3-base",
+         device: str = "cuda" if torch.cuda.is_available() else "cpu"
+     ):
+         """Initialize the FormIQ model with LayoutLMv3.
+
+         Args:
+             model_name: Name of the pre-trained model to use
+             device: Device to run the model on ('cuda' or 'cpu')
+         """
+         self.device = device
+         self.processor = LayoutLMv3Processor.from_pretrained(model_name)
+         self.model = LayoutLMv3ForTokenClassification.from_pretrained(model_name)
+         self.model.to(device)
+         logger.info(f"Model initialized on {device}")
+
+     def preprocess_image(self, image: Image.Image) -> Dict[str, torch.Tensor]:
+         """Preprocess the input image for the model.
+
+         Args:
+             image: PIL Image to process
+
+         Returns:
+             Dictionary of processed inputs
+         """
+         try:
+             # Process image and text
+             encoding = self.processor(
+                 image,
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=512
+             )
+
+             # Move tensors to device
+             encoding = {k: v.to(self.device) for k, v in encoding.items()}
+             return encoding
+
+         except Exception as e:
+             logger.error(f"Error preprocessing image: {str(e)}")
+             raise
+
+     def predict(
+         self,
+         image: Image.Image,
+         confidence_threshold: float = 0.5
+     ) -> Dict[str, List[Dict[str, any]]]:
+         """Extract information from the document image.
+
+         Args:
+             image: PIL Image of the document
+             confidence_threshold: Minimum confidence score for predictions
+
+         Returns:
+             Dictionary containing extracted fields and their metadata
+         """
+         try:
+             # Preprocess image
+             inputs = self.preprocess_image(image)
+
+             # Get model predictions
+             with torch.no_grad():
+                 outputs = self.model(**inputs)
+                 predictions = outputs.logits.argmax(-1).squeeze().cpu().numpy()
+                 scores = torch.softmax(outputs.logits, dim=-1).max(-1)[0].squeeze().cpu().numpy()
+
+             # Process predictions
+             extracted_fields = self._process_predictions(predictions, scores, confidence_threshold)
+
+             return {
+                 "fields": extracted_fields,
+                 "metadata": {
+                     "confidence_scores": scores.tolist(),
+                     "model_version": self.model.config.model_type
+                 }
+             }
+
+         except Exception as e:
+             logger.error(f"Error during prediction: {str(e)}")
+             raise
+
+     def _process_predictions(
+         self,
+         predictions: np.ndarray,
+         scores: np.ndarray,
+         confidence_threshold: float
+     ) -> List[Dict[str, any]]:
+         """Process raw model predictions into structured output.
+
+         Args:
+             predictions: Array of predicted class indices
+             scores: Array of confidence scores
+             confidence_threshold: Minimum confidence score
+
+         Returns:
+             List of dictionaries containing field information
+         """
+         # TODO: Implement field-specific post-processing
+         # This is a placeholder implementation
+         processed_fields = []
+
+         for pred, score in zip(predictions, scores):
+             if score >= confidence_threshold:
+                 field_info = {
+                     "label": self.model.config.id2label[pred],
+                     "confidence": float(score),
+                     "bbox": None  # TODO: Add bounding box information
+                 }
+                 processed_fields.append(field_info)
+
+         return processed_fields
+
+     def validate_extraction(
+         self,
+         extracted_fields: Dict[str, List[Dict[str, any]]],
+         document_type: str
+     ) -> Dict[str, any]:
+         """Validate extracted fields based on document type rules.
+
+         Args:
+             extracted_fields: Dictionary of extracted fields
+             document_type: Type of document (e.g., 'invoice', 'receipt')
+
+         Returns:
+             Dictionary containing validation results
+         """
+         # TODO: Implement field validation logic
+         # This is a placeholder implementation
+         return {
+             "is_valid": True,
+             "validation_errors": [],
+             "confidence_score": 1.0
+         }
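
A short usage sketch of `FormIQModel` outside the API, assuming the repo root is on `PYTHONPATH` and that a sample document image exists at the hypothetical path below:

```python
from PIL import Image

from src.models.layoutlm import FormIQModel

# "data/test/sample_receipt.jpg" is a hypothetical path; any RGB document image works.
model = FormIQModel(device="cpu")  # CPU is enough for a smoke test
image = Image.open("data/test/sample_receipt.jpg").convert("RGB")

results = model.predict(image, confidence_threshold=0.5)
print(results["metadata"]["model_version"])
for field in results["fields"][:5]:
    print(field["label"], round(field["confidence"], 3))

# Validation is currently a placeholder and always reports is_valid=True.
validation = model.validate_extraction(results, document_type="receipt")
print(validation)
```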
src/scripts/train.py ADDED
@@ -0,0 +1,202 @@
+ import hydra
+ from omegaconf import DictConfig, OmegaConf
+ import torch
+ from torch.utils.data import DataLoader
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+ from datasets import load_dataset
+ import mlflow
+ import wandb
+ from pathlib import Path
+ import logging
+ from typing import Dict, Any
+ import numpy as np
+ from tqdm import tqdm
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class FormIQTrainer:
+     def __init__(self, config: DictConfig):
+         """Initialize the trainer with configuration."""
+         self.config = config
+         self.device = torch.device(config.model.device)
+
+         # Initialize model and processor
+         self.processor = LayoutLMv3Processor.from_pretrained(config.model.name)
+         self.model = LayoutLMv3ForTokenClassification.from_pretrained(
+             config.model.name,
+             num_labels=config.model.num_labels
+         )
+         self.model.to(self.device)
+
+         # Initialize optimizer
+         self.optimizer = torch.optim.AdamW(
+             self.model.parameters(),
+             lr=config.training.learning_rate,
+             weight_decay=config.training.weight_decay
+         )
+
+         # Setup logging
+         self.setup_logging()
+
+     def setup_logging(self):
+         """Setup MLflow and W&B logging."""
+         if self.config.logging.mlflow.enabled:
+             mlflow.set_tracking_uri(self.config.logging.mlflow.tracking_uri)
+             mlflow.set_experiment(self.config.logging.mlflow.experiment_name)
+
+         if self.config.logging.wandb.enabled:
+             wandb.init(
+                 project=self.config.logging.wandb.project,
+                 entity=self.config.logging.wandb.entity,
+                 config=OmegaConf.to_container(self.config, resolve=True)
+             )
+
+     def prepare_dataset(self):
+         """Prepare the dataset for training."""
+         # TODO: Implement dataset preparation
+         # This is a placeholder implementation
+         return None, None
+
+     def train_epoch(self, train_loader: DataLoader) -> Dict[str, float]:
+         """Train for one epoch.
+
+         Args:
+             train_loader: DataLoader for training data
+
+         Returns:
+             Dictionary containing training metrics
+         """
+         self.model.train()
+         total_loss = 0
+         correct_predictions = 0
+         total_predictions = 0
+
+         progress_bar = tqdm(train_loader, desc="Training")
+         for batch in progress_bar:
+             # Move batch to device
+             batch = {k: v.to(self.device) for k, v in batch.items()}
+
+             # Forward pass
+             outputs = self.model(**batch)
+             loss = outputs.loss
+
+             # Backward pass
+             loss.backward()
+
+             # Update weights
+             self.optimizer.step()
+             self.optimizer.zero_grad()
+
+             # Update metrics
+             total_loss += loss.item()
+             predictions = outputs.logits.argmax(-1)
+             correct_predictions += (predictions == batch["labels"]).sum().item()
+             total_predictions += batch["labels"].numel()
+
+             # Update progress bar
+             progress_bar.set_postfix({
+                 "loss": loss.item(),
+                 "accuracy": correct_predictions / total_predictions
+             })
+
+         # Calculate epoch metrics
+         metrics = {
+             "train_loss": total_loss / len(train_loader),
+             "train_accuracy": correct_predictions / total_predictions
+         }
+
+         return metrics
+
+     def evaluate(self, eval_loader: DataLoader) -> Dict[str, float]:
+         """Evaluate the model.
+
+         Args:
+             eval_loader: DataLoader for evaluation data
+
+         Returns:
+             Dictionary containing evaluation metrics
+         """
+         self.model.eval()
+         total_loss = 0
+         correct_predictions = 0
+         total_predictions = 0
+
+         with torch.no_grad():
+             for batch in tqdm(eval_loader, desc="Evaluating"):
+                 # Move batch to device
+                 batch = {k: v.to(self.device) for k, v in batch.items()}
+
+                 # Forward pass
+                 outputs = self.model(**batch)
+                 loss = outputs.loss
+
+                 # Update metrics
+                 total_loss += loss.item()
+                 predictions = outputs.logits.argmax(-1)
+                 correct_predictions += (predictions == batch["labels"]).sum().item()
+                 total_predictions += batch["labels"].numel()
+
+         # Calculate evaluation metrics
+         metrics = {
+             "eval_loss": total_loss / len(eval_loader),
+             "eval_accuracy": correct_predictions / total_predictions
+         }
+
+         return metrics
+
+     def train(self):
+         """Train the model."""
+         # Prepare datasets
+         train_loader, eval_loader = self.prepare_dataset()
+
+         # Training loop
+         best_eval_loss = float('inf')
+         for epoch in range(self.config.training.num_epochs):
+             logger.info(f"Epoch {epoch + 1}/{self.config.training.num_epochs}")
+
+             # Train
+             train_metrics = self.train_epoch(train_loader)
+
+             # Evaluate
+             eval_metrics = self.evaluate(eval_loader)
+
+             # Log metrics
+             metrics = {**train_metrics, **eval_metrics}
+             if self.config.logging.mlflow.enabled:
+                 mlflow.log_metrics(metrics, step=epoch)
+             if self.config.logging.wandb.enabled:
+                 wandb.log(metrics, step=epoch)
+
+             # Save best model
+             if eval_metrics["eval_loss"] < best_eval_loss:
+                 best_eval_loss = eval_metrics["eval_loss"]
+                 self.save_model("best_model")
+
+             # Save checkpoint
+             self.save_model(f"checkpoint_epoch_{epoch + 1}")
+
+     def save_model(self, name: str):
+         """Save the model.
+
+         Args:
+             name: Name of the saved model
+         """
+         save_path = Path(self.config.model.save_dir) / name
+         save_path.mkdir(parents=True, exist_ok=True)
+
+         self.model.save_pretrained(save_path)
+         self.processor.save_pretrained(save_path)
+
+         if self.config.logging.mlflow.enabled:
+             mlflow.log_artifacts(str(save_path), f"models/{name}")
+
+ @hydra.main(config_path="../config", config_name="config")
+ def main(config: DictConfig):
+     """Main training function."""
+     trainer = FormIQTrainer(config)
+     trainer.train()
+
+ if __name__ == "__main__":
+     main()
tests/test_model.py ADDED
@@ -0,0 +1,93 @@
+ import pytest
+ import torch
+ from PIL import Image
+ import numpy as np
+ from src.models.layoutlm import FormIQModel
+
+ @pytest.fixture
+ def model():
+     """Create a model instance for testing."""
+     return FormIQModel(device="cpu")
+
+ @pytest.fixture
+ def sample_image():
+     """Create a sample image for testing."""
+     # Create a random image
+     image_array = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
+     return Image.fromarray(image_array)
+
+ def test_model_initialization(model):
+     """Test model initialization."""
+     assert model.device == "cpu"
+     assert model.model is not None
+     assert model.processor is not None
+
+ def test_preprocess_image(model, sample_image):
+     """Test image preprocessing."""
+     processed = model.preprocess_image(sample_image)
+
+     # Check if all required keys are present
+     assert "input_ids" in processed
+     assert "attention_mask" in processed
+     assert "bbox" in processed
+     assert "pixel_values" in processed
+
+     # Check tensor types and shapes
+     assert isinstance(processed["input_ids"], torch.Tensor)
+     assert isinstance(processed["attention_mask"], torch.Tensor)
+     assert isinstance(processed["bbox"], torch.Tensor)
+     assert isinstance(processed["pixel_values"], torch.Tensor)
+
+ def test_predict(model, sample_image):
+     """Test prediction functionality."""
+     results = model.predict(sample_image, confidence_threshold=0.5)
+
+     # Check result structure
+     assert "fields" in results
+     assert "metadata" in results
+     assert isinstance(results["fields"], list)
+     assert isinstance(results["metadata"], dict)
+
+     # Check metadata
+     assert "confidence_scores" in results["metadata"]
+     assert "model_version" in results["metadata"]
+
+ def test_validate_extraction(model):
+     """Test field validation."""
+     # Create sample extraction results
+     sample_extraction = {
+         "fields": [
+             {"label": "amount", "confidence": 0.95, "value": "100.00"},
+             {"label": "date", "confidence": 0.85, "value": "2024-03-20"}
+         ]
+     }
+
+     # Test validation
+     validation_results = model.validate_extraction(
+         sample_extraction,
+         document_type="invoice"
+     )
+
+     # Check validation results structure
+     assert "is_valid" in validation_results
+     assert "validation_errors" in validation_results
+     assert "confidence_score" in validation_results
+
+     # Check types
+     assert isinstance(validation_results["is_valid"], bool)
+     assert isinstance(validation_results["validation_errors"], list)
+     assert isinstance(validation_results["confidence_score"], float)
+
+ def test_error_handling(model):
+     """Test error handling."""
+     # Test with invalid image
+     with pytest.raises(Exception):
+         model.predict(Image.new("RGB", (0, 0)))
+
+     # Test with invalid confidence threshold
+     with pytest.raises(Exception):
+         model.predict(Image.new("RGB", (224, 224)), confidence_threshold=2.0)
+
+     # Test with invalid document type
+     with pytest.raises(Exception):
+         model.validate_extraction({}, document_type="invalid_type")
transformers ADDED
@@ -0,0 +1 @@
+ Subproject commit b3db4ddb2255bb4c8c4340fa630a53ac1cc53dee