# AI-DrivenBMS / app.py
# Page residue from the hosting site, kept as comments so the file parses:
#   Anupam202224's picture
#   Update app.py
#   0c8a4a7 verified
import pandas as pd
import numpy as np
import gradio as gr
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from PIL import Image
import pytesseract
import io
import json
import cv2
import os
import numpy as np
class DocumentProcessor:
    """Extract text from uploaded document images with Tesseract OCR.

    Creating an instance ensures the ``uploaded_documents`` folder exists.
    """

    def __init__(self):
        self.upload_folder = "uploaded_documents"
        os.makedirs(self.upload_folder, exist_ok=True)

    def process_image(self, image):
        """Run OCR on an image and return a status message plus parsed data.

        Args:
            image: A NumPy array, or any object ``np.array`` can convert
                (for example a PIL image). Colour data is interpreted as
                RGB/RGBA channel order. ``None`` means nothing was uploaded.

        Returns:
            A ``(message, parsed_data)`` tuple. ``parsed_data`` is the dict
            built by :meth:`parse_text` on success and ``None`` when no image
            was supplied or processing failed; failures are reported in
            ``message`` rather than raised.
        """
        if image is None:
            return "No image uploaded", None
        try:
            img_array = image if isinstance(image, np.ndarray) else np.array(image)
            gray = self._to_grayscale(img_array)
            # Boost contrast, then binarise with Otsu's automatic threshold
            # so the OCR engine sees clean black-on-white glyphs.
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)
            _, threshold = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            text = pytesseract.image_to_string(threshold)
            parsed_data = self.parse_text(text)
            return (
                f"Document processed successfully!\n\nExtracted Text:\n{text}",
                parsed_data,
            )
        except Exception as e:
            # UI boundary: surface the failure as text instead of crashing
            # the request handler.
            return f"Error processing document: {str(e)}", None

    @staticmethod
    def _to_grayscale(img_array):
        """Reduce an RGB, RGBA or single-channel array to one 2-D gray plane."""
        if img_array.ndim != 3:
            # Already a single plane.
            return img_array
        channels = img_array.shape[2]
        if channels == 1:
            return np.ascontiguousarray(img_array[:, :, 0])
        if channels == 4:
            return cv2.cvtColor(img_array, cv2.COLOR_RGBA2GRAY)
        # The upload widget and PIL both deliver RGB channel order, so the
        # RGB conversion code is required; the BGR code would swap the red
        # and blue luminance weights.
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    def parse_text(self, text):
        """Summarise OCR output.

        Args:
            text: The raw string returned by the OCR step.

        Returns:
            A dict with the raw text (``raw_text``), the number of
            newline-separated lines including blank ones (``line_count``),
            the current local timestamp (``processed_date``) and the list of
            non-blank lines (``extracted_lines``).
        """
        lines = text.split('\n')
        return {
            'raw_text': text,
            'line_count': len(lines),
            'processed_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'extracted_lines': [line for line in lines if line.strip()],
        }
class BusinessManagementSystem:
    """Back end of the dashboard: loads data sets and builds the reports."""

    def __init__(self):
        self.doc_processor = DocumentProcessor()
        self.load_data()

    def load_data(self):
        """Load the four CSV data sets from the working directory.

        Each file is read independently. A missing bank or marketing file is
        replaced by generated mock data; a missing account or invoice file
        becomes an empty DataFrame, so all four attributes always exist.
        """
        sources = (
            ('bank_data', 'bank_statements.csv', self.mock_bank_data),
            ('marketing_data', 'marketing_data.csv', self.mock_marketing_data),
            ('account_data', 'account_data.csv', pd.DataFrame),
            ('invoices', 'invoices.csv', pd.DataFrame),
        )
        any_missing = False
        for attr, path, fallback in sources:
            try:
                frame = pd.read_csv(path)
            except FileNotFoundError:
                any_missing = True
                frame = fallback()
            setattr(self, attr, frame)
        if any_missing:
            print("CSV files not found. Using mock data...")

    def mock_bank_data(self):
        """Return ten daily mock transactions starting 2024-01-01."""
        return pd.DataFrame({
            'date': pd.date_range(start='2024-01-01', periods=10),
            'transaction': [f'Transaction {i}' for i in range(10)],
            'amount': np.random.randint(1000, 10000, 10),
        })

    def mock_marketing_data(self):
        """Return five mock campaigns with random clicks and conversions."""
        return pd.DataFrame({
            'campaign': [f'Campaign {i}' for i in range(5)],
            'clicks': np.random.randint(100, 1000, 5),
            'conversions': np.random.randint(10, 100, 5),
        })

    def process_document(self, image):
        """Delegate OCR of ``image`` to the document processor."""
        return self.doc_processor.process_image(image)

    def generate_bank_report(self):
        """Plot transaction amounts over time and summarise them.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything fails while building the report.
        """
        try:
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=self.bank_data['date'],
                y=self.bank_data['amount'],
                mode='lines+markers',
                name='Transactions',
            ))
            fig.update_layout(
                title='Bank Transaction History',
                xaxis_title='Date',
                yaxis_title='Amount ($)',
            )
            total_transactions = len(self.bank_data)
            total_amount = self.bank_data['amount'].sum()
            avg_transaction = self.bank_data['amount'].mean()
            summary = (
                "\nBanking Summary:\n"
                f"Total Transactions: {total_transactions}\n"
                f"Total Amount: ${total_amount:,.2f}\n"
                f"Average Transaction: ${avg_transaction:,.2f}\n"
            )
            return fig, summary
        except Exception as e:
            # UI boundary: report the failure as text.
            return None, f"Error generating bank report: {str(e)}"

    def analyze_marketing(self):
        """Chart clicks vs. conversions per campaign and summarise them.

        Stores a ``conversion_rate`` column (percent) on
        ``self.marketing_data``. Campaigns with zero clicks get NaN so they
        are left out of the average instead of turning it into ``inf``.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything fails while building the analysis.
        """
        try:
            clicks = self.marketing_data['clicks']
            # ``where`` keeps non-zero click counts and blanks the rest to
            # NaN, which avoids a division by zero.
            self.marketing_data['conversion_rate'] = (
                self.marketing_data['conversions'] / clicks.where(clicks != 0) * 100
            )
            fig = px.bar(
                self.marketing_data,
                x='campaign',
                y=['clicks', 'conversions'],
                title='Campaign Performance',
                barmode='group',
            )
            summary = (
                "\nMarketing Summary:\n"
                f"Total Campaigns: {len(self.marketing_data)}\n"
                f"Total Clicks: {self.marketing_data['clicks'].sum():,}\n"
                f"Total Conversions: {self.marketing_data['conversions'].sum():,}\n"
                f"Average Conversion Rate: {self.marketing_data['conversion_rate'].mean():.2f}%\n"
            )
            return fig, summary
        except Exception as e:
            # UI boundary: report the failure as text.
            return None, f"Error analyzing marketing data: {str(e)}"
def create_gradio_interface():
    """Assemble the three-tab Gradio Blocks app and return it un-launched.

    A fresh ``BusinessManagementSystem`` backs the document, banking and
    marketing tabs; each tab's button is wired to one of its methods.
    """
    system = BusinessManagementSystem()
    page_header = (
        "\n# AI-Driven Business Management System\n"
        "Upload documents, analyze banking data, and track marketing campaigns.\n"
    )
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown(page_header)
        with gr.Tabs():
            _build_document_tab(system.process_document)
            _build_report_tab(
                tab_title="Banking",
                heading="### Banking Analysis",
                button_label="Generate Bank Report",
                plot_label="Transaction History",
                summary_label="Banking Summary",
                handler=system.generate_bank_report,
            )
            _build_report_tab(
                tab_title="Marketing",
                heading="### Marketing Campaign Analysis",
                button_label="Analyze Marketing Campaigns",
                plot_label="Campaign Performance",
                summary_label="Marketing Summary",
                handler=system.analyze_marketing,
            )
    return interface


def _build_document_tab(handler):
    """Add the upload/OCR tab; ``handler`` maps an image to (text, data)."""
    # NOTE(review): the intro text mentions PDF, but the input below is an
    # image component — confirm whether PDF uploads are really supported.
    intro = (
        "\n### Upload and Process Documents\n"
        "Support for PNG, JPG, and PDF files. The system will extract text and data from the documents.\n"
    )
    with gr.Tab("Document Processing"):
        gr.Markdown(intro)
        with gr.Row():
            with gr.Column():
                upload = gr.Image(label="Upload Document", type="numpy")
                run_button = gr.Button("Process Document", variant="primary")
            with gr.Column():
                result_box = gr.Textbox(label="Processing Results", lines=10)
                data_view = gr.JSON(label="Extracted Data")
        run_button.click(
            fn=handler,
            inputs=[upload],
            outputs=[result_box, data_view],
        )


def _build_report_tab(tab_title, heading, button_label, plot_label,
                      summary_label, handler):
    """Add a button + plot + summary tab; ``handler`` returns (figure, text)."""
    with gr.Tab(tab_title):
        gr.Markdown(heading)
        trigger = gr.Button(button_label, variant="primary")
        chart = gr.Plot(label=plot_label)
        summary_box = gr.Textbox(label=summary_label, lines=5)
        trigger.click(fn=handler, inputs=[], outputs=[chart, summary_box])
# Google Colab setup: run these installation commands first.
#!pip install -q pytesseract opencv-python
#!apt-get install -y tesseract-ocr > /dev/null 2>&1
# Script entry point: build the UI, then serve it with a public share link.
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch(share=True)