Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
from datetime import datetime | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from PIL import Image | |
import pytesseract | |
import io | |
import json | |
import cv2 | |
import os | |
import numpy as np | |
class DocumentProcessor:
    """Run OCR on document images and summarise the extracted text.

    Creating an instance ensures the ``uploaded_documents`` folder exists.
    """

    def __init__(self):
        self.upload_folder = "uploaded_documents"
        os.makedirs(self.upload_folder, exist_ok=True)

    def process_image(self, image):
        """Extract text from ``image`` with Tesseract.

        Args:
            image: A NumPy array, any object ``np.array`` can convert
                (e.g. a PIL image), or ``None`` when nothing was uploaded.
                Colour input is treated as RGB / RGBA, which is the channel
                order of the arrays this app receives from its
                ``gr.Image(type="numpy")`` input.

        Returns:
            A ``(message, parsed_data)`` tuple. ``parsed_data`` is the dict
            built by :meth:`parse_text`, or ``None`` when no image was given
            or processing failed (the message then describes the problem).
        """
        if image is None:
            return "No image uploaded", None

        try:
            pixels = image if isinstance(image, np.ndarray) else np.array(image)

            # Collapse colour images to a single channel. The input is RGB
            # ordered, so an RGB (not BGR) conversion code is required to
            # apply the correct luminance weights to red and blue.
            if pixels.ndim == 3:
                if pixels.shape[2] == 4:
                    conversion = cv2.COLOR_RGBA2GRAY
                else:
                    conversion = cv2.COLOR_RGB2GRAY
                gray = cv2.cvtColor(pixels, conversion)
            else:
                gray = pixels

            # Boost contrast, then binarise with Otsu's automatic threshold
            # before handing the image to the OCR engine.
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)
            _, threshold = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            text = pytesseract.image_to_string(threshold)
            parsed_data = self.parse_text(text)
            return (
                f"Document processed successfully!\n\nExtracted Text:\n{text}",
                parsed_data,
            )
        except Exception as e:
            # UI boundary: report the failure as text instead of raising.
            return f"Error processing document: {str(e)}", None

    def parse_text(self, text):
        """Summarise OCR output.

        Args:
            text: The raw text returned by the OCR step.

        Returns:
            A dict with the raw text, the number of newline-separated lines
            (blank ones included), the processing timestamp formatted as
            ``YYYY-MM-DD HH:MM:SS``, and the list of non-blank lines.
        """
        lines = text.split('\n')
        return {
            'raw_text': text,
            'line_count': len(lines),
            'processed_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'extracted_lines': [line for line in lines if line.strip()],
        }
class BusinessManagementSystem:
    """Back end for the dashboard: document OCR, bank and marketing reports.

    Attributes set by :meth:`load_data`: ``bank_data``, ``marketing_data``,
    ``account_data`` and ``invoices`` (all pandas DataFrames).
    """

    def __init__(self):
        self.doc_processor = DocumentProcessor()
        self.load_data()

    def load_data(self):
        """Load each CSV from the working directory, with a per-file fallback.

        A missing file no longer discards the files that did load: bank and
        marketing data fall back to generated mock frames, while account and
        invoice data fall back to empty frames so all four attributes always
        exist after this call.
        """
        sources = (
            ('bank_data', 'bank_statements.csv', self.mock_bank_data),
            ('marketing_data', 'marketing_data.csv', self.mock_marketing_data),
            ('account_data', 'account_data.csv', pd.DataFrame),
            ('invoices', 'invoices.csv', pd.DataFrame),
        )
        any_missing = False
        for attribute, filename, fallback in sources:
            try:
                frame = pd.read_csv(filename)
            except FileNotFoundError:
                frame = fallback()
                any_missing = True
            setattr(self, attribute, frame)

        if any_missing:
            print("CSV files not found. Using mock data...")

    def mock_bank_data(self):
        """Return ten daily mock transactions with random integer amounts."""
        return pd.DataFrame({
            'date': pd.date_range(start='2024-01-01', periods=10),
            'transaction': [f'Transaction {i}' for i in range(10)],
            'amount': np.random.randint(1000, 10000, 10)
        })

    def mock_marketing_data(self):
        """Return five mock campaigns with random click/conversion counts."""
        return pd.DataFrame({
            'campaign': [f'Campaign {i}' for i in range(5)],
            'clicks': np.random.randint(100, 1000, 5),
            'conversions': np.random.randint(10, 100, 5)
        })

    def process_document(self, image):
        """Delegate to ``DocumentProcessor.process_image`` and return its result."""
        return self.doc_processor.process_image(image)

    def generate_bank_report(self):
        """Plot transaction amounts over time and summarise them.

        Returns:
            ``(figure, summary_text)``; on failure ``(None, error_message)``.
        """
        try:
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=self.bank_data['date'],
                y=self.bank_data['amount'],
                mode='lines+markers',
                name='Transactions'
            ))
            fig.update_layout(
                title='Bank Transaction History',
                xaxis_title='Date',
                yaxis_title='Amount ($)'
            )

            total_transactions = len(self.bank_data)
            total_amount = self.bank_data['amount'].sum()
            avg_transaction = self.bank_data['amount'].mean()
            summary = (
                "\n"
                "Banking Summary:\n"
                f"Total Transactions: {total_transactions}\n"
                f"Total Amount: ${total_amount:,.2f}\n"
                f"Average Transaction: ${avg_transaction:,.2f}\n"
            )
            return fig, summary
        except Exception as e:
            # UI boundary: surface the problem in the summary box.
            return None, f"Error generating bank report: {str(e)}"

    def analyze_marketing(self):
        """Chart clicks vs. conversions per campaign and summarise them.

        Adds (or refreshes) a ``conversion_rate`` percentage column on
        ``self.marketing_data``. Campaigns with zero clicks get a missing
        rate instead of ``inf``/``NaN`` from a division by zero, so they do
        not distort the average.

        Returns:
            ``(figure, summary_text)``; on failure ``(None, error_message)``.
        """
        try:
            clicks = self.marketing_data['clicks']
            # Mask zero-click rows so the division yields NaN, which mean()
            # skips, rather than infinity.
            safe_clicks = clicks.where(clicks != 0)
            self.marketing_data['conversion_rate'] = (
                self.marketing_data['conversions'] / safe_clicks * 100
            )

            fig = px.bar(
                self.marketing_data,
                x='campaign',
                y=['clicks', 'conversions'],
                title='Campaign Performance',
                barmode='group'
            )

            summary = (
                "\n"
                "Marketing Summary:\n"
                f"Total Campaigns: {len(self.marketing_data)}\n"
                f"Total Clicks: {self.marketing_data['clicks'].sum():,}\n"
                f"Total Conversions: {self.marketing_data['conversions'].sum():,}\n"
                f"Average Conversion Rate: {self.marketing_data['conversion_rate'].mean():.2f}%\n"
            )
            return fig, summary
        except Exception as e:
            # UI boundary: surface the problem in the summary box.
            return None, f"Error analyzing marketing data: {str(e)}"
def create_gradio_interface():
    """Build the three-tab Gradio dashboard and return it (not launched).

    Tabs: document OCR, bank report, and marketing analysis. Each button is
    wired to the matching method of a freshly created
    ``BusinessManagementSystem``.

    Returns:
        The ``gr.Blocks`` interface.
    """
    bms = BusinessManagementSystem()

    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            "\n"
            "# AI-Driven Business Management System\n"
            "Upload documents, analyze banking data, and track marketing campaigns.\n"
        )

        with gr.Tabs():
            # Document Processing Tab
            with gr.Tab("Document Processing"):
                # The upload widget is an image component feeding an image
                # OCR pipeline, so the text must not promise PDF support.
                gr.Markdown(
                    "\n"
                    "### Upload and Process Documents\n"
                    "Supports image files such as PNG and JPG. "
                    "The system will extract text and data from the documents.\n"
                )
                with gr.Row():
                    with gr.Column():
                        doc_input = gr.Image(
                            label="Upload Document",
                            type="numpy"
                        )
                        process_btn = gr.Button("Process Document", variant="primary")
                    with gr.Column():
                        doc_output = gr.Textbox(
                            label="Processing Results",
                            lines=10
                        )
                        json_output = gr.JSON(
                            label="Extracted Data"
                        )
                process_btn.click(
                    fn=bms.process_document,
                    inputs=[doc_input],
                    outputs=[doc_output, json_output]
                )

            # Banking Tab
            with gr.Tab("Banking"):
                gr.Markdown("### Banking Analysis")
                bank_btn = gr.Button("Generate Bank Report", variant="primary")
                bank_plot = gr.Plot(label="Transaction History")
                bank_summary = gr.Textbox(
                    label="Banking Summary",
                    lines=5
                )
                bank_btn.click(
                    fn=bms.generate_bank_report,
                    inputs=[],
                    outputs=[bank_plot, bank_summary]
                )

            # Marketing Tab
            with gr.Tab("Marketing"):
                gr.Markdown("### Marketing Campaign Analysis")
                marketing_btn = gr.Button("Analyze Marketing Campaigns", variant="primary")
                marketing_plot = gr.Plot(label="Campaign Performance")
                marketing_summary = gr.Textbox(
                    label="Marketing Summary",
                    lines=5
                )
                marketing_btn.click(
                    fn=bms.analyze_marketing,
                    inputs=[],
                    outputs=[marketing_plot, marketing_summary]
                )

    return interface
# Google Colab setup: run these installation commands before starting the app
#!pip install -q pytesseract opencv-python
#!apt-get install -y tesseract-ocr > /dev/null 2>&1
# Build the dashboard and serve it when this file is executed as a script
if __name__ == "__main__":
    create_gradio_interface().launch(share=True)