"""Gradio dashboard for document OCR, bank reporting and marketing analysis."""

import io
import json
import os
from datetime import datetime

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pytesseract
from PIL import Image


class DocumentProcessor:
    """Extract text from uploaded document images with Tesseract OCR."""

    def __init__(self):
        """Create the upload folder if it does not exist yet."""
        self.upload_folder = "uploaded_documents"
        os.makedirs(self.upload_folder, exist_ok=True)

    @staticmethod
    def _to_grayscale(img_array):
        """Return a single-channel version of an RGB/RGBA/grayscale array."""
        if img_array.ndim != 3:
            # Already single-channel.
            return img_array
        # Gradio (type="numpy") and PIL both deliver RGB channel order, not
        # OpenCV's native BGR, so the RGB conversion codes are required to
        # apply the correct luminance weights.
        if img_array.shape[2] == 4:
            return cv2.cvtColor(img_array, cv2.COLOR_RGBA2GRAY)
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    def process_image(self, image):
        """Run OCR on an uploaded image.

        Args:
            image: A numpy array or any object accepted by ``np.array``
                (for example a PIL image); ``None`` when nothing was uploaded.

        Returns:
            A ``(message, parsed_data)`` tuple. ``parsed_data`` is the dict
            produced by :meth:`parse_text`, or ``None`` when no image was
            supplied or processing failed.
        """
        if image is None:
            return "No image uploaded", None

        try:
            if isinstance(image, np.ndarray):
                img_array = image
            else:
                img_array = np.array(image)

            gray = self._to_grayscale(img_array)

            # Boost contrast, then binarise with Otsu's automatic threshold.
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)
            _, threshold = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            text = pytesseract.image_to_string(threshold)
            parsed_data = self.parse_text(text)
            return (
                f"Document processed successfully!\n\nExtracted Text:\n{text}",
                parsed_data,
            )
        except Exception as e:
            # UI boundary: report the failure in the result textbox instead of
            # letting the exception propagate into the Gradio event handler.
            return f"Error processing document: {str(e)}", None

    def parse_text(self, text) -> dict:
        """Summarise OCR output.

        Args:
            text: The raw text returned by the OCR step.

        Returns:
            A dict with the raw text, the total number of lines, the
            processing timestamp and the list of non-blank lines.
        """
        lines = text.split('\n')
        return {
            'raw_text': text,
            'line_count': len(lines),
            'processed_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'extracted_lines': [line for line in lines if line.strip()],
        }


class BusinessManagementSystem:
    """Hold the business data sets and build the reports shown in the UI."""

    def __init__(self):
        """Create the document processor and load (or mock) the data sets."""
        self.doc_processor = DocumentProcessor()
        self.load_data()

    def load_data(self) -> None:
        """Load all CSV data sets, or fall back to mock data.

        Loading is all-or-nothing: if any CSV file is missing, the bank and
        marketing data are replaced by generated mock data, and the account
        and invoice data become empty frames so every attribute is defined.
        """
        try:
            self.bank_data = pd.read_csv('bank_statements.csv')
            self.marketing_data = pd.read_csv('marketing_data.csv')
            self.account_data = pd.read_csv('account_data.csv')
            self.invoices = pd.read_csv('invoices.csv')
        except FileNotFoundError:
            print("CSV files not found. Using mock data...")
            self.bank_data = self.mock_bank_data()
            self.marketing_data = self.mock_marketing_data()
            # No mock generators exist for these two; use empty frames so
            # later attribute access does not raise AttributeError.
            self.account_data = pd.DataFrame()
            self.invoices = pd.DataFrame()

    def mock_bank_data(self) -> pd.DataFrame:
        """Return ten random daily transactions starting 2024-01-01."""
        return pd.DataFrame({
            'date': pd.date_range(start='2024-01-01', periods=10),
            'transaction': [f'Transaction {i}' for i in range(10)],
            'amount': np.random.randint(1000, 10000, 10),
        })

    def mock_marketing_data(self) -> pd.DataFrame:
        """Return five campaigns with random click and conversion counts."""
        return pd.DataFrame({
            'campaign': [f'Campaign {i}' for i in range(5)],
            'clicks': np.random.randint(100, 1000, 5),
            'conversions': np.random.randint(10, 100, 5),
        })

    def process_document(self, image):
        """Delegate to :meth:`DocumentProcessor.process_image`."""
        return self.doc_processor.process_image(image)

    def generate_bank_report(self):
        """Build the transaction-history chart and a text summary.

        Returns:
            A ``(figure, summary)`` tuple; ``figure`` is ``None`` and
            ``summary`` carries the error message when generation fails.
        """
        try:
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=self.bank_data['date'],
                y=self.bank_data['amount'],
                mode='lines+markers',
                name='Transactions',
            ))
            fig.update_layout(
                title='Bank Transaction History',
                xaxis_title='Date',
                yaxis_title='Amount ($)',
            )

            total_transactions = len(self.bank_data)
            total_amount = self.bank_data['amount'].sum()
            avg_transaction = self.bank_data['amount'].mean()

            summary = "\n".join([
                "Banking Summary:",
                f"Total Transactions: {total_transactions}",
                f"Total Amount: ${total_amount:,.2f}",
                f"Average Transaction: ${avg_transaction:,.2f}",
            ])
            return fig, summary
        except Exception as e:
            # UI boundary: surface the problem in the summary textbox.
            return None, f"Error generating bank report: {str(e)}"

    def analyze_marketing(self):
        """Build the campaign-performance chart and a text summary.

        Adds a ``conversion_rate`` column (percent) to ``self.marketing_data``.

        Returns:
            A ``(figure, summary)`` tuple; ``figure`` is ``None`` and
            ``summary`` carries the error message when analysis fails.
        """
        try:
            data = self.marketing_data
            # A campaign with zero clicks has no defined conversion rate;
            # treating it as missing keeps the average finite instead of inf.
            clicks = data['clicks'].replace(0, np.nan)
            data['conversion_rate'] = data['conversions'] / clicks * 100

            fig = px.bar(
                data,
                x='campaign',
                y=['clicks', 'conversions'],
                title='Campaign Performance',
                barmode='group',
            )

            summary = "\n".join([
                "Marketing Summary:",
                f"Total Campaigns: {len(data)}",
                f"Total Clicks: {data['clicks'].sum():,}",
                f"Total Conversions: {data['conversions'].sum():,}",
                f"Average Conversion Rate: {data['conversion_rate'].mean():.2f}%",
            ])
            return fig, summary
        except Exception as e:
            # UI boundary: surface the problem in the summary textbox.
            return None, f"Error analyzing marketing data: {str(e)}"


def create_gradio_interface():
    """Assemble the three-tab Gradio Blocks app and return it (not launched)."""
    bms = BusinessManagementSystem()

    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # AI-Driven Business Management System
        Upload documents, analyze banking data, and track marketing campaigns.
        """)

        with gr.Tabs():
            # Document Processing Tab
            with gr.Tab("Document Processing"):
                # NOTE(review): the text below advertises PDF support, but the
                # upload widget is a gr.Image, which only accepts image files.
                gr.Markdown("""
                ### Upload and Process Documents
                Support for PNG, JPG, and PDF files. The system will extract text and data from the documents.
                """)
                with gr.Row():
                    with gr.Column():
                        doc_input = gr.Image(
                            label="Upload Document",
                            type="numpy"
                        )
                        process_btn = gr.Button("Process Document", variant="primary")
                    with gr.Column():
                        doc_output = gr.Textbox(
                            label="Processing Results",
                            lines=10
                        )
                        json_output = gr.JSON(
                            label="Extracted Data"
                        )
                process_btn.click(
                    fn=bms.process_document,
                    inputs=[doc_input],
                    outputs=[doc_output, json_output]
                )

            # Banking Tab
            with gr.Tab("Banking"):
                gr.Markdown("### Banking Analysis")
                bank_btn = gr.Button("Generate Bank Report", variant="primary")
                bank_plot = gr.Plot(label="Transaction History")
                bank_summary = gr.Textbox(
                    label="Banking Summary",
                    lines=5
                )
                bank_btn.click(
                    fn=bms.generate_bank_report,
                    inputs=[],
                    outputs=[bank_plot, bank_summary]
                )

            # Marketing Tab
            with gr.Tab("Marketing"):
                gr.Markdown("### Marketing Campaign Analysis")
                marketing_btn = gr.Button("Analyze Marketing Campaigns", variant="primary")
                marketing_plot = gr.Plot(label="Campaign Performance")
                marketing_summary = gr.Textbox(
                    label="Marketing Summary",
                    lines=5
                )
                marketing_btn.click(
                    fn=bms.analyze_marketing,
                    inputs=[],
                    outputs=[marketing_plot, marketing_summary]
                )

    return interface


# For Google Colab, first run these installations
#!pip install -q pytesseract opencv-python
#!apt-get install -y tesseract-ocr > /dev/null 2>&1

# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True)