Anupam202224 committed on
Commit
03ccac1
·
verified ·
1 Parent(s): 3020a43

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -0
app.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import gradio as gr
4
+ from datetime import datetime
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from PIL import Image
8
+ import pytesseract
9
+ import io
10
+ import json
11
+ import cv2
12
+ import os
13
+ import numpy as np
14
+
15
class DocumentProcessor:
    """Run OCR on an uploaded document image and package the extracted text."""

    def __init__(self):
        """Ensure the local upload directory exists."""
        self.upload_folder = "uploaded_documents"
        os.makedirs(self.upload_folder, exist_ok=True)

    @staticmethod
    def _to_grayscale(img_array):
        """Return a single-channel view/copy of ``img_array``.

        Colour input is treated as RGB(A): the Gradio image component and
        PIL both hand over channels in RGB order, so the RGB conversion
        codes are used rather than OpenCV's native BGR ones.

        Raises:
            ValueError: if the array is neither 2-D nor 3-D.
        """
        if img_array.ndim == 2:
            return img_array
        if img_array.ndim != 3:
            raise ValueError(f"Unsupported image shape: {img_array.shape}")

        channels = img_array.shape[2]
        if channels == 1:
            # (H, W, 1) is already grayscale; drop the trailing axis.
            return np.ascontiguousarray(img_array[:, :, 0])
        if channels == 4:
            return cv2.cvtColor(img_array, cv2.COLOR_RGBA2GRAY)
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    def process_image(self, image):
        """Extract text from ``image`` with Tesseract.

        Args:
            image: a NumPy array or any object ``np.asarray`` can convert
                (for example a PIL image); ``None`` when nothing was uploaded.

        Returns:
            A ``(message, parsed_data)`` tuple. ``parsed_data`` is the dict
            built by :meth:`parse_text`, or ``None`` when no image was given
            or processing failed.
        """
        if image is None:
            return "No image uploaded", None

        try:
            img_array = np.asarray(image)
            gray = self._to_grayscale(img_array)

            # Boost contrast, then binarise with Otsu's automatic threshold
            # before handing the image to the OCR engine.
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)
            _, threshold = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            text = pytesseract.image_to_string(threshold)
            parsed_data = self.parse_text(text)

            return (
                f"Document processed successfully!\n\nExtracted Text:\n{text}",
                parsed_data,
            )
        except Exception as e:
            # UI boundary: report the failure in the result textbox instead
            # of letting the exception propagate to the caller.
            return f"Error processing document: {str(e)}", None

    def parse_text(self, text):
        """Summarise OCR output.

        Args:
            text: raw text returned by the OCR step.

        Returns:
            A dict with the raw text, the number of newline-separated lines
            (blank ones included), a local timestamp string, and the list of
            non-blank lines.
        """
        lines = text.split('\n')
        return {
            'raw_text': text,
            'line_count': len(lines),
            'processed_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'extracted_lines': [line for line in lines if line.strip()],
        }
61
+
62
class BusinessManagementSystem:
    """Back end for the dashboard: document OCR, bank and marketing reports."""

    def __init__(self):
        """Create the document processor and load the data sets."""
        self.doc_processor = DocumentProcessor()
        self.load_data()

    def load_data(self):
        """Load the four CSV data sets from the working directory.

        If any file is missing, all data sets fall back together: bank and
        marketing data are replaced by generated mock data, and account and
        invoice data become empty frames, so every attribute is always
        defined afterwards.
        """
        try:
            self.bank_data = pd.read_csv('bank_statements.csv')
            self.marketing_data = pd.read_csv('marketing_data.csv')
            self.account_data = pd.read_csv('account_data.csv')
            self.invoices = pd.read_csv('invoices.csv')
        except FileNotFoundError:
            print("CSV files not found. Using mock data...")
            self.bank_data = self.mock_bank_data()
            self.marketing_data = self.mock_marketing_data()
            # No mock generator exists for these two; use empty frames so
            # later attribute access does not raise AttributeError.
            self.account_data = pd.DataFrame()
            self.invoices = pd.DataFrame()

    def mock_bank_data(self):
        """Return ten days of random transactions starting 2024-01-01."""
        return pd.DataFrame({
            'date': pd.date_range(start='2024-01-01', periods=10),
            'transaction': [f'Transaction {i}' for i in range(10)],
            'amount': np.random.randint(1000, 10000, 10),
        })

    def mock_marketing_data(self):
        """Return five campaigns with random click and conversion counts."""
        return pd.DataFrame({
            'campaign': [f'Campaign {i}' for i in range(5)],
            'clicks': np.random.randint(100, 1000, 5),
            'conversions': np.random.randint(10, 100, 5),
        })

    def process_document(self, image):
        """Delegate to ``DocumentProcessor.process_image``."""
        return self.doc_processor.process_image(image)

    def generate_bank_report(self):
        """Plot transaction amounts over time and summarise them.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything goes wrong.
        """
        try:
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=self.bank_data['date'],
                y=self.bank_data['amount'],
                mode='lines+markers',
                name='Transactions',
            ))
            fig.update_layout(
                title='Bank Transaction History',
                xaxis_title='Date',
                yaxis_title='Amount ($)',
            )

            total_transactions = len(self.bank_data)
            total_amount = self.bank_data['amount'].sum()
            avg_transaction = self.bank_data['amount'].mean()

            summary = f"""
            Banking Summary:
            Total Transactions: {total_transactions}
            Total Amount: ${total_amount:,.2f}
            Average Transaction: ${avg_transaction:,.2f}
            """

            return fig, summary
        except Exception as e:
            # UI boundary: surface the error text in the summary box.
            return None, f"Error generating bank report: {str(e)}"

    def analyze_marketing(self):
        """Chart clicks vs. conversions per campaign and summarise them.

        Adds a ``conversion_rate`` (percent) column to
        ``self.marketing_data`` as a side effect.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything goes wrong.
        """
        try:
            # Campaigns with zero clicks have no defined rate; mapping 0 to
            # NaN keeps them out of the average instead of producing inf.
            clicks = self.marketing_data['clicks'].replace(0, np.nan)
            self.marketing_data['conversion_rate'] = (
                self.marketing_data['conversions'] / clicks * 100
            )

            fig = px.bar(
                self.marketing_data,
                x='campaign',
                y=['clicks', 'conversions'],
                title='Campaign Performance',
                barmode='group',
            )

            summary = f"""
            Marketing Summary:
            Total Campaigns: {len(self.marketing_data)}
            Total Clicks: {self.marketing_data['clicks'].sum():,}
            Total Conversions: {self.marketing_data['conversions'].sum():,}
            Average Conversion Rate: {self.marketing_data['conversion_rate'].mean():.2f}%
            """

            return fig, summary
        except Exception as e:
            # UI boundary: surface the error text in the summary box.
            return None, f"Error analyzing marketing data: {str(e)}"
150
+
151
def create_gradio_interface():
    """Build the three-tab Gradio UI and wire it to a new back end.

    Creates one ``BusinessManagementSystem`` instance and binds its methods
    to the buttons on the Document Processing, Banking and Marketing tabs.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    bms = BusinessManagementSystem()

    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # AI-Driven Business Management System
        Upload documents, analyze banking data, and track marketing campaigns.
        """)

        with gr.Tabs():
            # Document Processing Tab
            with gr.Tab("Document Processing"):
                # The upload widget is an image component, so only raster
                # images are advertised here (it cannot accept PDFs).
                gr.Markdown("""
                ### Upload and Process Documents
                Support for PNG and JPG image files. The system will extract text and data from the documents.
                """)

                with gr.Row():
                    with gr.Column():
                        doc_input = gr.Image(
                            label="Upload Document",
                            type="numpy",
                        )
                        process_btn = gr.Button("Process Document", variant="primary")

                    with gr.Column():
                        doc_output = gr.Textbox(
                            label="Processing Results",
                            lines=10,
                        )
                        json_output = gr.JSON(
                            label="Extracted Data",
                        )

                process_btn.click(
                    fn=bms.process_document,
                    inputs=[doc_input],
                    outputs=[doc_output, json_output],
                )

            # Banking Tab
            with gr.Tab("Banking"):
                gr.Markdown("### Banking Analysis")
                bank_btn = gr.Button("Generate Bank Report", variant="primary")
                bank_plot = gr.Plot(label="Transaction History")
                bank_summary = gr.Textbox(
                    label="Banking Summary",
                    lines=5,
                )

                bank_btn.click(
                    fn=bms.generate_bank_report,
                    inputs=[],
                    outputs=[bank_plot, bank_summary],
                )

            # Marketing Tab
            with gr.Tab("Marketing"):
                gr.Markdown("### Marketing Campaign Analysis")
                marketing_btn = gr.Button("Analyze Marketing Campaigns", variant="primary")
                marketing_plot = gr.Plot(label="Campaign Performance")
                marketing_summary = gr.Textbox(
                    label="Marketing Summary",
                    lines=5,
                )

                marketing_btn.click(
                    fn=bms.analyze_marketing,
                    inputs=[],
                    outputs=[marketing_plot, marketing_summary],
                )

    return interface
224
+
225
# For Google Colab, run these installations in a notebook cell first.
# They are IPython shell escapes, not Python statements, so they are kept
# here as comments; left uncommented they make this file a SyntaxError.
#   !pip install -q pytesseract opencv-python
#   !apt-get install -y tesseract-ocr > /dev/null 2>&1

# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True)