Anupam202224 committed (verified)
Commit ed192dc · 1 parent: 27c9c3e

Create appy.py

Files changed (1): appy.py (+234, -0)
appy.py ADDED
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
import sqlite3
from datetime import datetime, timedelta


def generate_sample_data():
    # Generate sample CRM, social media, and financial data for 1,000 customers
    np.random.seed(42)
    n_customers = 1000
    days_ago = [int(x) for x in np.random.randint(0, 365, n_customers)]

    crm_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'interactions': np.random.randint(1, 100, n_customers),
        'transactions': np.random.uniform(10, 1000, n_customers),
        'converted': np.random.choice([0, 1], n_customers, p=[0.7, 0.3]),
        'timestamp': [datetime.now() - timedelta(days=d) for d in days_ago]
    })

    social_days = [int(x) for x in np.random.randint(0, 365, n_customers)]
    social_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'interactions': np.random.randint(1, 200, n_customers),
        'open_rate': np.random.uniform(0.1, 0.9, n_customers),
        'timestamp': [datetime.now() - timedelta(days=d) for d in social_days]
    })

    # Enhanced financial data with additional spend and frequency metrics
    financial_days = [int(x) for x in np.random.randint(0, 365, n_customers)]
    financial_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'transaction_amount': np.random.uniform(50, 5000, n_customers),
        'transaction_frequency': np.random.randint(1, 20, n_customers),  # New column
        'average_purchase': np.random.uniform(100, 2000, n_customers),   # New column
        'total_spend': np.random.uniform(1000, 50000, n_customers),      # New column
        'transaction_date': [datetime.now() - timedelta(days=d) for d in financial_days]
    })

    return crm_data, social_data, financial_data

def init_database():
    conn = sqlite3.connect('sales_intelligence.db')
    cursor = conn.cursor()

    # Create tables if they don't exist
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS financial_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            transaction_amount FLOAT,
            transaction_frequency INTEGER,
            average_purchase FLOAT,
            total_spend FLOAT,
            transaction_date DATETIME
        )
    ''')

    cursor.execute('''
        CREATE TABLE IF NOT EXISTS crm_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            interactions INTEGER,
            transactions FLOAT,
            converted INTEGER,
            timestamp DATETIME
        )
    ''')

    cursor.execute('''
        CREATE TABLE IF NOT EXISTS social_media_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            interactions INTEGER,
            open_rate FLOAT,
            timestamp DATETIME
        )
    ''')

    # Generate and insert sample data.
    # Note: to_sql(..., if_exists='replace') drops and recreates each table from
    # the DataFrame schema, so it supersedes the CREATE TABLE definitions above.
    crm_data, social_data, financial_data = generate_sample_data()

    try:
        crm_data.to_sql('crm_data', conn, if_exists='replace', index=False)
        social_data.to_sql('social_media_data', conn, if_exists='replace', index=False)
        financial_data.to_sql('financial_data', conn, if_exists='replace', index=False)

        print(f"Inserted {len(crm_data)} CRM records")
        print(f"Inserted {len(social_data)} social media records")
        print(f"Inserted {len(financial_data)} financial records")

    except sqlite3.Error as e:
        print(f"Error inserting data: {e}")

    conn.commit()
    conn.close()
    print("Database initialized with sample data!")

def segment_prospects(df, data_source):
    print("Segmenting prospects...")

    if data_source.lower() == 'financial_databases':
        # Cluster financial customers on spend-related features
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        df['segment'] = kmeans.fit_predict(
            df[['transaction_amount', 'transaction_frequency', 'average_purchase']])
        # Note: KMeans cluster ids are arbitrary, so these labels are nominal
        # rather than guaranteed to be ordered from low to high value.
        segment_labels = ['Low Value', 'Medium Value', 'High Value']
        df['segment_label'] = [segment_labels[s] for s in df['segment']]

    elif 'interactions' in df.columns and 'transactions' in df.columns:
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        df['segment'] = kmeans.fit_predict(df[['interactions', 'transactions']])

    print("Columns after segmentation:", df.columns)
    return df

def performance_analysis(df, data_source):
    print("Analyzing performance...")
    insights = {}

    if data_source.lower() == 'financial_databases':
        # Specific analysis for financial data
        if 'segment' in df.columns:
            # Overall metrics
            insights['overall_metrics'] = {
                'total_revenue': float(df['total_spend'].sum()),
                'average_transaction': float(df['transaction_amount'].mean()),
                'total_customers': len(df),
                'average_frequency': float(df['transaction_frequency'].mean())
            }

            # Segment-specific metrics
            segment_metrics = df.groupby('segment').agg({
                'transaction_amount': ['mean', 'max'],
                'transaction_frequency': 'mean',
                'total_spend': 'sum',
                'average_purchase': 'mean'
            }).round(2)

            # Convert the segment metrics to a more readable format
            for segment in df['segment'].unique():
                insights[f'segment_{segment}'] = {
                    'avg_transaction': float(segment_metrics.loc[segment, ('transaction_amount', 'mean')]),
                    'max_transaction': float(segment_metrics.loc[segment, ('transaction_amount', 'max')]),
                    'avg_frequency': float(segment_metrics.loc[segment, ('transaction_frequency', 'mean')]),
                    'total_revenue': float(segment_metrics.loc[segment, ('total_spend', 'sum')]),
                    'avg_purchase': float(segment_metrics.loc[segment, ('average_purchase', 'mean')])
                }

            return pd.DataFrame.from_dict(insights, orient='index')
    else:
        # Original analysis for other data sources; numeric_only skips
        # non-numeric columns such as the timestamp strings read from SQLite.
        if 'segment' in df.columns:
            insights = df.groupby('segment').mean(numeric_only=True)
            return insights

    return pd.DataFrame()

def load_data(data_source):
    conn = sqlite3.connect('sales_intelligence.db')
    if data_source.lower() == 'crm':
        return pd.read_sql('SELECT * FROM crm_data', conn)
    elif data_source.lower() == 'social_media':
        return pd.read_sql('SELECT * FROM social_media_data', conn)
    elif data_source.lower() == 'financial_databases':
        return pd.read_sql('SELECT * FROM financial_data', conn)
    else:
        return pd.DataFrame()


def preprocess_data(df):
    # Add any necessary preprocessing steps here
    return df


def predict_lead_conversion(df):
    # Example model for lead conversion prediction
    X = df[['interactions', 'transactions']]
    y = df['converted']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return model, accuracy

def sales_intelligence_platform(data_source):
    print("Processing data source:", data_source)
    data = load_data(data_source)

    if data.empty:
        return {"error": f"No data found for source: {data_source}. Valid sources are: 'CRM', 'social_media', 'financial_databases'"}

    data = preprocess_data(data)
    data = segment_prospects(data, data_source)
    model, accuracy = predict_lead_conversion(data) if data_source.lower() == 'crm' else (None, None)
    insights = performance_analysis(data, data_source)

    if insights.empty:
        return {"error": "Could not generate insights from the data"}

    result_dict = insights.to_dict()

    # Add some helpful descriptions for the financial segments
    if data_source.lower() == 'financial_databases':
        result_dict['analysis_description'] = {
            'segment_0': 'Low Value Customers',
            'segment_1': 'Medium Value Customers',
            'segment_2': 'High Value Customers'
        }

    return result_dict

# Initialize the database with sample data
init_database()

# Create Gradio interface
iface = gr.Interface(
    fn=sales_intelligence_platform,
    inputs=gr.Dropdown(
        choices=["CRM", "social_media", "financial_databases"],
        label="Select Data Source"
    ),
    outputs="json",
    title="Sales Intelligence Platform",
    description="An AI-powered platform for managing sales data and surfacing insights. Choose a data source to analyze.",
    theme="dark"
)

if __name__ == "__main__":
    iface.launch()
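
For a quick smoke test without the web UI, the same pipeline can be exercised programmatically. This is a minimal sketch, assuming appy.py is importable from the working directory (importing it initializes sales_intelligence.db and builds the interface as side effects, but does not launch it):

# Hypothetical smoke test; not part of appy.py.
from appy import sales_intelligence_platform

for source in ("CRM", "social_media", "financial_databases"):
    result = sales_intelligence_platform(source)
    # Print the first few insight keys (or the error message) for each source.
    print(source, "->", list(result)[:5])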