import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import gradio as gr
import sqlite3
from datetime import datetime, timedelta


def generate_sample_data():
    # Generate reproducible sample data for the three sources
    np.random.seed(42)
    n_customers = 1000

    days_ago = [int(x) for x in np.random.randint(0, 365, n_customers)]
    crm_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'interactions': np.random.randint(1, 100, n_customers),
        'transactions': np.random.uniform(10, 1000, n_customers),
        'converted': np.random.choice([0, 1], n_customers, p=[0.7, 0.3]),
        'timestamp': [datetime.now() - timedelta(days=d) for d in days_ago]
    })

    social_days = [int(x) for x in np.random.randint(0, 365, n_customers)]
    social_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'interactions': np.random.randint(1, 200, n_customers),
        'open_rate': np.random.uniform(0.1, 0.9, n_customers),
        'timestamp': [datetime.now() - timedelta(days=d) for d in social_days]
    })

    # Enhanced financial data with more relevant metrics
    financial_days = [int(x) for x in np.random.randint(0, 365, n_customers)]
    financial_data = pd.DataFrame({
        'customer_id': range(1, n_customers + 1),
        'transaction_amount': np.random.uniform(50, 5000, n_customers),
        'transaction_frequency': np.random.randint(1, 20, n_customers),
        'average_purchase': np.random.uniform(100, 2000, n_customers),
        'total_spend': np.random.uniform(1000, 50000, n_customers),
        'transaction_date': [datetime.now() - timedelta(days=d) for d in financial_days]
    })

    return crm_data, social_data, financial_data


def init_database():
    conn = sqlite3.connect('sales_intelligence.db')
    cursor = conn.cursor()

    # Create tables if they don't exist (to_sql with if_exists='replace'
    # recreates them below; the explicit schema documents the intended layout)
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS financial_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            transaction_amount FLOAT,
            transaction_frequency INTEGER,
            average_purchase FLOAT,
            total_spend FLOAT,
            transaction_date DATETIME
        )
    ''')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS crm_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            interactions INTEGER,
            transactions FLOAT,
            converted INTEGER,
            timestamp DATETIME
        )
    ''')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS social_media_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            customer_id INTEGER,
            interactions INTEGER,
            open_rate FLOAT,
            timestamp DATETIME
        )
    ''')

    # Generate and insert sample data
    crm_data, social_data, financial_data = generate_sample_data()
    try:
        crm_data.to_sql('crm_data', conn, if_exists='replace', index=False)
        social_data.to_sql('social_media_data', conn, if_exists='replace', index=False)
        financial_data.to_sql('financial_data', conn, if_exists='replace', index=False)
        print(f"Inserted {len(crm_data)} CRM records")
        print(f"Inserted {len(social_data)} social media records")
        print(f"Inserted {len(financial_data)} financial records")
    except sqlite3.Error as e:
        print(f"Error inserting data: {e}")

    conn.commit()
    conn.close()
    print("Database initialized with sample data!")
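
# Optional verification helper (an illustrative sketch added here, not part of
# the platform flow above): after init_database() runs, this prints the row
# count per table so the inserted sample data can be sanity-checked. The table
# names match the ones created above; the function itself is new.
def print_table_counts(db_path='sales_intelligence.db'):
    conn = sqlite3.connect(db_path)
    try:
        for table in ('crm_data', 'social_media_data', 'financial_data'):
            count = conn.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
            print(f"{table}: {count} rows")
    finally:
        conn.close()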

def segment_prospects(df, data_source):
    print("Segmenting prospects...")
    if data_source.lower() == 'financial_databases':
        # Special handling for financial data
        features = ['transaction_amount', 'transaction_frequency', 'average_purchase']
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        df['segment'] = kmeans.fit_predict(df[features])
        # KMeans cluster ids are arbitrary, so rank the clusters by average
        # transaction amount before attaching value labels
        ranked = df.groupby('segment')['transaction_amount'].mean().sort_values().index
        rank_map = {cluster: rank for rank, cluster in enumerate(ranked)}
        df['segment'] = df['segment'].map(rank_map)
        segment_labels = ['Low Value', 'Medium Value', 'High Value']
        df['segment_label'] = [segment_labels[s] for s in df['segment']]
    elif 'interactions' in df.columns and 'transactions' in df.columns:
        # CRM data: cluster on engagement and transaction volume
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        df['segment'] = kmeans.fit_predict(df[['interactions', 'transactions']])
    elif 'interactions' in df.columns and 'open_rate' in df.columns:
        # Social media data: cluster on engagement and email open rate
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        df['segment'] = kmeans.fit_predict(df[['interactions', 'open_rate']])
    print("Columns after segmentation:", list(df.columns))
    return df


def performance_analysis(df, data_source):
    print("Analyzing performance...")
    insights = {}

    if data_source.lower() == 'financial_databases':
        # Specific analysis for financial data
        if 'segment' in df.columns:
            # Overall metrics
            insights['overall_metrics'] = {
                'total_revenue': float(df['total_spend'].sum()),
                'average_transaction': float(df['transaction_amount'].mean()),
                'total_customers': len(df),
                'average_frequency': float(df['transaction_frequency'].mean())
            }

            # Segment-specific metrics
            segment_metrics = df.groupby('segment').agg({
                'transaction_amount': ['mean', 'max'],
                'transaction_frequency': 'mean',
                'total_spend': 'sum',
                'average_purchase': 'mean'
            }).round(2)

            # Convert the segment metrics to a more readable format
            for segment in df['segment'].unique():
                insights[f'segment_{segment}'] = {
                    'avg_transaction': float(segment_metrics.loc[segment, ('transaction_amount', 'mean')]),
                    'max_transaction': float(segment_metrics.loc[segment, ('transaction_amount', 'max')]),
                    'avg_frequency': float(segment_metrics.loc[segment, ('transaction_frequency', 'mean')]),
                    'total_revenue': float(segment_metrics.loc[segment, ('total_spend', 'sum')]),
                    'avg_purchase': float(segment_metrics.loc[segment, ('average_purchase', 'mean')])
                }
            return pd.DataFrame.from_dict(insights, orient='index')
    else:
        # Original analysis for other data sources: per-segment means of the
        # numeric columns (numeric_only avoids errors on the timestamp column)
        if 'segment' in df.columns:
            insights = df.groupby('segment').mean(numeric_only=True)
            return insights

    return pd.DataFrame()
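
# Optional variant (an illustrative sketch, not wired into the platform):
# KMeans is distance-based, so clustering raw columns with very different
# scales lets the largest-magnitude feature dominate the segments.
# Standardising the features first is a common remedy. The helper name and its
# scaler pipeline are additions for illustration; segment_prospects() above
# keeps the original behaviour.
def segment_prospects_scaled(df, feature_columns, n_clusters=3):
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    # Scale each feature to zero mean / unit variance, then cluster
    pipeline = make_pipeline(
        StandardScaler(),
        KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    )
    df = df.copy()
    df['segment'] = pipeline.fit_predict(df[feature_columns])
    return df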

def load_data(data_source):
    conn = sqlite3.connect('sales_intelligence.db')
    try:
        if data_source.lower() == 'crm':
            return pd.read_sql('SELECT * FROM crm_data', conn)
        elif data_source.lower() == 'social_media':
            return pd.read_sql('SELECT * FROM social_media_data', conn)
        elif data_source.lower() == 'financial_databases':
            return pd.read_sql('SELECT * FROM financial_data', conn)
        else:
            return pd.DataFrame()
    finally:
        conn.close()


def preprocess_data(df):
    # Add any necessary preprocessing steps here
    return df


def predict_lead_conversion(df):
    # Example model for lead conversion prediction
    X = df[['interactions', 'transactions']]
    y = df['converted']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return model, accuracy


def sales_intelligence_platform(data_source):
    print("Processing data source:", data_source)
    data = load_data(data_source)

    if data.empty:
        return {"error": f"No data found for source: {data_source}. "
                         "Valid sources are: 'CRM', 'social_media', 'financial_databases'"}

    data = preprocess_data(data)
    data = segment_prospects(data, data_source)
    model, accuracy = predict_lead_conversion(data) if data_source.lower() == 'crm' else (None, None)

    insights = performance_analysis(data, data_source)
    if insights.empty:
        return {"error": "Could not generate insights from the data"}

    result_dict = insights.to_dict()

    # Add some helpful messages
    if data_source.lower() == 'financial_databases':
        result_dict['analysis_description'] = {
            'segment_0': 'Low Value Customers',
            'segment_1': 'Medium Value Customers',
            'segment_2': 'High Value Customers'
        }

    return result_dict


# Initialize the database with sample data
init_database()

# Create Gradio interface
iface = gr.Interface(
    fn=sales_intelligence_platform,
    inputs=gr.Dropdown(
        choices=["CRM", "social_media", "financial_databases"],
        label="Select Data Source"
    ),
    outputs="json",
    title="Sales Intelligence Platform",
    description="An AI-powered platform for managing sales data and surfacing insights. Choose a data source to analyze.",
    theme="dark"
)

if __name__ == "__main__":
    iface.launch()
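

# Headless usage (an illustrative sketch): the Gradio UI above is the primary
# entry point, but the pipeline can also be exercised directly from a REPL or
# a test script. run_demo() is an addition for illustration and is never
# called by the platform itself.
def run_demo():
    from pprint import pprint

    for source in ("CRM", "social_media", "financial_databases"):
        print(f"\n=== {source} ===")
        pprint(sales_intelligence_platform(source))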