"""Exploratory plots for the normalized stock feature table.

Loads the normalized features CSV, prints its shape and first rows, then
shows, for whichever of the engineered feature columns are present:

1. a grid of histograms,
2. a correlation heatmap (only when more than one feature is present),
3. one boxplot per feature.

Each ``plt.show()`` call displays the current figure(s); with an interactive
backend the script typically pauses until the window is closed.
"""

from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load normalized stock data.
# The path is assembled from components instead of a backslash-separated
# literal so it resolves on POSIX systems as well as on Windows.
csv_path = (
    Path('data') / 'merged' / 'norm' / 'stocks_features_improved_normalized.csv'
)
df = pd.read_csv(csv_path)

# 1. Show basic info and head
print('Data shape:', df.shape)
print(df.head())

# 2. Feature distribution histograms
features = [
    'price_momentum',
    'volume_price_ratio',
    'daily_range',
    'avg_sentiment',
    'technical_strength',
]
# Only plot the features that actually exist in the loaded table, so a CSV
# missing some engineered columns does not raise a KeyError.
existing_features = [f for f in features if f in df.columns]

if existing_features:
    df[existing_features].hist(bins=30, figsize=(12, 8))
    plt.suptitle('Feature Distributions')
    plt.tight_layout()
    plt.show()
else:
    print('No engineered features found for distribution plots.')

# 3. Correlation heatmap (a single feature would give a trivial 1x1 matrix)
if len(existing_features) > 1:
    plt.figure(figsize=(8, 6))
    sns.heatmap(df[existing_features].corr(), annot=True, cmap='coolwarm')
    plt.title('Feature Correlation Heatmap')
    plt.show()

# 4. Outlier boxplots for engineered features (one figure per feature)
for feat in existing_features:
    plt.figure(figsize=(6, 2))
    sns.boxplot(x=df[feat])
    plt.title(f'Boxplot: {feat}')
    plt.show()

# 5. Pairplot (if you have a target column, e.g., "target")
# Uncomment and adjust if you have a target/label
# sns.pairplot(df, vars=existing_features, hue='target')
# plt.show()

print('Visualization complete. You can add more plots as needed!')