import logging
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


class Visualiser:
    """Build exploratory plots for the columns of a fitted DataFrame.

    Call :meth:`fit` first; every plotting method reads ``self.data`` or
    ``self.norm_data`` and returns the matplotlib figure it created, so the
    caller decides whether to show, save or close it.

    Attributes set by ``__init__``:
        scaler: ``MinMaxScaler`` built with its default settings.

    Attributes set by ``fit``:
        data: the DataFrame passed to ``fit``, stored as-is (not copied).
        norm_data: ``data`` passed through ``scaler.fit_transform``, with
            the same columns and index.
    """

    def __init__(self) -> None:
        logging.debug('Init Visualiser')
        self.scaler = MinMaxScaler()

    def fit(self, data: pd.DataFrame) -> None:
        """Store *data* and a min-max scaled copy of it.

        Args:
            data: frame to visualise. Its raw values are handed to the
                scaler, so the scaler must be able to process every column.
        """
        self.data = data
        self.norm_data = pd.DataFrame(
            self.scaler.fit_transform(data.values),
            columns=data.columns,
            index=data.index,
        )

    # ------------ #
    # ACF and PACF #
    # ------------ #
    def plot_auto_correlation(
            self, func, title: str = 'Autocorrelation') -> plt.Figure:
        """Draw one correlogram per column, stacked vertically.

        Args:
            func: plotting callable invoked as
                ``func(series, ax=ax, zero=False)``; ``plot_acf`` and
                ``plot_pacf`` are the ones used by this class.
            title: prefix of each subplot title, followed by the column
                name.

        Returns:
            The figure holding one subplot per column of ``self.data``.
        """
        columns = self.data.columns
        n_rows = len(columns)
        # squeeze=False keeps ``axs`` two-dimensional, so a frame with a
        # single column (a 1x1 grid) can be indexed like any other.
        fig, axs = plt.subplots(
            n_rows, 1,
            figsize=(8, 2 * n_rows),
            sharex=True, sharey=True,
            squeeze=False)
        for ax, col in zip(axs[:, 0], columns):
            func(self.data[col], ax=ax, zero=False)
            ax.set_title(f'{title} - {col}')
        fig.tight_layout()
        return fig

    def acf(self) -> plt.Figure:
        """Return the autocorrelation plots of every column."""
        return self.plot_auto_correlation(plot_acf)

    def pacf(self) -> plt.Figure:
        """Return the partial autocorrelation plots of every column."""
        return self.plot_auto_correlation(
            plot_pacf, title='Partial Autocorrelation')
    # ----- End of [ACF and PACF] ----- #

    def corr(self) -> plt.Figure:
        """Return a heatmap of the pairwise correlations of numeric columns.

        Only the cells below the diagonal are drawn.
        """
        corr = self.data.corr(numeric_only=True)
        # Mask the upper triangle (diagonal included): those cells only
        # repeat the lower triangle or show a column against itself.
        mask = np.triu(np.ones_like(corr, dtype=bool))
        fig, ax = plt.subplots(figsize=(8, 8))
        sns.heatmap(
            corr,
            mask=mask,
            square=True,
            annot=True,
            cmap='coolwarm',
            linewidths=.5,
            cbar_kws={"shrink": .5},
            ax=ax)
        return fig

    def distributions(self, norm=True) -> plt.Figure:
        """Return a grid of histograms (with KDE), one per column.

        Args:
            norm: when truthy, plot ``self.norm_data`` and share both axes
                across subplots; otherwise plot ``self.data`` with
                independent axes.

        Returns:
            The figure holding the grid. The grid has at most 5 columns;
            trailing cells stay empty when the column count does not fill
            the last row.

        Raises:
            ValueError: if the selected frame has no columns.
        """
        data: pd.DataFrame = self.norm_data if norm else self.data
        n_series = data.shape[1]
        if n_series == 0:
            raise ValueError('Cannot plot distributions: data has no columns')

        # Roughly square grid, capped at 5 subplots per row.
        plot_col = min(math.ceil(math.sqrt(n_series)), 5)
        plot_row = math.ceil(n_series / plot_col)
        # figsize is (width, height): width grows with the number of grid
        # columns and height with the number of grid rows.
        # squeeze=False keeps ``axs`` two-dimensional even for a 1x1 or
        # single-row grid.
        fig, axs = plt.subplots(
            plot_row, plot_col,
            figsize=(4 * plot_col, 1.5 * plot_row),
            sharex=norm, sharey=norm,
            squeeze=False)
        # ``axs.flat`` walks the grid row by row, matching column order.
        for ax, col in zip(axs.flat, data.columns):
            sns.histplot(data[col], ax=ax, kde=True)
            ax.set(xlabel=None)
            ax.set_title(col)
        fig.suptitle(f'Distributions - Normalised ({norm!s})')
        return fig