Spaces:
Runtime error
Runtime error
File size: 2,594 Bytes
8cf4695 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import logging
import math
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
class Visualiser:
    """Build exploratory matplotlib figures for a tabular ``DataFrame``.

    Call :meth:`fit` first to register the data; every plotting method then
    reads the stored frame(s) and returns the figure it created.
    """

    def __init__(self) -> None:
        logging.debug('Init Visualiser')
        self.scaler = MinMaxScaler()

    def fit(self, data: pd.DataFrame):
        """Store ``data`` together with a scaled copy of it.

        Args:
            data: Frame to visualise. Its values are passed to
                ``self.scaler.fit_transform``.

        Sets ``self.data`` (the frame as given) and ``self.norm_data`` (the
        scaled values, carrying the same column labels and index).
        """
        self.data = data
        self.norm_data = pd.DataFrame(
            self.scaler.fit_transform(data.values),
            columns=data.columns,
            index=data.index)

    # ------------ #
    # ACF and PACF #
    # ------------ #
    def plot_auto_correlation(self, func, title='Autocorrelation'):
        """Draw one correlation plot per column, stacked vertically.

        Args:
            func: Plotting callable invoked as
                ``func(series, ax=ax, zero=False)`` (e.g. ``plot_acf`` or
                ``plot_pacf``).
            title: Prefix of each panel title; the column name is appended.

        Returns:
            The figure holding one panel per column of ``self.data``.
        """
        columns = self.data.columns
        n_rows = len(columns)
        # squeeze=False keeps ``axs`` two-dimensional, so a frame with a
        # single column (one panel) can be indexed like any other.
        fig, axs = plt.subplots(
            n_rows,
            1,
            figsize=(8, 2 * n_rows),
            sharex=True,
            sharey=True,
            squeeze=False)
        for ax, col in zip(axs[:, 0], columns):
            func(self.data[col], ax=ax, zero=False)
            ax.set_title(f'{title} - {col}')
        fig.tight_layout()
        return fig

    def acf(self):
        """Return the autocorrelation figure for every column."""
        return self.plot_auto_correlation(plot_acf)

    def pacf(self):
        """Return the partial-autocorrelation figure for every column."""
        return self.plot_auto_correlation(
            plot_pacf, title='Partial Autocorrelation')
    # ----- End of [ACF and PACF] ----- #

    def corr(self):
        """Return a heatmap figure of the pairwise column correlations.

        Only numeric columns are correlated; the upper triangle (diagonal
        included) is masked so each pair is shown once.
        """
        corr = self.data.corr(numeric_only=True)
        # Generate a mask for the upper triangle
        mask = np.triu(np.ones_like(corr, dtype=bool))
        fig, ax = plt.subplots(figsize=(8, 8))
        sns.heatmap(
            corr,
            mask=mask,
            square=True,
            annot=True,
            cmap='coolwarm',
            linewidths=.5,
            cbar_kws={"shrink": .5},
            ax=ax)
        return fig

    def distributions(self, norm=True):
        """Return a grid of histograms (with KDE), one panel per column.

        Args:
            norm: When true, plot the scaled data and share both axes across
                panels; otherwise plot the data as given with independent
                axes.

        Returns:
            The figure holding the grid. The grid is at most five panels
            wide; trailing panels stay empty when the columns do not fill it.
        """
        data: pd.DataFrame = self.norm_data if norm else self.data
        n_series = data.shape[1]
        grid_cols = min(math.ceil(math.sqrt(n_series)), 5)
        grid_rows = math.ceil(n_series / grid_cols)
        # figsize is (width, height): width grows with the number of grid
        # columns and height with the number of grid rows.
        # squeeze=False keeps ``axs`` two-dimensional even for a one-row or
        # one-column grid, so the [row, col] lookup below always works.
        fig, axs = plt.subplots(
            grid_rows,
            grid_cols,
            figsize=(4 * grid_cols, 1.5 * grid_rows),
            sharex=norm,
            sharey=norm,
            squeeze=False)
        for idx, col in enumerate(data.columns):
            row, column = divmod(idx, grid_cols)
            ax = axs[row, column]
            sns.histplot(data[col], ax=ax, kde=True)
            ax.set(xlabel=None)
            ax.set_title(col)
        fig.suptitle(f'Distributions - Normalised ({norm})')
        return fig
|