File size: 2,594 Bytes
8cf4695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import logging
import math

import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


class Visualiser:
    """Exploratory plots for the columns of a pandas DataFrame.

    Call :meth:`fit` first: every plotting method reads the frame stored by
    it and returns the matplotlib figure it drew on.
    """

    def __init__(self) -> None:
        logging.debug('Init Visualiser')
        self.scaler = MinMaxScaler()

    def fit(self, data: pd.DataFrame) -> None:
        """Store ``data`` together with a min-max scaled copy of it.

        Args:
            data: Frame to visualise. Every value is handed to the scaler,
                so all columns are expected to be numeric.
        """
        self.data = data
        self.norm_data = pd.DataFrame(
            self.scaler.fit_transform(data.values),
            columns=data.columns,
            index=data.index)

    # ------------ #
    # ACF and PACF #
    # ------------ #

    def plot_auto_correlation(self, func, title='Autocorrelation'):
        """Draw one correlogram per column, stacked vertically.

        Args:
            func: Plotting callable invoked as
                ``func(series, ax=ax, zero=False)`` for every column, e.g.
                statsmodels' ``plot_acf`` or ``plot_pacf``.
            title: Prefix of each subplot title; the column name is appended
                after a dash.

        Returns:
            The figure holding one subplot per column of the fitted data.
        """
        columns = self.data.columns
        n_rows = len(columns)
        # squeeze=False keeps ``axs`` two-dimensional even for a single
        # column; by default matplotlib would return a bare Axes there and
        # indexing it would fail.
        fig, axs = plt.subplots(
            n_rows,
            1,
            figsize=(8, 2 * n_rows),
            sharex=True,
            sharey=True,
            squeeze=False)
        for ax, col in zip(axs[:, 0], columns):
            func(self.data[col], ax=ax, zero=False)
            ax.set_title(f'{title} - {col}')
        fig.tight_layout()
        return fig

    def acf(self):
        """Return a figure with the autocorrelation plot of every column."""
        return self.plot_auto_correlation(plot_acf)

    def pacf(self):
        """Return a figure with the partial autocorrelation of every column."""
        return self.plot_auto_correlation(
            plot_pacf, title='Partial Autocorrelation')

    # ----- End of [ACF and PACF] ----- #

    def corr(self):
        """Return a heatmap figure of the pairwise column correlations.

        Only numeric columns take part. The diagonal and the upper triangle
        are masked, so each pair of columns is shown once.
        """
        corr = self.data.corr(numeric_only=True)
        # Boolean mask that is True on and above the diagonal.
        mask = np.triu(np.ones_like(corr, dtype=bool))
        fig, ax = plt.subplots(figsize=(8, 8))

        sns.heatmap(
            corr,
            mask=mask,
            square=True,
            annot=True,
            cmap='coolwarm',
            linewidths=.5,
            cbar_kws={"shrink": .5},
            ax=ax)

        return fig

    @staticmethod
    def _distribution_grid(n_plots, max_cols=5):
        """Return ``(n_rows, n_cols, figsize)`` for a grid of ``n_plots`` axes.

        The grid is as square as possible but never wider than ``max_cols``.
        ``figsize`` is ``(width, height)`` in inches, allowing 4 inches of
        width per grid column and 1.5 inches of height per grid row.

        Raises:
            ValueError: If ``n_plots`` is smaller than one.
        """
        if n_plots < 1:
            raise ValueError('At least one column is required to plot')
        n_cols = min(math.ceil(math.sqrt(n_plots)), max_cols)
        n_rows = math.ceil(n_plots / n_cols)
        return n_rows, n_cols, (4 * n_cols, 1.5 * n_rows)

    def distributions(self, norm=True):
        """Plot a histogram with a KDE curve for every column.

        Args:
            norm: When true, plot the min-max scaled data and share both
                axes between the subplots; otherwise plot the raw data with
                independent axes.

        Returns:
            The figure holding the grid of histograms.

        Raises:
            ValueError: If the data has no columns.
        """
        data: pd.DataFrame = self.norm_data if norm else self.data

        n_rows, n_cols, figsize = self._distribution_grid(data.shape[1])

        # squeeze=False guarantees a 2-D array of axes, so grids with a
        # single row or a single column work like any other.
        fig, axs = plt.subplots(
            n_rows,
            n_cols,
            figsize=figsize,
            sharex=norm,
            sharey=norm,
            squeeze=False)

        # ``axs.flat`` walks the grid row by row; surplus axes in the last
        # row are simply left empty.
        for ax, col in zip(axs.flat, data.columns):
            sns.histplot(data[col], ax=ax, kde=True)
            ax.set(xlabel=None)
            ax.set_title(col)

        fig.suptitle(f'Distributions - Normalised ({norm!s})')

        return fig