import logging
from math import sqrt
from typing import List, Union

import pandas as pd
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.tsatools import freq_to_period

from .models import AllModels

# Configure logging with the DEBUG threshold as soon as this module is
# imported, so the logging.debug() calls below are emitted.
# NOTE(review): this is an import-time side effect on the process-wide root
# logger, not just this package -- confirm that is intended for library code.
logging.basicConfig(level=logging.DEBUG)


class Forecaster():

    def __init__(self) -> None:
        '''
        Create a forecaster that has no models attached yet
        '''
        logging.debug('Forecaster init')

        # Model name -> model object; replaced by init_models()
        self.models = dict()

    def fit(self, data):
        '''
        Fit data into the forecaster

        data :
            Kept as-is on self.data; no other processing is done here
        '''
        self.data = data

    def forecast(
            self,
            data: pd.DataFrame,
            models: Union[str, List[str]] = 'all',
            test: bool = False,
            enable_exog: bool = True
    ):
        '''
        Main function, will perform the entire forecast operation

        Nothing is returned; one dict per model is appended to self.results
        with the keys "model", "result", "evaluate" and "rmse"

        data : pd.DataFrame, required
            Data for training the model, must contain "datetime" and "y" columns, any additional
            column will be considered an exogenous column and be used for multivariate forecasting
            data must be cleaned without any missing value
            data's datetime column must be valid datetime strings whose frequency can be inferred

        models : str or List[str], default='all'
            Selected model(s) to use for forecasting. Default is "all",
            which will use all available models registered in models.AllModels

        test : bool, default=False
            Decide if the forecasting purpose is for testing or actual prediction
            Testing and prediction will not happen at the same time. When enabled,
            the last n_predict values of y are held out and every model's result
            is scored against them with RMSE

        enable_exog : bool, default=True
            If disabled, exog data will not be used in the model training, and the data will be
            considered as univariate data
            If enabled, and the data does contain exog data, for multivariate forecasting purpose,
            the data must be shifted by n_predict steps. This will cause a few things:
            1. y column will be remapped to exog data that is n_predict unit of time ago
            2. n_predict length of the oldest y will be trimmed off
            3. n_predict length of exog values will be used for the forecasting
        '''
        logging.debug('Start forecasting ...')

        self.enable_exog = enable_exog

        # Below properties will be init by prep_data()
        self.data: pd.DataFrame = None
        self.y = None
        self.exog = None
        self.freq: str = None
        self.period: int = None

        self.y_test = None

        self.n_predict: int = None  # init by calculate_n_predict()

        self.kwargs = {}

        self.results = []  # Contains all result value

        # Prepare data, including set the datetime index, split y and exog columns
        self.prep_data(data)

        # Calculate n_predict value based on self.period
        self.calculate_n_predict()

        # Init the basic kwargs for models to use
        self.init_kwargs()

        # Shift exog value by n_predict unit of time
        self.shift_exog()

        # Split test set for testing purpose
        # (must run after shift_exog so y and exog are trimmed consistently)
        if test:
            logging.debug('Testing ...')
            self.train_test_split()

        # ================================ #
        # Train models and make prediction #
        # ================================ #

        self.init_models(models)

        for model_name, model in self.models.items():
            fcst = model.forecast()

            # A model may hand back a dict-like payload or the bare forecast.
            # Only look keys up when the object has them, so a plain list or
            # array falls through to the "bare forecast" case instead of
            # raising AttributeError.
            keys = fcst.keys() if hasattr(fcst, 'keys') else ()

            result = {
                'model': model_name,
                'result': fcst['forecast'] if 'forecast' in keys else fcst,
                'evaluate': fcst['evaluate'] if 'evaluate' in keys else None,
                'rmse': None,
            }

            if test:
                mse = mean_squared_error(self.y_test, result['result'])
                result['rmse'] = sqrt(mse)

            self.results.append(result)

        # - END of forecast - #

    def init_models(self, models):
        '''
        Build the requested models and store them on self.models

        models :
            Model selection, handed unchanged to AllModels
        '''
        logging.debug('Init models')

        registry = AllModels(models)

        # Positional order is y, n_predict, exog; self.kwargs goes as keywords
        built = registry.init_models(
            self.y, self.n_predict, self.exog, **self.kwargs
        )
        self.models = built

    def prep_data(
        self,
        data: pd.DataFrame
    ) -> None:
        '''
        Index the data by datetime, infer its frequency and split y / exog

        Works on a copy, so the caller's frame is left untouched. Sets
        self.data, self.freq, self.period and self.y; self.exog is only set
        when there are columns besides "y" and self.enable_exog is truthy

        data : pd.DataFrame, required
            Must contain "datetime" and "y" columns

        Raises ValueError when the frequency of the datetime index cannot
        be inferred
        '''
        logging.debug('Prep data')

        self.data = data.copy()
        self.data.set_index('datetime', inplace=True)
        self.data.index = pd.to_datetime(self.data.index)

        logging.debug('Inferencing freq and period')
        self.freq = pd.infer_freq(self.data.index)

        # infer_freq gives None for irregular timestamps; stop here with a
        # clear message instead of passing None on to freq_to_period
        if self.freq is None:
            raise ValueError(
                'Unable to infer the frequency of the "datetime" column; '
                'timestamps must be regularly spaced')

        self.period = freq_to_period(self.freq)

        self.y = self.data['y']

        if len(self.data.columns) > 1 and self.enable_exog:
            self.exog = self.data.drop(columns='y')

    def calculate_n_predict(self):
        '''
        Decide how many steps to forecast and store it in self.n_predict

        Starts from one seasonal cycle (self.period) capped at 20 steps,
        then caps that at 20% of the number of rows in self.data, and
        finally raises it to 4 if it ended up smaller
        '''
        one_fifth = int(len(self.data) * 0.2)

        steps = min(20, self.period, one_fifth)

        # The floor is applied last, so on short data it wins over the 20% cap
        self.n_predict = max(steps, 4)

    def init_kwargs(self):
        '''
        Fill self.kwargs, the keyword arguments given to the models on init

        Only one entry is written: "period", taken from self.period
        '''
        self.kwargs.update(period=self.period)

    def train_test_split(self):
        '''
        Hold out the last n_predict values of y as self.y_test

        self.y keeps everything before the hold-out, and self.exog (when
        present) loses its last n_predict rows as well. Each model may
        still run its own cross validation on top of this
        '''
        logging.debug('Train test split')

        holdout = self.n_predict

        # Both slices are taken from the untrimmed y before either is stored
        self.y, self.y_test = self.y[:-holdout], self.y[-holdout:]

        if self.exog is None:
            return

        self.exog = self.exog[:-holdout]

    def shift_exog(self):
        '''
        Line y up with exog values from n_predict periods earlier

        The exog index is moved forward by n_predict steps of self.freq and
        the first n_predict values of y are dropped. Does nothing when
        self.exog is None
        '''
        if self.exog is None:
            return

        steps = self.n_predict

        logging.debug('Shifted exog datetime index by n_predict period')
        moved_index = self.exog.index.shift(steps, freq=self.freq)
        self.exog.index = moved_index

        logging.debug(
            'Trimmed y by n_predict, so it is aligned with shifted exog')
        self.y = self.y[steps:]