import pandas as pd
from sklearn.model_selection import train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from statsmodels.tsa.tsatools import freq_to_period
from xgboost import XGBRegressor


class MultivariateForecasting:
    """Forecast a target series ``y`` using lagged exogenous columns.

    Exogenous row ``i`` is paired with target row ``i + n_predict``. This
    means the newest ``n_predict`` exogenous rows have no matching target yet
    and are used as the regressors for the ``n_predict`` future steps.

    Results of each trained model are stored in ``self.models`` under the
    model's name.
    """

    def __init__(
        self,
        n_predict: int,
        data: pd.DataFrame,
        window_length: int = 10,
    ):
        """Prepare training, holdout and forecasting inputs.

        Args:
            n_predict: How many future values are expected to be produced.
                The holdout (test) set has the same size.
            data: Must contain a ``datetime`` column and a ``y`` column
                (the target). Every other column is treated as exogenous
                data. The frame passed in is not modified.
            window_length: Number of lagged observations used by the
                reduction forecaster. Defaults to 10.

        Raises:
            ValueError: If ``n_predict`` is smaller than 1, if ``data`` has
                too few rows to leave a non-empty training set, or if the
                datetime index has no regular frequency (the future
                timestamps cannot be derived without one).
        """
        if n_predict < 1:
            # ``frame[:-0]`` is empty and ``frame[-0:]`` is the whole frame,
            # so a zero horizon would silently scramble every split below.
            raise ValueError(f"n_predict must be >= 1, got {n_predict}")

        self.n_predict = n_predict
        self.window_length = window_length

        # Set datetime as the index (set_index returns a new frame, so the
        # caller's data is left untouched).
        frame = data.set_index('datetime')
        frame.index = self._with_inferred_freq(pd.to_datetime(frame.index))
        self.data = frame

        if len(self.data) <= 2 * self.n_predict:
            raise ValueError(
                "data needs more than 2 * n_predict rows: n_predict rows are "
                "lost to the exogenous lag and n_predict more to the holdout "
                f"(got {len(self.data)} rows for n_predict={self.n_predict})"
            )

        self.exog = self.data.drop(columns=['y']).reset_index(drop=True)

        # Keep the latest n_predict rows of exog data for prediction use.
        self.exog_train = self.exog[:-self.n_predict]
        self.exog_pred = self.exog[-self.n_predict:]

        # Those n_predict exog rows are reserved for prediction, so the first
        # n_predict rows of y are dropped to keep both sides the same length.
        self.y = self.data['y'][self.n_predict:]

        # The forecaster needs y and X to share an index: relabel the lagged
        # exogenous rows with the timestamps of the targets they explain.
        self.X = self.exog_train.set_axis(self.y.index, axis=0)

        # Holdout split: the last n_predict observations are used for testing.
        self.X_train = self.X[:-self.n_predict]
        self.X_test = self.X[-self.n_predict:]
        self.y_train = self.y[:-self.n_predict]
        self.y_test = self.y[-self.n_predict:]
        self.fh_test = ForecastingHorizon(self.y_test.index, is_relative=False)

        # Future horizon: the n_predict timestamps following the last
        # observation, paired with the held-back exogenous rows.
        future_index = self._future_index()
        self.X_pred = self.exog_pred.set_axis(future_index, axis=0)
        self.fh = ForecastingHorizon(future_index, is_relative=False)

        self.models = {}

    @staticmethod
    def _with_inferred_freq(index: pd.Index) -> pd.Index:
        """Return *index* with a frequency attached when one can be inferred."""
        if (
            not isinstance(index, pd.DatetimeIndex)
            or index.freq is not None
            or len(index) < 3  # pd.infer_freq needs at least three stamps
            or not index.is_monotonic_increasing
        ):
            return index
        inferred = pd.infer_freq(index)
        if inferred is None:
            return index
        return pd.DatetimeIndex(index, freq=inferred)

    def _future_index(self) -> pd.DatetimeIndex:
        """Return the ``n_predict`` timestamps following the last observation."""
        freq = getattr(self.data.index, 'freq', None)
        if freq is None:
            raise ValueError(
                "the 'datetime' column must be sorted and regularly spaced so "
                "that the future timestamps can be derived"
            )
        # date_range starts at the last observed stamp; drop it to keep only
        # the strictly-future ones.
        steps = pd.date_range(
            start=self.data.index[-1],
            periods=self.n_predict + 1,
            freq=freq,
        )
        return steps[1:]

    def train_xgboost(self):
        """Evaluate and fit a recursive XGBoost reduction forecaster.

        The forecaster is first fitted on the training split and scored on
        the holdout, then refitted on all available data to forecast the
        ``n_predict`` future steps. Results are stored in
        ``self.models['xgboost']`` under the keys ``'mape'`` (holdout MAPE),
        ``'test'`` (holdout predictions) and ``'forecast'`` (future
        predictions).
        """
        regressor = XGBRegressor(
            objective='reg:squarederror',
            random_state=42,
        )
        forecaster = make_reduction(
            regressor,
            strategy='recursive',
            window_length=self.window_length,
        )

        # Holdout evaluation.
        forecaster.fit(y=self.y_train, X=self.X_train)
        y_pred = forecaster.predict(fh=self.fh_test, X=self.X_test)

        self.models['xgboost'] = {}
        self.models['xgboost']['mape'] = mean_absolute_percentage_error(
            self.y_test, y_pred, symmetric=False)
        self.models['xgboost']['test'] = y_pred

        # Refit on everything, then forecast the future steps using the
        # exogenous rows that were held back for that purpose.
        forecaster.fit(y=self.y, X=self.X)
        self.models['xgboost']['forecast'] = forecaster.predict(
            fh=self.fh, X=self.X_pred)