# demand-forecasting/src/forecast/multivariate.py
# Author: zhang qiao
# Commit 8cf4695: "Upload folder using huggingface_hub"
import pandas as pd
from xgboost import XGBRegressor
from sktime.forecasting.compose import make_reduction
from statsmodels.tsa.tsatools import freq_to_period
from sklearn.model_selection import train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
class MultivariateForecasting:
    """Forecast a target series ``y`` from lagged exogenous features.

    The exogenous columns are shifted by ``n_predict`` steps relative to the
    target: the exogenous row at position ``i`` is paired with the target at
    position ``i + n_predict``. That leaves the latest ``n_predict`` exogenous
    rows unpaired, and they become the features for the ``n_predict`` future
    steps that are forecast.

    Results of each trained model are stored in ``self.models`` under the
    model's name.
    """

    def __init__(
        self,
        n_predict: int,
        data: pd.DataFrame,
        window_length: int = 10
    ):
        '''
        data: must contain a ``datetime`` column and a ``y`` column (the
            target). Every other column is treated as exogenous data.
            The frame passed in is copied, not modified.
        n_predict: how many future values are expected to be produced.
            The holdout (test) split has the same size.
        window_length: number of lagged target values used as features by
            the reduction forecaster. Defaults to 10.

        Raises ValueError when ``n_predict`` is smaller than 1 or is not
        smaller than the number of rows in ``data``.
        '''
        # ``[:-0]`` is empty and ``[-0:]`` is the whole frame, so a
        # non-positive n_predict would silently produce mismatched splits.
        if n_predict < 1:
            raise ValueError('n_predict must be at least 1')
        if n_predict >= len(data):
            raise ValueError(
                'n_predict must be smaller than the number of rows in data')

        self.data = data.copy()
        self.n_predict = n_predict
        self.window_length = window_length

        # Set datetime as the index
        self.data.set_index('datetime', inplace=True)
        self.data.index = self._with_inferred_freq(
            pd.to_datetime(self.data.index))

        self.exog = self.data.drop(columns=['y']).reset_index(drop=True)

        # Keep n_predict rows of latest exog data for prediction use
        self.exog_train = self.exog.iloc[:-self.n_predict]
        self.exog_pred = self.exog.iloc[-self.n_predict:]

        # Drop the first n_predict target rows so that y has as many rows as
        # exog_train (each target is paired with the exog row n_predict
        # steps earlier).
        self.y = self.data['y'].iloc[self.n_predict:]

        # Same values as exog_train, but labelled with the timestamps of the
        # target rows they are paired with, so y and X share one index.
        self.X = self.exog_train.copy()
        self.X.index = self.y.index

        # Hold out the last n_predict paired rows for evaluation.
        self.X_train = self.X.iloc[:-self.n_predict]
        self.X_test = self.X.iloc[-self.n_predict:]
        self.y_train = self.y.iloc[:-self.n_predict]
        self.y_test = self.y.iloc[-self.n_predict:]

        self.models = {}

    @staticmethod
    def _with_inferred_freq(index):
        """Return *index* with a frequency attached when pandas can infer one."""
        if getattr(index, 'freq', None) is not None:
            return index
        try:
            return pd.DatetimeIndex(index, freq='infer')
        except (TypeError, ValueError):
            # Too few timestamps (or not datetime-like): leave it unchanged.
            return index

    def _future_index(self):
        """Return the ``n_predict`` timestamps that follow the last target row."""
        freq = getattr(self.y.index, 'freq', None)
        if freq is None:
            raise ValueError(
                'Cannot build future timestamps: the datetime column has no '
                'regular frequency')
        # The range starts at the last observed timestamp; drop it so only
        # the future steps remain.
        return pd.date_range(
            start=self.y.index[-1],
            periods=self.n_predict + 1,
            freq=freq)[1:]

    @property
    def X_pred(self):
        """The held-back exogenous rows, labelled with the future timestamps."""
        X_pred = self.exog_pred.copy()
        X_pred.index = self._future_index()
        return X_pred

    @property
    def fh_test(self):
        """Absolute forecasting horizon covering the holdout timestamps."""
        return ForecastingHorizon(self.y_test.index, is_relative=False)

    @property
    def fh(self):
        """Absolute forecasting horizon covering the future timestamps."""
        return ForecastingHorizon(self._future_index(), is_relative=False)

    def train_xgboost(self):
        """Evaluate and fit a recursive XGBoost reduction forecaster.

        The forecaster is first fitted on the training split and scored on
        the holdout split, then refitted on all paired rows to forecast the
        next ``n_predict`` steps. Results are written to
        ``self.models['xgboost']`` under the keys ``'mape'``
        (non-symmetric MAPE on the holdout), ``'test'`` (holdout
        predictions) and ``'forecast'`` (future predictions).

        Raises ValueError when the datetime index has no regular frequency,
        because the future timestamps cannot be constructed.
        '''"""
        regressor = XGBRegressor(
            objective='reg:squarederror',
            random_state=42)
        forecaster = make_reduction(
            regressor, strategy='recursive',
            window_length=self.window_length)

        # Holdout evaluation.
        forecaster.fit(y=self.y_train, X=self.X_train)
        y_pred = forecaster.predict(fh=self.fh_test, X=self.X_test)
        self.models['xgboost'] = {}
        self.models['xgboost']['mape'] = mean_absolute_percentage_error(
            self.y_test, y_pred, symmetric=False)
        self.models['xgboost']['test'] = y_pred

        # Refit on every paired row, then forecast the future steps from the
        # held-back exogenous rows.
        forecaster.fit(y=self.y, X=self.X)
        self.models['xgboost']['forecast'] = forecaster.predict(
            fh=self.fh, X=self.X_pred)