import pandas as pd
from xgboost import XGBRegressor
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.model_selection import SlidingWindowSplitter, SingleWindowSplitter
from sktime.forecasting.model_selection import ForecastingRandomizedSearchCV
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError


class XGBoost:
    """XGBoost forecaster using sktime recursive reduction with randomized
    hyperparameter search.

    Wraps an ``XGBRegressor`` in ``make_reduction`` so the tabular regressor
    can be used for time-series forecasting, and tunes it with
    ``ForecastingRandomizedSearchCV`` on a single held-out validation window.
    """

    def __init__(self) -> None:
        # Fixed random_state keeps both the booster and the randomized
        # search reproducible across runs.
        self.estimator = XGBRegressor(
            objective='reg:squarederror', random_state=42)
        # Search space deliberately kept small; widen these lists
        # (max_depth, subsample, colsample_*, ...) if tuning budget allows.
        self.cv_params = {
            'n_estimators': [100, 500],
            'learning_rate': [0.01, 0.1, 0.2],
        }
        # When True, predictions are rounded to whole numbers
        # (presumably a count-like target — confirm against callers).
        self.round_result = True

    def fit_predict(
            self,
            y: pd.DataFrame,
            y_train: pd.DataFrame,
            window_length,
            fh,
            fh_test,
            params,
            X: pd.DataFrame = None,
            X_train=None,
            X_test=None,
            X_future: pd.DataFrame = None
    ):
        """Tune, fit, and forecast.

        Parameters
        ----------
        y : pd.DataFrame
            Full endogenous series (train + test), used to update the model
            before the final forecast.
        y_train : pd.DataFrame
            Training portion of the series used for tuning and fitting.
        window_length : int
            Lag window length for the recursive reduction.
        fh : forecasting horizon (sized object)
            Out-of-sample horizon for the final forecast.
        fh_test : forecasting horizon
            Horizon covering the test period, predicted right after fitting.
        params : dict
            Hyperparameter distributions for the wrapped regressor
            (un-namespaced keys, e.g. ``'n_estimators'``).
        X, X_train, X_test, X_future : optional exogenous data aligned with
            ``y``, ``y_train``, ``fh_test`` and ``fh`` respectively.

        Returns
        -------
        tuple
            ``(y_pred, y_forecast, best_params)`` — test-period predictions,
            out-of-sample forecast, and the best hyperparameters found.
        """
        print('[XGboost fit predict]')

        # sktime tunes the wrapped regressor, so keys must be namespaced
        # with 'estimator__' to reach through the reduction wrapper.
        param_grid = {f"estimator__{k}": v for k, v in params.items()}

        forecaster = make_reduction(
            self.estimator, strategy='recursive', window_length=window_length)

        # Hold out the last len(fh) observations of the training series as
        # the single validation window for hyperparameter selection.
        cv = SingleWindowSplitter(
            window_length=len(y_train) - len(fh), fh=len(fh))

        gscv = ForecastingRandomizedSearchCV(
            forecaster,
            cv=cv,
            param_distributions=param_grid,
            n_iter=100,
            random_state=42,
            # sktime expects a BaseMetric instance here; passing the bare
            # metric *function* (mean_absolute_percentage_error) raises a
            # TypeError in several sktime versions.
            scoring=MeanAbsolutePercentageError(),
            update_behaviour='inner_only',
            error_score='raise')

        gscv.fit(y=y_train, X=X_train)
        y_pred = gscv.predict(fh=fh_test, X=X_test)

        # Refresh the fitted model with the full series (no re-tuning:
        # update_params=False) before producing the out-of-sample forecast.
        gscv.update(y=y, X=X, update_params=False)
        y_forecast = gscv.predict(fh=fh, X=X_future)

        best_params = gscv.best_params_

        if self.round_result:
            # Use the pandas .round() method: the builtin round() has no
            # DataFrame support (no __round__) and would raise TypeError.
            y_pred = y_pred.round()
            y_forecast = y_forecast.round()

        return (y_pred, y_forecast, best_params)