Create arima_forecast.py
Browse files- arima_forecast.py +27 -0
arima_forecast.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# arima_forecast.py
|
2 |
+
import pandas as pd
|
3 |
+
from statsmodels.tsa.arima.model import ARIMAResults
|
4 |
+
from sklearn.metrics import mean_absolute_percentage_error
|
5 |
+
|
6 |
+
def load_model(path: str = "best_arima_model.pkl"):
    """Load a previously saved ARIMA results object from *path*.

    Args:
        path: Location of the saved results file; passed unchanged to
            ``ARIMAResults.load``.

    Returns:
        The object returned by ``ARIMAResults.load`` for that file.
    """
    # SECURITY: statsmodels' ``load`` unpickles the file, and unpickling can
    # execute arbitrary code. Only pass paths to files from a trusted source.
    return ARIMAResults.load(path)
|
8 |
+
|
9 |
+
def load_timeseries(df: pd.DataFrame, comm_code, start="2012-04-01") -> pd.Series:
    """Build a monthly time series for one commodity from a wide table.

    The row whose ``COMM_CODE`` equals *comm_code* is selected, and the values
    of every column whose name starts with ``INDX`` are laid out, in column
    order, on a month-start date index beginning at *start*.

    Args:
        df: Table with a ``COMM_CODE`` column and one ``INDX*`` column per
            month.
        comm_code: Value to match against ``COMM_CODE``.
        start: First date of the resulting index (month-start frequency).

    Returns:
        A Series with one value per ``INDX*`` column.

    Raises:
        ValueError: If *comm_code* does not match exactly one row. Zero or
            several matches would leave the values out of step with the
            date index.
    """
    monthly_cols = [
        col
        for col in df.columns
        # Non-string labels cannot be month columns and have no startswith.
        if isinstance(col, str) and col.startswith("INDX")
    ]

    matches = df[df["COMM_CODE"] == comm_code]
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one row with COMM_CODE == {comm_code!r}, "
            f"found {len(matches)}"
        )

    ts_values = matches[monthly_cols].values.flatten()
    index = pd.date_range(start=start, periods=len(monthly_cols), freq="MS")
    return pd.Series(ts_values, index=index)
|
15 |
+
|
16 |
+
def forecast(comm_code, forecast_months=6, csv_path="data.csv",
             model_path="best_arima_model.pkl"):
    """Forecast the last *forecast_months* of a commodity series and score it.

    The series for *comm_code* is read from *csv_path*, its final
    *forecast_months* observations are held out, the saved model is asked for
    that many steps, and the result is compared with the held-out values.

    Args:
        comm_code: Commodity code to select from the CSV's ``COMM_CODE`` column.
        forecast_months: Number of trailing months to hold out and forecast;
            must be between 1 and the length of the series.
        csv_path: CSV file containing the wide commodity table.
        model_path: Saved model file handed to ``load_model``.

    Returns:
        A ``(forecast, test, mape)`` tuple: the forecast re-indexed onto the
        held-out dates, the held-out observations, and their mean absolute
        percentage error.

    Raises:
        ValueError: If *forecast_months* is smaller than 1 or larger than the
            number of observations in the series.
    """
    # Reject non-positive horizons up front: ``ts[-0:]`` would silently select
    # the whole series rather than an empty holdout.
    if forecast_months < 1:
        raise ValueError(
            f"forecast_months must be at least 1, got {forecast_months!r}"
        )

    df = pd.read_csv(csv_path)
    ts = load_timeseries(df, comm_code)
    if forecast_months > len(ts):
        raise ValueError(
            f"forecast_months ({forecast_months}) exceeds the series length "
            f"({len(ts)})"
        )
    test = ts.iloc[-forecast_months:]

    # NOTE(review): the saved model is not refit on ``ts``; it forecasts from
    # the end of whatever sample it was trained on. The score below is only
    # meaningful if that sample is this series minus the holdout. Confirm.
    model = load_model(model_path)
    predicted = model.forecast(steps=forecast_months)
    if isinstance(predicted, pd.Series):
        # Align on the holdout dates so the comparison is like-for-like.
        predicted.index = test.index
    else:
        # statsmodels returns a plain array when the model had no pandas index.
        predicted = pd.Series(predicted, index=test.index)

    mape = mean_absolute_percentage_error(test, predicted)
    return predicted, test, mape
|