implement the prediction, add requirements.txt
Browse files- app.py +100 -2
- requirements.txt +5 -0
app.py
CHANGED
@@ -1,4 +1,102 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from neuralprophet import NeuralProphet
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import torch
|
6 |
|
7 |
+
# Set the random seed for reproducibility
|
8 |
+
torch.manual_seed(42)
|
9 |
+
|
10 |
+
|
11 |
+
def preprocess_data(data, ticker):
    """Build the two-column frame used for NeuralProphet from one ticker's rows.

    Args:
        data: DataFrame with at least 'Date', 'Ticker', 'Close' and 'Volume'
            columns. It is read only; the caller's frame is left unchanged.
        ticker: Value of the 'Ticker' column whose rows are kept.

    Returns:
        A new DataFrame with columns 'ds' (UTC timestamps parsed from 'Date',
        ascending, one row per timestamp) and 'y' (the 'Close' value).

    Raises:
        KeyError: If one of the required columns is missing from ``data``.
    """
    # Parse into a separate Series instead of assigning back onto `data`:
    # writing the parsed column into the caller's frame would change its
    # 'Date' dtype as a hidden side effect of calling this function.
    timestamps = pd.to_datetime(data['Date'], utc=True)

    # Keep only the selected ticker's rows and attach the parsed timestamps
    # under the name NeuralProphet expects ('ds').
    selected = data['Ticker'] == ticker
    ticker_data = data.loc[selected, ['Close', 'Volume']].assign(ds=timestamps[selected])

    # When a timestamp occurs more than once, keep the row with the highest
    # 'Volume': sort volume-descending within each timestamp, keep the first.
    ticker_data = ticker_data.sort_values(['ds', 'Volume'], ascending=[True, False])
    ticker_data = ticker_data.drop_duplicates(subset=['ds'], keep='first')

    # Rename 'Close' to 'y' for NeuralProphet and drop the helper column.
    ticker_data = ticker_data[['ds', 'Close']].rename(columns={'Close': 'y'})

    if ticker == "ADDYY":
        # Only ADDYY history from 2015-01-01 (UTC) onwards is used.
        ticker_data = ticker_data[ticker_data['ds'] >= pd.Timestamp('2015-01-01', tz='UTC')]
    return ticker_data
|
30 |
+
|
31 |
+
|
32 |
+
def forecast(data, epochs, test_size=90, train_size=0.80):
    """Fit a NeuralProphet model and predict the held-out tail of ``data``.

    The split is positional: the last ``test_size`` rows are the test set,
    and the rows before it are divided in order into a training part (the
    first ``train_size`` fraction) and a validation part (the remainder).
    Rows are therefore expected to be in chronological order.

    Args:
        data: DataFrame with 'ds' and 'y' columns, as NeuralProphet expects.
        epochs: Number of training epochs passed to ``model.fit``.
        test_size: Number of trailing rows held out for testing.
        train_size: Fraction (strictly between 0 and 1) of the non-test rows
            used for training; the rest is used for validation.

    Returns:
        A tuple ``(forecast_test, model, metrics)``: the result of
        ``model.predict`` on the test rows, the fitted model, and whatever
        ``model.fit`` returned.

    Raises:
        ValueError: If ``test_size`` or ``train_size`` is out of range, or if
            ``data`` has too few rows to give non-empty training and
            validation sets in addition to the test set.
    """
    if test_size < 1:
        raise ValueError("test_size must be at least 1")
    if not 0 < train_size < 1:
        raise ValueError("train_size must be strictly between 0 and 1")

    n = len(data)
    remaining = n - test_size
    train_end = int(train_size * remaining)

    # With too few rows, `data.iloc[:n - test_size]` would get a zero or
    # negative stop: the training/validation rows would be empty or would
    # overlap the test rows. Fail early with a clear message instead.
    if train_end < 1 or train_end >= remaining:
        raise ValueError(
            f"Not enough rows to split: got {n}, but {test_size} are reserved "
            "for testing and the rest must fill both a training and a "
            "validation set."
        )

    # Test set: the last `test_size` rows
    test_data = data.iloc[-test_size:]

    # Everything before the test set is split into training and validation
    train_valid_data = data.iloc[:remaining]
    train_data = train_valid_data.iloc[:train_end]
    valid_data = train_valid_data.iloc[train_end:]

    model: NeuralProphet = NeuralProphet(trend_reg=0.0001, yearly_seasonality=True, weekly_seasonality=True,
                                         daily_seasonality=False,
                                         learning_rate=0.001, seasonality_mode='multiplicative')

    # Train the model on training data and validate on validation data
    # freq='B' tells NeuralProphet to expect data only on business days (excluding weekends)
    metrics = model.fit(train_data, validation_df=valid_data, freq='B', epochs=epochs, early_stopping=True)

    # After training, test the model on the test data
    forecast_test = model.predict(test_data)
    return forecast_test, model, metrics
|
61 |
+
|
62 |
+
|
63 |
+
def plot_training_loss(metrics):
    """Draw training and validation loss per epoch and render it in Streamlit.

    Args:
        metrics: Table with 'Loss' and 'Loss_val' columns (here, the value
            returned by ``model.fit``); each column is plotted against its
            row position, labelled as the epoch.

    Raises:
        KeyError: If either loss column is missing from ``metrics``.
    """
    # Extract training and validation loss from metrics
    training_loss = metrics['Loss'].values
    validation_loss = metrics['Loss_val'].values

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(training_loss, label="Training Loss")
        ax.plot(validation_loss, label="Validation Loss")
        ax.set_title("Training vs Validation Loss")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed,
        # so without this each call would leave another figure in memory.
        plt.close(fig)
|
77 |
+
|
78 |
+
|
79 |
+
# Streamlit App: upload a CSV, pick a ticker, train on demand and show the plots.
st.title("Stock Prediction with NeuralProphet")
st.write("Upload your stock data and select a ticker to forecast.")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file:
    data = pd.read_csv(uploaded_file)

    # The code below reads these columns; report a wrong file up front instead
    # of failing later with a KeyError traceback.
    missing_columns = [
        column for column in ("Date", "Ticker", "Close", "Volume") if column not in data.columns
    ]
    if missing_columns:
        st.error("The uploaded CSV is missing required column(s): " + ", ".join(missing_columns))
        st.stop()

    tickers = data['Ticker'].unique()
    # Preselect the Adidas ticker (ADDYY) when the upload contains it.
    # list.index raises ValueError for an absent value, so fall back to the
    # first ticker for files without ADDYY.
    ticker_list = tickers.tolist()
    default_index = ticker_list.index("ADDYY") if "ADDYY" in ticker_list else 0
    ticker = st.selectbox("Select Ticker", tickers, index=default_index)

    data_processed = preprocess_data(data, ticker)

    epochs = st.slider("Select Epochs", 10, 200, 200, step=10)

    # Training only runs when the button is pressed.
    if st.button("Train Model and Forecast"):
        forecast_data, model, metrics = forecast(data_processed, epochs)
        # Forecast on the held-out rows
        fig = model.plot(forecast_data)
        st.plotly_chart(fig)
        # Training vs validation loss
        plot_training_loss(metrics)
        # Component plots of the forecast
        fig_components = model.plot_components(forecast_data)
        st.plotly_chart(fig_components)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.8.4
|
2 |
+
neuralprophet==0.9.0
|
3 |
+
pandas==2.2.3
|
4 |
+
streamlit==1.42.2
|
5 |
+
torch==2.6.0
|