File size: 4,434 Bytes
5c6133b fd9c460 de1d2d0 2689c6a de1d2d0 2689c6a fd9c460 7cf22f7 fd9c460 2689c6a fd9c460 5c6133b fd9c460 2689c6a fd9c460 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import os
from io import BytesIO
import streamlit as st
import pandas as pd
from neuralprophet import NeuralProphet
import matplotlib.pyplot as plt
import torch
# Set the random seed for reproducibility.
# NOTE(review): only PyTorch's generator is seeded here; no other random
# source is seeded in this part of the file.
torch.manual_seed(42)
def preprocess_data(data, ticker):
    """Build the two-column ('ds', 'y') frame used for forecasting one ticker.

    The rows for ``ticker`` are selected, 'Date' is parsed into UTC
    timestamps, rows sharing the same timestamp are collapsed to the one with
    the highest 'Volume', and 'Date'/'Close' are returned renamed to 'ds'/'y'.

    The input frame is left untouched: all work happens on a filtered copy.

    Args:
        data: DataFrame with at least the columns 'Date', 'Ticker', 'Close'
            and 'Volume'.
        ticker: Value matched against the 'Ticker' column.

    Returns:
        DataFrame with columns 'ds' (timezone-aware, UTC) and 'y', ordered by
        'ds' ascending. Empty when no row matches ``ticker``.

    Raises:
        KeyError: If one of the required columns is missing from ``data``.
    """
    # Filter first and copy, so the caller's DataFrame keeps its original
    # 'Date' values and only the selected ticker's dates need parsing.
    ticker_data = data.loc[data['Ticker'] == ticker].copy()

    # Convert 'Date' to timezone-aware (UTC) datetimes.
    ticker_data['Date'] = pd.to_datetime(ticker_data['Date'], utc=True)

    # Remove duplicates by keeping the row with the highest 'Volume' for each
    # 'Date': sort Volume descending within a date, then keep the first row.
    ticker_data = ticker_data.sort_values(
        ['Date', 'Volume'], ascending=[True, False]
    ).drop_duplicates(subset=['Date'], keep='first')

    # Rename 'Date' to 'ds' and 'Close' to 'y' for NeuralProphet.
    ticker_data = ticker_data[['Date', 'Close']].rename(
        columns={'Date': 'ds', 'Close': 'y'}
    )

    if ticker == "ADDYY":
        # NOTE(review): ADDYY history before 2015 is discarded; the reason is
        # not visible in this file. The cutoff is spelled as an explicit UTC
        # timestamp so it is compared like-for-like with the tz-aware column.
        cutoff = pd.Timestamp('2015-01-01', tz='UTC')
        ticker_data = ticker_data[ticker_data['ds'] >= cutoff]

    return ticker_data
def forecast(data, epochs, test_size=90, train_fraction=0.80):
    """Train a NeuralProphet model on a chronological split and predict the hold-out.

    The last ``test_size`` rows form the test set. The remaining rows are
    split in order into a training part (the first ``train_fraction`` of
    them) and a validation part (the rest).

    Args:
        data: DataFrame in row order, with the 'ds' and 'y' columns passed on
            to NeuralProphet.
        epochs: Number of training epochs passed to ``model.fit``.
        test_size: Number of trailing rows held out for testing. Note these
            are rows, not calendar days.
        train_fraction: Share of the non-test rows used for training; must be
            strictly between 0 and 1.

    Returns:
        Tuple ``(forecast_test, model, metrics)``: the prediction frame for
        the test rows, the fitted model, and the value returned by
        ``model.fit``.

    Raises:
        ValueError: If ``test_size`` or ``train_fraction`` is out of range, or
            if ``data`` has too few rows to produce non-empty training,
            validation and test sets.
    """
    if test_size <= 0:
        # iloc[-0:] would select every row, not zero rows.
        raise ValueError("test_size must be a positive number of rows")
    if not 0 < train_fraction < 1:
        raise ValueError("train_fraction must be strictly between 0 and 1")

    n = len(data)
    if n <= test_size:
        # Without this guard, `n - test_size` is <= 0 and the slice below
        # either yields no training rows or (negative stop) rows that are
        # also part of the test set.
        raise ValueError(
            f"Need more than {test_size} rows to hold out a test set, got {n}"
        )

    # Define the test set as the last `test_size` rows.
    test_data = data.iloc[-test_size:]

    # Split the remaining rows (excluding the test set) into training and
    # validation, preserving order.
    train_valid_data = data.iloc[:n - test_size]
    train_end = int(train_fraction * len(train_valid_data))
    train_data = train_valid_data.iloc[:train_end]
    valid_data = train_valid_data.iloc[train_end:]
    if train_data.empty or valid_data.empty:
        raise ValueError(
            "Not enough rows left after the test split to form both a "
            "training and a validation set"
        )

    model: NeuralProphet = NeuralProphet(
        trend_reg=0.0001,
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        learning_rate=0.001,
        seasonality_mode='multiplicative',
    )

    # Train the model on training data and validate on validation data.
    # freq='B' tells NeuralProphet to expect data only on business days
    # (excluding weekends).
    metrics = model.fit(
        train_data,
        validation_df=valid_data,
        freq='B',
        epochs=epochs,
        early_stopping=True,
    )

    # After training, predict on the held-out test set.
    forecast_test = model.predict(test_data)
    return forecast_test, model, metrics
def plot_training_loss(metrics):
    """Render the training and validation loss curves in the Streamlit page.

    Args:
        metrics: Table-like object with 'Loss' and 'Loss_val' columns, one
            entry per epoch (here, the value returned by ``model.fit``).

    Raises:
        KeyError: If either loss column is missing from ``metrics``.
    """
    # Extract training and validation loss from metrics.
    training_loss = metrics['Loss'].values
    validation_loss = metrics['Loss_val'].values

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(training_loss, label="Training Loss")
        ax.plot(validation_loss, label="Validation Loss")
        ax.set_title("Training vs Validation Loss")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend()
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed; without
        # this, every call would leave another figure alive in the process.
        plt.close(fig)
# Streamlit App
# Page header: show the app title, then an informational banner telling the
# user that a dataset is loaded automatically and a ticker must be chosen.
st.title("Stock Prediction with NeuralProphet")
st.info("Test stock data is auto loaded. Please select a ticker to start forecasting.")
def get_default_file(path='World-Stock-Prices-Dataset.csv'):
    """Read a dataset file fully into memory.

    Args:
        path: File to read. Defaults to the bundled dataset name; a relative
            path is resolved against the current working directory.

    Returns:
        A ``BytesIO`` holding the file's bytes, or ``None`` when the file
        does not exist.
    """
    try:
        with open(path, 'rb') as f:
            return BytesIO(f.read())
    except FileNotFoundError:
        return None
# Main page flow: pick a data source, choose a ticker and epoch count, then
# train and display the forecast on demand.
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# If no file is uploaded, use the default file.
if uploaded_file is None:
    uploaded_file = get_default_file()
    if uploaded_file is not None:
        st.info("Using default dataset: World-Stock-Prices-Dataset.csv")
    else:
        st.warning("Default dataset World-Stock-Prices-Dataset.csv not found.")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # Report a missing column to the user instead of failing with a KeyError
    # further down.
    required_columns = ['Date', 'Ticker', 'Close', 'Volume']
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        st.error(
            "The CSV file is missing required column(s): "
            + ", ".join(missing_columns)
        )
    else:
        tickers = data['Ticker'].unique()

        # Preselect the Adidas ticker ADDYY when present; an uploaded file
        # may not contain it, so fall back to the first ticker.
        ticker_list = tickers.tolist()
        default_index = ticker_list.index("ADDYY") if "ADDYY" in ticker_list else 0

        ticker = st.selectbox("Select Ticker", tickers, index=default_index)
        data_processed = preprocess_data(data, ticker)
        epochs = st.slider("Select Epochs", 10, 200, 200, step=10)

        if st.button("Train Model and Forecast"):
            forecast_data, model, metrics = forecast(data_processed, epochs)

            # Forecast over the held-out rows.
            fig = model.plot(forecast_data)
            st.plotly_chart(fig)

            plot_training_loss(metrics)

            # Component plots for the same forecast.
            fig_components = model.plot_components(forecast_data)
            st.plotly_chart(fig_components)
|