File size: 4,434 Bytes
5c6133b
fd9c460
 
de1d2d0
2689c6a
 
 
 
de1d2d0
2689c6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd9c460
7cf22f7
fd9c460
 
 
 
 
 
 
 
2689c6a
 
 
fd9c460
 
 
 
 
5c6133b
 
fd9c460
2689c6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd9c460
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
from io import BytesIO

import streamlit as st
import pandas as pd
from neuralprophet import NeuralProphet
import matplotlib.pyplot as plt
import torch

# Seed PyTorch's global random number generator with a fixed value so that
# repeated runs of the app start from the same RNG state.
# NOTE(review): this seeds torch only; confirm whether any other RNG used
# during training also needs seeding for full reproducibility.
torch.manual_seed(42)


def preprocess_data(data, ticker):
    """Build the two-column frame NeuralProphet expects for one ticker.

    Args:
        data: DataFrame containing at least the columns 'Date', 'Ticker',
            'Close' and 'Volume'. It is NOT modified by this function.
        ticker: Value of the 'Ticker' column to select.

    Returns:
        A new DataFrame with columns 'ds' (timezone-aware UTC timestamps,
        ascending) and 'y' (the closing price), holding one row per distinct
        timestamp for the selected ticker.
    """
    # Filter first and copy, so the date conversion below neither touches the
    # caller's DataFrame nor parses dates of tickers we are about to discard.
    ticker_data = data.loc[data['Ticker'] == ticker].copy()

    # Convert 'Date' to timezone-aware (UTC) datetimes
    ticker_data['Date'] = pd.to_datetime(ticker_data['Date'], utc=True)

    # Remove duplicates by keeping the row with the highest 'Volume' for each
    # 'Date': sort so that row comes first within a date, then keep the first.
    ticker_data = ticker_data.sort_values(
        ['Date', 'Volume'], ascending=[True, False]
    ).drop_duplicates(subset=['Date'], keep='first')

    # Rename 'Date' to 'ds' and 'Close' to 'y' for NeuralProphet
    ticker_data = ticker_data[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})

    # ADDYY is special-cased: only rows from 2015-01-01 onwards are used.
    if ticker == "ADDYY":
        ticker_data = ticker_data[ticker_data['ds'] >= '2015-01-01']
    return ticker_data


def forecast(data, epochs, *, test_size=90, train_fraction=0.80):
    """Train a NeuralProphet model and predict on a held-out test window.

    The last ``test_size`` rows of ``data`` form the test set. The rows
    before it are split chronologically into training (the first
    ``train_fraction`` share) and validation (the remainder).

    Args:
        data: DataFrame with the 'ds' / 'y' columns, ordered by time.
        epochs: Number of training epochs passed to ``NeuralProphet.fit``.
        test_size: Number of trailing rows held out for testing.
        train_fraction: Share (strictly between 0 and 1) of the non-test rows
            used for training; the rest is used for validation.

    Returns:
        A tuple ``(forecast_test, model, metrics)``: the predictions for the
        test rows, the fitted model, and the metrics returned by ``fit``.

    Raises:
        ValueError: If the split parameters are invalid or ``data`` has too
            few rows to yield non-empty training, validation and test sets.
    """
    n = len(data)
    if test_size < 1:
        raise ValueError(f"test_size must be at least 1, got {test_size}")
    if not 0 < train_fraction < 1:
        raise ValueError(f"train_fraction must be between 0 and 1, got {train_fraction}")
    # Without this guard a short dataset makes ``n - test_size`` negative, and
    # the slice below would wrap around and overlap the test rows.
    if n <= test_size:
        raise ValueError(
            f"Need more than {test_size} rows to hold out a test set, got {n}"
        )

    split_at = n - test_size
    # Define the test set as the last `test_size` rows
    test_data = data.iloc[split_at:]
    # Everything before the test window is shared by training and validation
    train_valid_data = data.iloc[:split_at]

    train_end = int(train_fraction * len(train_valid_data))
    if train_end < 1 or train_end >= len(train_valid_data):
        raise ValueError(
            f"Not enough rows ({len(train_valid_data)}) before the test window "
            "to form both a training and a validation set"
        )

    train_data = train_valid_data.iloc[:train_end]
    valid_data = train_valid_data.iloc[train_end:]

    model: NeuralProphet = NeuralProphet(trend_reg=0.0001, yearly_seasonality=True, weekly_seasonality=True,
                                         daily_seasonality=False,
                                         learning_rate=0.001, seasonality_mode='multiplicative')

    # Train the model on training data and validate on validation data
    # freq='B' tells NeuralProphet to expect data only on business days (excluding weekends)
    metrics = model.fit(train_data, validation_df=valid_data, freq='B', epochs=epochs, early_stopping=True)

    # After training, test the model on the test data
    forecast_test = model.predict(test_data)  # Predict on the test set
    return forecast_test, model, metrics


def plot_training_loss(metrics):
    """Render the training and validation loss curves in the Streamlit app.

    Args:
        metrics: Table-like object with 'Loss' and 'Loss_val' columns (one
            entry per epoch), as returned by the model's ``fit`` call.
    """
    # Extract training and validation loss from metrics
    training_loss = metrics['Loss'].values
    validation_loss = metrics['Loss_val'].values

    # Plotting
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(training_loss, label="Training Loss")
        ax.plot(validation_loss, label="Validation Loss")
        ax.set_title("Training vs Validation Loss")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # close it here so figures do not pile up across Streamlit reruns.
        plt.close(fig)


# Streamlit App
# Page header: these calls run at import time, and their order here determines
# the order in which the title and the info box appear on the page.
st.title("Stock Prediction with NeuralProphet")
st.info("Test stock data is auto loaded. Please select a ticker to start forecasting.")


def get_default_file(path='World-Stock-Prices-Dataset.csv'):
    """Load a dataset file fully into memory.

    Args:
        path: Location of the CSV file. Defaults to the bundled dataset name,
            resolved relative to the current working directory.

    Returns:
        A ``BytesIO`` holding the file's bytes (positioned at the start), or
        ``None`` if the file does not exist.
    """
    try:
        with open(path, 'rb') as f:
            return BytesIO(f.read())
    except FileNotFoundError:
        # A missing dataset is an expected situation; the caller decides how
        # to report it.
        return None


# Main page flow: pick a data source, pick a ticker, then train on demand.
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# If no file is uploaded, use the default file
if uploaded_file is None:
    uploaded_file = get_default_file()
    if uploaded_file is not None:
        st.info("Using default dataset: World-Stock-Prices-Dataset.csv")
    else:
        st.warning("Default dataset World-Stock-Prices-Dataset.csv not found.")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    tickers = data['Ticker'].unique()
    # Preselect the Adidas ticker (ADDYY) when the dataset contains it.
    # An uploaded CSV may not include it, so fall back to the first ticker
    # instead of letting list.index() raise ValueError.
    ticker_options = tickers.tolist()
    default_index = ticker_options.index("ADDYY") if "ADDYY" in ticker_options else 0
    ticker = st.selectbox("Select Ticker", tickers, index=default_index)

    data_processed = preprocess_data(data, ticker)

    epochs = st.slider("Select Epochs", 10, 200, 200, step=10)

    # Training only starts on an explicit click.
    if st.button("Train Model and Forecast"):
        forecast_data, model, metrics = forecast(data_processed, epochs)
        # Forecast over the held-out test rows
        fig = model.plot(forecast_data)
        st.plotly_chart(fig)
        # Training vs validation loss per epoch
        plot_training_loss(metrics)
        # Component plot for the same forecast
        fig_components = model.plot_components(forecast_data)
        st.plotly_chart(fig_components)