# Source: Hugging Face Spaces - Pierre918/ML_project_stock_price (status: Sleeping)
# File size: 7,467 Bytes

import yfinance as yf
import pickle
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import gradio as gr
import pickle
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="numpy._core.fromnumeric")

# Load the historical S&P 500 quotes and order them chronologically.
df = pd.read_csv("datatset/sphist.csv")
df['Date'] = pd.to_datetime(df["Date"])
df = df.sort_values(by='Date', ascending=True)

# Trailing statistics of the 'Open' price, filled in row by row below.
# The creation order of these columns is kept as-is: it determines the column
# order of the feature matrix built from this frame later in the script.
df['std 5'] = np.nan
df['mean 5'] = np.nan
df['mean 365'] = np.nan
df['std 365'] = np.nan

# Start row of each sliding window; -1 means "window not opened yet".
year_i = -1
day_i = -1
# Last computed statistics; they stay NaN until the matching window opens.
mean_d = np.nan
std_d = np.nan
mean_y = np.nan
std_y = np.nan

# The column positions are loop invariants, so resolve them only once.
col_std_365 = df.columns.get_loc("std 365")
col_mean_365 = df.columns.get_loc("mean 365")
col_std_5 = df.columns.get_loc("std 5")
col_mean_5 = df.columns.get_loc("mean 5")

# Read dates and prices column-wise instead of materialising a whole row
# object for every lookup.
dates = df['Date']
opens = df['Open']

# Row 0 has no history, so the walk starts at row 1.
for j in range(1, len(df)):
    # Time elapsed between the very first row and the previous row.
    elapsed = dates.iloc[j - 1] - dates.iloc[0]

    if elapsed > timedelta(days=365):
        if year_i == -1:
            year_i = 0
        # The slice end is exclusive: the window covers rows year_i .. j-2,
        # i.e. the previous row (j-1) itself is not part of it.
        year_window = opens.iloc[year_i:j - 1]
        mean_y = np.mean(year_window)
        std_y = np.std(year_window)
        # Advance the start by one row so the window keeps a constant row count.
        year_i += 1
    df.iloc[j, col_std_365] = std_y
    df.iloc[j, col_mean_365] = mean_y

    if elapsed > timedelta(days=5):
        if day_i == -1:
            day_i = 0
        # Same exclusive-end convention as the long window above.
        day_window = opens.iloc[day_i:j - 1]
        mean_d = np.mean(day_window)
        std_d = np.std(day_window)
        day_i += 1
    df.iloc[j, col_mean_5] = mean_d
    df.iloc[j, col_std_5] = std_d

# Add rolling indicators used as model features.  Only '5 Days Open' is
# shifted by one row (so it is built from previous rows only).
# NOTE(review): the High/Low/Volume rolling means are not shifted and therefore
# include the current row - confirm this is intended.
df['5 Days Open'] = df['Open'].rolling(window=5, center=False).mean().shift(1)
df['Year'] = df['Date'].map(lambda stamp: stamp.year)
for source_column in ('High', 'Low', 'Volume'):
    df['5 Days ' + source_column] = (
        df[source_column].rolling(window=5, center=False).mean()
    )

# Discard rows with any missing value, then everything before 3 January 1951.
df = df.dropna(axis=0)
df = df[df["Date"] >= datetime(year=1951, month=1, day=3)]

# Chronological split: rows before 2013 train the model, the rest test it.
split_date = datetime(year=2013, month=1, day=1)
is_test_row = df['Date'] >= split_date
test = df[is_test_row]
train = df[~is_test_row]

# Raw quote columns and the date are removed to obtain the feature matrix.
non_feature_columns = ["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']

# Fit the model on the training rows and predict the test rows.
lr = LinearRegression()
lr.fit(train.drop(columns=non_feature_columns), train["Close"])
pred = lr.predict(test.drop(columns=non_feature_columns))

# Persist the fitted model so it can be reloaded from disk later.
with open('linear_regression_model.pkl', 'wb') as file:
    pickle.dump(lr, file)

# Error metrics on the test rows.
actual_close = test["Close"]
err = mean_absolute_error(actual_close, pred)
errP = mean_absolute_percentage_error(actual_close, pred)

# Table comparing predictions with the actual close prices.
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})

# Callback returning the error metrics and the comparison table.
def display_results():
    """Return the MAE and MAPE as strings, followed by the results table."""
    mae_text = str(err)
    mape_text = str(errP)
    return mae_text, mape_text, result_df

# Predict the S&P 500 close price for a user-supplied date.
def predire(date):
    """Predict the S&P 500 close for ``date`` and look up the real close.

    Args:
        date: Date as text in ``DD/MM/YYYY`` form.

    Returns:
        A ``(prediction, real_close)`` tuple: the value predicted by the
        pickled linear-regression model for that day, and the downloaded
        close price of that day as a float.

    Raises:
        gr.Error: If ``date`` cannot be parsed into a valid calendar date, if
            no quotes could be downloaded, or if no usable row exists for the
            requested day.
    """
    # Ticker symbol of the S&P 500 index.
    symbole = "^GSPC"

    try:
        # Only the first three '/'-separated fields are used.
        day, month, year = (int(part) for part in date.split('/')[:3])
        target = datetime(year=year, month=month, day=day)
        # History starts two years before the requested day.  29 February never
        # exists two years earlier, so fall back to the 28th in that case.
        start_day = 28 if (month, day) == (2, 29) else day
        date_debut = datetime(year=year - 2, month=month, day=start_day)
    except (AttributeError, ValueError, OverflowError) as exc:
        raise gr.Error("Invalid date: please use the DD/MM/YYYY format.") from exc
    print(year, month, day)

    def get_datas(date_debut):
        """Download the quotes from ``date_debut`` up to the current time."""
        date_fin = datetime.now()
        return yf.download(symbole, start=date_debut, end=date_fin)

    data = get_datas(date_debut)
    if data is None or data.empty:
        raise gr.Error("No market data could be downloaded for this period.")

    # Keep only the needed columns; copy so that adding columns below does not
    # write into a slice of the downloaded frame.
    df = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    df['Date'] = df.index

    def add_features(df):
        """Add the trailing-window and rolling feature columns to ``df``.

        ``df`` is modified in place; the returned frame additionally has every
        row containing a missing value removed.
        """
        year_i = -1
        day_i = -1
        mean_d = np.nan
        std_d = np.nan
        mean_y = np.nan
        std_y = np.nan
        # Creation order is kept: it fixes the feature column order.
        df['std 5'] = np.nan
        df['mean 5'] = np.nan
        df['mean 365'] = np.nan
        df['std 365'] = np.nan

        # Column locations do not change inside the loop; resolve them once.
        col_std_365 = df.columns.get_loc("std 365")
        col_mean_365 = df.columns.get_loc("mean 365")
        col_mean_5 = df.columns.get_loc("mean 5")
        col_std_5 = df.columns.get_loc("std 5")

        # Row 0 has no history, so the walk starts at row 1.
        for j in range(1, len(df)):
            # The date lookup yields a one-element Series here, hence .iloc[0].
            if (df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=365)).iloc[0]:
                if year_i == -1:
                    year_i = 0
                # Exclusive slice end: the window covers rows year_i .. j-2.
                mean_y = np.mean(df.iloc[year_i:j-1]['Open'])
                std_y = np.std(df.iloc[year_i:j-1]['Open'])
                year_i += 1
            df.iloc[j, col_std_365] = std_y
            df.iloc[j, col_mean_365] = mean_y
            if (df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=5)).iloc[0]:
                if day_i == -1:
                    day_i = 0
                mean_d = np.mean(df.iloc[day_i:j-1]["Open"])
                std_d = np.std(df.iloc[day_i:j-1]['Open'])
                day_i += 1
            df.iloc[j, col_mean_5] = mean_d
            df.iloc[j, col_std_5] = std_d

        # Rolling indicators used as model features.
        df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
        df['Year'] = df['Date'].apply(lambda x: x.year)
        df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
        df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
        df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()

        # Shift the open-price average by one row.
        df['5 Days Open'] = df['5 Days Open'].shift(1)

        df = df.dropna(axis=0)
        print(df.tail())
        return df

    df = add_features(df)

    # Rows only exist for days that were downloaded and have complete features.
    row = df[df['Date'] == target]
    if row.empty:
        raise gr.Error(
            "No market data available for this date. "
            "Please select a past weekday on which the market was open."
        )

    # Load the model from the pickle file.
    # NOTE(review): unpickling executes arbitrary code; this is only safe as
    # long as the file is the one produced by this application itself.
    with open('linear_regression_model.pkl', 'rb') as file:
        lr = pickle.load(file)

    features = row.drop(columns=["Open", 'High', 'Low', 'Volume', 'Close', 'Date'])
    prediction = lr.predict(features)[-1]
    # The close price is looked up by field and then by ticker symbol;
    # .iloc[-1] extracts the scalar explicitly before the float conversion.
    real_close = float(row['Close'][symbole].iloc[-1])

    return prediction, real_close




# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken betweek 2013 and 2015.
                
    0.4% of average error was reached using LinearRegression.""")
    # Static evaluation results of the trained model.
    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")

    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")
    # On-demand prediction for a date typed by the user.
    gr.Markdown("## Dynamic prediction")
    gr.Markdown("Select a weekday before today and it will predict the close price of S&P 500 at your date.")
    with gr.Row():
        with gr.Column():
            date_input = gr.Textbox(label="Select Date (DD/MM/YYYY)")
            prediction = gr.Textbox(label="Prediction")
        with gr.Column():
            # The caption must be passed as ``label``: the first positional
            # argument of gr.Textbox is the initial value, not the label.
            true_val = gr.Textbox(label="Real close price")
    with gr.Row():
        with gr.Column():
            btn2 = gr.Button("Predict for your date")

    # Wire the buttons to their callbacks.
    btn.click(display_results, outputs=[error, errorP, table])
    btn2.click(predire, inputs=date_input, outputs=[prediction, true_val])
# Launch the Gradio interface.
demo.launch()