"""Train a linear regression on historical S&P 500 prices and serve it with Gradio.

At import time the script reads ``datatset/sphist.csv``, builds trailing-window
features, fits a ``LinearRegression`` on rows before 2013, evaluates it on rows
from 2013 onwards, pickles the fitted model, and launches a Gradio interface
that shows the evaluation and predicts the close price for a user-chosen date.
"""

import pickle
import warnings
from datetime import datetime, timedelta

import gradio as gr
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

warnings.filterwarnings("ignore", category=FutureWarning, module="numpy._core.fromnumeric")

# Yahoo Finance ticker of the S&P 500 index.
SYMBOL = "^GSPC"
MODEL_PATH = 'linear_regression_model.pkl'

# Model inputs, in the order used for both training and prediction.
FEATURE_COLUMNS = [
    "std 5",
    "mean 5",
    "mean 365",
    "std 365",
    "5 Days Open",
    "Year",
    "5 Days High",
    "5 Days Low",
    "5 Days Volume",
]


def _trailing_stats(values: pd.Series, dates: pd.Series, span_days: int) -> tuple[pd.Series, pd.Series]:
    """Return the trailing (mean, population std) of ``values`` for every row.

    The window width, in rows, is the position of the first row dated more
    than ``span_days`` days after the first row. For row ``j`` the window
    covers the ``width`` rows ending at row ``j - 2``, i.e. it excludes both
    the current row and the one just before it. Rows without a full window
    get NaN. ``dates`` must be sorted in ascending order.

    This is the vectorised form of the former row-by-row loop; results match
    it up to floating-point rounding.
    """
    missing = pd.Series(np.nan, index=values.index)
    if len(dates) == 0:
        return missing, missing.copy()

    past_span = ((dates - dates.iloc[0]) > timedelta(days=span_days)).to_numpy()
    if not past_span.any():
        # The data never covers more than ``span_days`` days: no window exists.
        return missing, missing.copy()

    # The first row is never past the span, so the width is always >= 1.
    width = int(past_span.argmax())
    window = values.rolling(window=width)
    return window.mean().shift(2), window.std(ddof=0).shift(2)


def _add_features(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``frame`` with the model features added and NaN rows dropped.

    ``frame`` must hold ``Date``, ``Open``, ``High``, ``Low`` and ``Volume``
    columns and be sorted by ascending date.
    """
    frame = frame.copy()

    mean_5, std_5 = _trailing_stats(frame["Open"], frame["Date"], span_days=5)
    mean_365, std_365 = _trailing_stats(frame["Open"], frame["Date"], span_days=365)
    frame["std 5"] = std_5
    frame["mean 5"] = mean_5
    frame["mean 365"] = mean_365
    frame["std 365"] = std_365

    # Five-row moving averages; the Open average is shifted by one row so it
    # only uses rows before the current one.
    frame["5 Days Open"] = frame["Open"].rolling(center=False, window=5).mean().shift(1)
    frame["Year"] = frame["Date"].dt.year
    # NOTE(review): these three averages are NOT shifted, so each row's window
    # includes that same row's High/Low/Volume while the target is that row's
    # Close. Kept as-is to preserve the published model; confirm it is intended.
    frame["5 Days High"] = frame["High"].rolling(center=False, window=5).mean()
    frame["5 Days Low"] = frame["Low"].rolling(center=False, window=5).mean()
    frame["5 Days Volume"] = frame["Volume"].rolling(center=False, window=5).mean()

    return frame.dropna(axis=0)


def _flatten_columns(data: pd.DataFrame) -> pd.DataFrame:
    """Return ``data`` with single-level columns named after the price fields.

    Depending on its version, yfinance returns either flat columns or a
    (field, ticker) MultiIndex even for one ticker; the level that contains
    ``"Close"`` is kept.
    """
    columns = data.columns
    if not isinstance(columns, pd.MultiIndex):
        return data
    for level in range(columns.nlevels):
        names = columns.get_level_values(level)
        if "Close" in names:
            flat = data.copy()
            flat.columns = names
            return flat
    return data


# Load and prepare the training data.
df = pd.read_csv("datatset/sphist.csv")
df['Date'] = pd.to_datetime(df["Date"])
df = df.sort_values(by='Date', ascending=True)
df = _add_features(df)
df = df[df["Date"] >= datetime(year=1951, month=1, day=3)]

split_date = datetime(year=2013, month=1, day=1)
test = df[df['Date'] >= split_date]
train = df[df['Date'] < split_date]

# Train the model and persist it.
lr = LinearRegression().fit(train[FEATURE_COLUMNS], train["Close"])
pred = lr.predict(test[FEATURE_COLUMNS])
with open(MODEL_PATH, 'wb') as file:
    pickle.dump(lr, file)

# Evaluation metrics on the held-out period.
err = mean_absolute_error(test["Close"], pred)
errP = mean_absolute_percentage_error(test["Close"], pred)

# Table shown in the interface.
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})


def display_results() -> tuple[str, str, pd.DataFrame]:
    """Return the MAE, the MAPE (both as strings) and the predictions table."""
    return str(err), str(errP), result_df


def predire(date: str) -> tuple[float, float]:
    """Predict the S&P 500 close price for ``date`` and return it with the real one.

    Args:
        date: Day to predict, formatted as ``DD/MM/YYYY``.

    Returns:
        ``(predicted_close, actual_close)`` for that trading day.

    Raises:
        gr.Error: If the date is malformed, the download returns nothing, or
            no row exists for that day (weekend, holiday, future date).
    """
    try:
        day, month, year = (int(part) for part in date.split('/'))
        target = datetime(year=year, month=month, day=day)
    except (AttributeError, ValueError) as exc:
        raise gr.Error("Invalid date: expected the format DD/MM/YYYY.") from exc

    # Two years of history are needed to fill the 365-day window. A calendar
    # offset is used so that 29 February maps to 28 February instead of failing.
    start = (pd.Timestamp(target) - pd.DateOffset(years=2)).to_pydatetime()
    data = yf.download(SYMBOL, start=start, end=datetime.now())
    if data is None or data.empty:
        raise gr.Error("No market data could be downloaded for this period.")

    prices = _flatten_columns(data)[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    prices['Date'] = prices.index
    features = _add_features(prices)

    row = features[features['Date'] == target]
    if row.empty:
        raise gr.Error("No market data for this date: pick a past trading day.")

    # The model fitted above is still in memory, so the pickle is not reloaded.
    predicted = lr.predict(row[FEATURE_COLUMNS])[-1]
    actual = row['Close'].iloc[-1]
    return float(predicted), float(actual)


# Build the Gradio interface.
# NOTE(review): the nesting of rows/columns below is reconstructed from the
# statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown(
        "This model was trained on S&P 500 stock price before 2013. "
        "The predictions below are taken betweek 2013 and 2015. \n"
        "0.4% of average error was reached using LinearRegression."
    )

    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")

    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")

    gr.Markdown("## Dynamic prediction")
    gr.Markdown("Select a weekday before today and it will predict the close price of S&P 500 at your date.")

    with gr.Row():
        with gr.Column():
            date_input = gr.Textbox(label="Select Date (DD/MM/YYYY)")
            prediction = gr.Textbox(label="Prediction")
        with gr.Column():
            # ``label=`` is required: the first positional argument of
            # ``gr.Textbox`` is the initial value, not the caption.
            true_val = gr.Textbox(label="Real close price")

    with gr.Row():
        with gr.Column():
            btn2 = gr.Button("Predict for your date")

    btn.click(display_results, outputs=[error, errorP, table])
    btn2.click(predire, inputs=date_input, outputs=[prediction, true_val])

# Launch the Gradio interface.
demo.launch()