Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
from datetime import datetime, timedelta | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error | |
import gradio as gr | |
# Load the price history, parse the 'Date' column into timestamps and order
# the rows from oldest to newest.
# NOTE(review): the directory is spelled "datatset" — confirm it matches the
# actual folder name before correcting it.
df = (
    pd.read_csv("datatset/sphist.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values("Date")
)
# Trailing statistics of the opening price, used later as model features.
#
# Each statistic is taken over a fixed number of rows that end just before
# the current row, so a row never sees its own 'Open'. The row count is
# derived from the data: it is the position of the first row dated more than
# the requested span after the first row. Only rows present in the data are
# counted, so the "5" window can hold fewer than five rows. Rows that do not
# yet have a full window before them stay NaN.
def _rows_until_elapsed(dates, span):
    """Return the position of the first date more than *span* after the first.

    Args:
        dates: Series of timestamps sorted in ascending order.
        span: ``timedelta`` that has to be exceeded.

    Returns:
        The 0-based position of the first qualifying row, or ``None`` when
        *dates* is empty or no row lies that far from the first one.
    """
    if dates.empty:
        return None
    elapsed = ((dates - dates.iloc[0]) > span).to_numpy()
    if not elapsed.any():
        return None
    return int(elapsed.argmax())


def _trailing_open_stats(frame, span):
    """Return ``(mean, std)`` of 'Open' over the rows preceding each row.

    Args:
        frame: DataFrame with 'Date' (ascending) and 'Open' columns.
        span: ``timedelta`` that determines the window width in rows.

    Returns:
        Two Series aligned with ``frame.index``: the mean and the population
        standard deviation (``ddof=0``) of the window. Both are all-NaN when
        no row lies more than *span* after the first row.
    """
    width = _rows_until_elapsed(frame['Date'], span)
    if width is None:
        blank = pd.Series(np.nan, index=frame.index, dtype=float)
        return blank, blank.copy()

    # min_periods=1 lets a window that contains missing 'Open' values still
    # produce a statistic from the values that are present.
    window = frame['Open'].rolling(window=width, min_periods=1, center=False)
    # Rows before position `width` do not have a full window behind them.
    has_full_window = np.arange(len(frame)) >= width
    # shift(1) moves each statistic onto the following row, so the window
    # ends at the previous row instead of the current one.
    mean = window.mean().shift(1).where(has_full_window)
    std = window.std(ddof=0).shift(1).where(has_full_window)
    return mean, std


mean_5, std_5 = _trailing_open_stats(df, timedelta(days=5))
mean_365, std_365 = _trailing_open_stats(df, timedelta(days=365))

# Column creation order is kept stable because it is also the order in which
# the features are handed to the model.
df['std 5'] = std_5
df['mean 5'] = mean_5
df['mean 365'] = mean_365
df['std 365'] = std_365
# Add 5-row rolling averages and the calendar year as model features.
# The 'Open' average is shifted down by one row, so each row only sees
# opening prices from earlier rows.
# NOTE(review): the 'High', 'Low' and 'Volume' averages are NOT shifted and
# therefore include the current row's own values, while the model predicts
# that same row's 'Close' — confirm this is intended.
df['5 Days Open'] = df['Open'].rolling(window=5, center=False).mean().shift(1)
df['Year'] = df['Date'].apply(lambda stamp: stamp.year)
for source in ('High', 'Low', 'Volume'):
    df[f'5 Days {source}'] = df[source].rolling(window=5, center=False).mean()

# Discard rows with any missing value, then rows dated before 1951-01-03.
earliest_kept = datetime(1951, 1, 3)
df = df.dropna(axis=0)
df = df.drop(index=df.index[df['Date'] < earliest_kept])

# Chronological split: rows from 2013-01-01 onwards are the test set,
# everything earlier is the training set.
split_day = datetime(2013, 1, 1)
in_test_period = df['Date'] >= split_day
test = df[in_test_period]
train = df[~in_test_period]
# Train the model on every column except the raw prices, the volume and the
# date; the target is the closing price.
excluded_columns = ["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']
train_features = train.drop(columns=excluded_columns)
test_features = test.drop(columns=excluded_columns)

lr = LinearRegression()
lr.fit(train_features, train["Close"])
pred = lr.predict(test_features)

# Score the predictions against the actual closing prices of the test set.
actual_close = test["Close"]
err = mean_absolute_error(actual_close, pred)
errP = mean_absolute_percentage_error(actual_close, pred)

# Table pairing each prediction with the corresponding actual closing price.
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': actual_close})
# Callback that supplies the error metrics and the comparison table.
def display_results():
    """Return the two error metrics as text plus the results table.

    Returns:
        A 3-tuple ``(mae, mape, table)``: the string forms of the
        module-level ``err`` and ``errP`` values, followed by the
        module-level ``result_df`` DataFrame.
    """
    mae_text = str(err)
    mape_text = str(errP)
    return mae_text, mape_text, result_df
# Build the Gradio interface: a short description, two text boxes for the
# error metrics, a table for the predictions, and a button that fills them.
# NOTE(review): the row/column nesting below was reconstructed from the
# statement order — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken between 2013 and 2015.
0.4% of average error was reached using LinearRegression.""")
    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")
    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")
            # The callback takes no inputs; its three return values are
            # routed to the components listed in `outputs`, in order.
            btn.click(display_results, outputs=[error, errorP, table])

# Start the Gradio app.
demo.launch()