Spaces:

Pierre918
/

ML_project_stock_price

Sleeping

App Files Files Community

Pierre918 committed on Feb 24

Commit

868fa55

verified ·

1 Parent(s): d51c276

update add feature

Browse files

Files changed (1) hide show

app.py +196 -93

app.py CHANGED Viewed

@@ -1,94 +1,197 @@
-import pandas as pd
-import numpy as np
-from datetime import datetime, timedelta
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
-import gradio as gr
-# Load and prepare the data
-df = pd.read_csv("datatset/sphist.csv")
-df['Date'] = pd.to_datetime(df["Date"])
-df = df.sort_values(by='Date', ascending=True)
-year_i = -1
-day_i = -1
-mean_d = np.nan
-std_d = np.nan
-std_d_v = np.nan
-df['std 5'] = np.nan
-df['mean 5'] = np.nan
-mean_y = np.nan
-std_y = np.nan
-ratio = np.nan
-df['mean 365'] = np.nan
-df['std 365'] = np.nan
-j = 0  # positional row counter used with iloc inside the loop
-for i, elt in df.iterrows():
-    if df.iloc[j]['Date'] - df.iloc[0]['Date'] > timedelta(days=365):
-        if year_i == -1:
-            year_i = 0
-        mean_y = np.mean(df.iloc[year_i:j]['Open'])
-        std_y = np.std(df.iloc[year_i:j]['Open'])
-        year_i += 1  # slide the window start forward by one row
-    df.iloc[j, df.columns.get_loc("std 365")] = std_y
-    df.iloc[j, df.columns.get_loc("mean 365")] = mean_y
-    if df.iloc[j]['Date'] - df.iloc[0]['Date'] > timedelta(days=5):
-        if day_i == -1:
-            day_i = 0
-        mean_d = np.mean(df.iloc[day_i:j]["Open"])
-        std_d = np.std(df.iloc[day_i:j]['Open'])
-        std_d_v = np.std(df.iloc[day_i:j]['Volume'])
-        day_i += 1  # slide the window start forward by one row
-    df.iloc[j, df.columns.get_loc("mean 5")] = mean_d
-    df.iloc[j, df.columns.get_loc("std 5")] = std_d
-    j += 1
-# Add indicators useful for our ML model
-df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
-df['Year'] = df['Date'].apply(lambda x: x.year)
-df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
-df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
-df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()
-# Shift the column by one day
-df['5 Days Open'] = df['5 Days Open'].shift(1)
-df = df.dropna(axis=0)
-df = df.drop(df[df["Date"] < datetime(year=1951, month=1, day=3)].index, axis=0)
-test = df[df['Date'] >= datetime(year=2013, month=1, day=1)]
-train = df[df['Date'] < datetime(year=2013, month=1, day=1)]
-# Train the model
-lr = LinearRegression().fit(train.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']), train["Close"])
-pred = lr.predict(test.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']))
-# Compute the errors
-err = mean_absolute_error(test["Close"], pred)
-errP = mean_absolute_percentage_error(test["Close"], pred)
-# Build the DataFrame for the table
-result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})
-# Function returning the errors and the table for display
-def display_results():
-    return str(err), str(errP), result_df
-# Build the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Linear Regression Model Results")
-    gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken betweek 2013 and 2015.
-    0.4% of average error was reached using LinearRegression.""")
-    with gr.Row():
-        with gr.Column():
-            error = gr.Textbox(label="Mean Absolute Error")
-            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
-            table = gr.Dataframe(label="Predictions vs Actual Close Prices")
-    with gr.Row():
-        with gr.Column():
-            btn = gr.Button("Show Results")
-    btn.click(display_results, outputs=[error, errorP, table])
-# Launch the Gradio interface
 demo.launch()

+import yfinance as yf
+import pickle
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
+import gradio as gr
+import pickle
+import warnings
+warnings.filterwarnings("ignore", category=FutureWarning, module="numpy._core.fromnumeric")
+# Load and prepare the data
+df = pd.read_csv("datatset/sphist.csv")
+df['Date'] = pd.to_datetime(df["Date"])
+df = df.sort_values(by='Date', ascending=True)
+year_i = -1  # start of the long window; -1 until the first 365 days have elapsed
+day_i = -1  # start of the short window; -1 until the first 5 days have elapsed
+mean_d = np.nan
+std_d = np.nan
+std_d_v = np.nan  # NOTE(review): assigned in the loop but never written to a column
+df['std 5'] = np.nan
+df['mean 5'] = np.nan
+mean_y = np.nan
+std_y = np.nan
+ratio = np.nan  # NOTE(review): never used afterwards in this script
+df['mean 365'] = np.nan
+df['std 365'] = np.nan
+j = 0  # positional row counter used with iloc inside the loop
+for i, elt in df.iterrows():
+    if j==0:  # skip the first row
+        j+=1
+        continue
+    if df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=365):
+        if year_i == -1:
+            year_i = 0
+        mean_y = np.mean(df.iloc[year_i:j-1]['Open'])  # rows year_i .. j-2 only
+        std_y = np.std(df.iloc[year_i:j-1]['Open'])
+        year_i += 1  # slide the window start forward by one row
+    df.iloc[j, df.columns.get_loc("std 365")] = std_y
+    df.iloc[j, df.columns.get_loc("mean 365")] = mean_y
+    if df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=5):
+        if day_i == -1:
+            day_i = 0
+        mean_d = np.mean(df.iloc[day_i:j-1]["Open"])  # rows day_i .. j-2 only
+        std_d = np.std(df.iloc[day_i:j-1]['Open'])
+        std_d_v = np.std(df.iloc[day_i:j-1]['Volume'])
+        day_i += 1  # slide the window start forward by one row
+    df.iloc[j, df.columns.get_loc("mean 5")] = mean_d
+    df.iloc[j, df.columns.get_loc("std 5")] = std_d
+    j += 1
+# Add indicators useful for our ML model
+df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
+df['Year'] = df['Date'].apply(lambda x: x.year)
+df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
+df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
+df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()
+# Shift the column by one day (NOTE(review): only '5 Days Open' is shifted; the High/Low/Volume rolling means above still include the current row - confirm this is intended)
+df['5 Days Open'] = df['5 Days Open'].shift(1)
+df = df.dropna(axis=0)  # drop rows whose features are still NaN
+df = df.drop(df[df["Date"] < datetime(year=1951, month=1, day=3)].index, axis=0)  # keep rows dated 1951-01-03 or later
+test = df[df['Date'] >= datetime(year=2013, month=1, day=1)]  # evaluation rows: 2013-01-01 onwards
+train = df[df['Date'] < datetime(year=2013, month=1, day=1)]  # training rows: before 2013-01-01
+# Train the model (features are every column except the raw prices, volume and date)
+lr = LinearRegression().fit(train.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']), train["Close"])
+pred = lr.predict(test.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']))
+with open('linear_regression_model.pkl', 'wb') as file:  # persist the fitted model to disk
+    pickle.dump(lr, file)
+# Compute the errors
+err = mean_absolute_error(test["Close"], pred)
+errP = mean_absolute_percentage_error(test["Close"], pred)
+# Build the DataFrame for the table
+result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})
+# Fonction pour afficher l'erreur et le tableau
+def display_results():
+    return str(err), str(errP), result_df
+# Définir le symbole du S&P 500
+def predire(date):
+    # Définir le symbole du S&P 500
+    symbole = "^GSPC"
+    selected_date=datetime.fromtimestamp(date)
+    year, month, day = selected_date.year, selected_date.month, selected_date.day
+    print(year,month,day)
+    # Définir la période
+    def get_datas(year,month,day):
+        date_debut = datetime(year=year-2, month=month, day=day)
+        date_fin = datetime.now()
+        # Télécharger les données
+        data = yf.download(symbole, start=date_debut, end=date_fin)
+        return data
+    # Sélectionner les colonnes souhaitées
+    df = get_datas(year,month,day)[['Open', 'High', 'Low', 'Close', 'Volume']]
+    df['Date'] = df.index
+    # Afficher les premières lignes
+    def add_features(df):
+        year_i = -1
+        day_i = -1
+        mean_d = np.nan
+        std_d = np.nan
+        df['std 5'] = np.nan
+        df['mean 5'] = np.nan
+        mean_y = np.nan
+        std_y = np.nan
+        df['mean 365'] = np.nan
+        df['std 365'] = np.nan
+        j = 0
+        for i, elt in df.iterrows():
+            if j==0:
+                j+=1
+                continue
+            if (df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=365)).iloc[0]:
+                if year_i == -1:
+                    year_i = 0
+                mean_y = np.mean(df.iloc[year_i:j-1]['Open'])
+                std_y = np.std(df.iloc[year_i:j-1]['Open'])
+                year_i += 1
+            df.iloc[j, df.columns.get_loc("std 365")] = std_y
+            df.iloc[j, df.columns.get_loc("mean 365")] = mean_y
+            if (df.iloc[j-1]['Date'] - df.iloc[0]['Date'] > timedelta(days=5)).iloc[0]:
+                if day_i == -1:
+                    day_i = 0
+                mean_d = np.mean(df.iloc[day_i:j-1]["Open"])
+                std_d = np.std(df.iloc[day_i:j-1]['Open'])
+                day_i += 1
+            df.iloc[j, df.columns.get_loc("mean 5")] = mean_d
+            df.iloc[j, df.columns.get_loc("std 5")] = std_d
+            j += 1
+        # Ajouter des indicateurs utiles pour notre modèle de ML
+        df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
+        df['Year'] = df['Date'].apply(lambda x: x.year)
+        df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
+        df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
+        df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()
+        # Déplacer la colonne d'un jour
+        df['5 Days Open'] = df['5 Days Open'].shift(1)
+        df = df.dropna(axis=0)
+        print(df.tail())
+        return df
+    df= add_features(df)
+    test=df
+    test.iloc[-2:-1]['Close']
+    # Charger le modèle à partir du fichier pickle
+    with open('linear_regression_model.pkl', 'rb') as file:
+        lr = pickle.load(file)
+    a= lr.predict(df[df['Date'] == datetime(year=year,month=month,day=day)].drop(columns=["Open", 'High', 'Low', 'Volume', 'Close', 'Date']))[-1],float(df[df['Date'] == datetime(year=year,month=month,day=day)]['Close'][symbole])
+    return a
+# Créer l'interface Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("# Linear Regression Model Results")
+    gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken betweek 2013 and 2015.
+    0.4% of average error was reached using LinearRegression.""")
+    with gr.Row():
+        with gr.Column():
+            error = gr.Textbox(label="Mean Absolute Error")
+            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
+            table = gr.Dataframe(label="Predictions vs Actual Close Prices")
+    with gr.Row():
+        with gr.Column():
+            btn = gr.Button("Show Results")
+    gr.Markdown("## Dynamic prediction")
+    with gr.Row():
+        with gr.Column():
+            date_input = gr.DateTime(label="Select Date")
+            prediction = gr.Textbox(label="Prediction")
+        with gr.Column():
+            true_val = gr.Textbox("Real close price")
+    with gr.Row():
+        with gr.Column():
+            btn2 = gr.Button("Predict for your date")
+    btn.click(display_results, outputs=[error, errorP, table])
+    btn2.click(predire, inputs=date_input, outputs=[prediction, true_val])
+# Lancer l'interface Gradio
 demo.launch()