Spaces:
Sleeping
Sleeping
first commit
Browse files- datatset/sphist.csv +0 -0
- predict.py +94 -0
- requirements.txt +0 -0
datatset/sphist.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
predict.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from datetime import datetime, timedelta
|
4 |
+
from sklearn.linear_model import LinearRegression
|
5 |
+
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
|
6 |
+
import gradio as gr
|
7 |
+
|
8 |
+
# Load the price history and order it chronologically (oldest row first).
df = pd.read_csv("datatset/sphist.csv")
df = df.assign(Date=pd.to_datetime(df["Date"])).sort_values("Date")
|
12 |
+
|
13 |
+
def _trailing_open_stats(frame, min_elapsed):
    """Return the trailing mean and standard deviation of ``Open`` per row.

    The window length is the number of rows that come before the first row
    dated more than ``min_elapsed`` after the first row of ``frame``. From
    that row onward, each row receives the statistics of the ``window`` rows
    immediately preceding it (the row itself is excluded); earlier rows stay
    NaN.

    Args:
        frame: DataFrame with ``Date`` and ``Open`` columns, sorted by
            ascending ``Date``.
        min_elapsed: ``timedelta`` that must have elapsed since the first row
            before any statistic is produced.

    Returns:
        A ``(mean, std)`` pair of float Series aligned on ``frame.index``.
        The standard deviation is the population one (``ddof=0``), matching
        what ``np.std`` computes.
    """
    blank = pd.Series(np.nan, index=frame.index, dtype=float)
    if frame.empty:
        return blank, blank.copy()

    elapsed = frame['Date'] - frame['Date'].iloc[0]
    eligible = (elapsed > min_elapsed).to_numpy()
    # Position of the first eligible row == number of rows in the window.
    window = int(eligible.argmax()) if eligible.any() else 0
    if window == 0:
        return blank, blank.copy()

    # min_periods=1 keeps NaN-skipping behaviour; shift(1) turns "window
    # ending at this row" into "window ending at the previous row".
    rolling_open = frame['Open'].rolling(window=window, min_periods=1)
    mean = rolling_open.mean().shift(1).where(eligible)
    std = rolling_open.std(ddof=0).shift(1).where(eligible)
    return mean, std


# Trailing statistics of the opening price over roughly one week and one year
# of history. Values match the previous row-by-row loop up to floating-point
# rounding; columns are created in the same order as before so the model's
# feature order is unchanged.
mean_5, std_5 = _trailing_open_stats(df, timedelta(days=5))
mean_365, std_365 = _trailing_open_stats(df, timedelta(days=365))
df['std 5'] = std_5
df['mean 5'] = mean_5
df['mean 365'] = mean_365
df['std 365'] = std_365
|
45 |
+
|
46 |
+
# Add indicators that are useful for the ML model.
# The opening-price average is lagged by one row so that it only covers the
# five rows before the current one.
df['5 Days Open'] = df['Open'].rolling(5).mean().shift(1)
df['Year'] = df['Date'].apply(lambda stamp: stamp.year)
# NOTE(review): these three averages are not lagged, so each one includes the
# current row's own High/Low/Volume. That looks like look-ahead information
# when predicting the same row's Close - confirm whether this is intended.
for source_column in ('High', 'Low', 'Volume'):
    df[f'5 Days {source_column}'] = df[source_column].rolling(5).mean()

# Discard rows with any missing indicator, then everything before 1951-01-03.
df = df.dropna()
first_kept_day = datetime(year=1951, month=1, day=3)
df = df[df['Date'] >= first_kept_day]

# Rows from 2013 onward are held out for evaluation; the rest is training data.
split_day = datetime(year=2013, month=1, day=1)
in_test_period = df['Date'] >= split_day
test = df[in_test_period]
train = df[~in_test_period]
|
59 |
+
|
60 |
+
# Train the model. The raw price/volume columns and the date are excluded, so
# only the engineered indicator columns are used as features.
non_feature_columns = ["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']
train_features = train.drop(columns=non_feature_columns)
test_features = test.drop(columns=non_feature_columns)

lr = LinearRegression()
lr.fit(train_features, train["Close"])
pred = lr.predict(test_features)

# Compute the errors on the held-out rows.
# NOTE: scikit-learn reports the percentage error as a fraction (0.01 == 1%).
actual_close = test["Close"]
err = mean_absolute_error(actual_close, pred)
errP = mean_absolute_percentage_error(actual_close, pred)

# Build the DataFrame shown in the results table.
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': actual_close})
|
70 |
+
|
71 |
+
# Callback that supplies both error metrics and the results table to the UI.
def display_results():
    """Return the two error metrics as strings, followed by the results table.

    Reads the module-level ``err``, ``errP`` and ``result_df`` values.
    """
    metric_texts = [str(metric) for metric in (err, errP)]
    return metric_texts[0], metric_texts[1], result_df
|
74 |
+
|
75 |
+
# Build the Gradio interface: two text boxes for the error metrics, a table
# for the predictions, and a button that fills all three.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken between 2013 and 2015.

0.4% of average error was reached using LinearRegression.""")
    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")

    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")

    # The callback takes no inputs; its three return values map onto the
    # outputs in this order.
    btn.click(display_results, outputs=[error, errorP, table])

# Launch the Gradio interface.
demo.launch()
|
requirements.txt
ADDED
Binary file (3.09 kB). View file
|
|