Spaces:
Sleeping
Sleeping
update add feature
Browse files
app.py
CHANGED
@@ -1,94 +1,197 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
|
4 |
-
|
5 |
-
from
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
df['
|
24 |
-
df['
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
if df.iloc[j]['Date'] - df.iloc[0]['Date'] > timedelta(days=
|
36 |
-
if
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
df.iloc[j, df.columns.get_loc("mean
|
43 |
-
df.iloc[j
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
df[
|
51 |
-
df[
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
df = df.
|
56 |
-
df = df
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
demo.launch()
|
|
|
1 |
+
import yfinance as yf
|
2 |
+
import pickle
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
from sklearn.linear_model import LinearRegression
|
7 |
+
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
|
8 |
+
import gradio as gr
|
9 |
+
import pickle
|
10 |
+
import warnings
|
11 |
+
warnings.filterwarnings("ignore", category=FutureWarning, module="numpy._core.fromnumeric")
|
12 |
+
|
13 |
+
# Load the S&P 500 history and sort it chronologically so that the positional
# (iloc) windows below always look backwards in time.
df = pd.read_csv("datatset/sphist.csv")
df['Date'] = pd.to_datetime(df["Date"])
df = df.sort_values(by='Date', ascending=True)

# Trailing statistics of 'Open'. A column stays NaN for every row reached
# before the history spans more than 5 (resp. 365) calendar days.
# The creation order of these columns is preserved from the original script.
df['std 5'] = np.nan
df['mean 5'] = np.nan
df['mean 365'] = np.nan
df['std 365'] = np.nan

# Column positions and the first date never change inside the loop, so they
# are resolved once instead of on every row.
col_std_365 = df.columns.get_loc("std 365")
col_mean_365 = df.columns.get_loc("mean 365")
col_mean_5 = df.columns.get_loc("mean 5")
col_std_5 = df.columns.get_loc("std 5")
dates = df['Date']
first_date = dates.iloc[0]

# Start offsets of the two sliding windows. Each advances by one row per
# iteration once its time threshold has been crossed, which keeps the window
# length (in rows) constant from then on.
year_i = 0
day_i = 0
mean_d = np.nan
std_d = np.nan
mean_y = np.nan
std_y = np.nan

# Row 0 has no history, so the loop starts at the second row.
for j in range(1, len(df)):
    elapsed = dates.iloc[j - 1] - first_date

    if elapsed > timedelta(days=365):
        # The slice stops at j-1 (exclusive): neither row j nor row j-1
        # contributes to the statistics written on row j.
        year_window = df.iloc[year_i:j - 1]['Open']
        mean_y = np.mean(year_window)
        std_y = np.std(year_window)
        year_i += 1
    df.iloc[j, col_std_365] = std_y
    df.iloc[j, col_mean_365] = mean_y

    if elapsed > timedelta(days=5):
        day_window = df.iloc[day_i:j - 1]['Open']
        mean_d = np.mean(day_window)
        std_d = np.std(day_window)
        day_i += 1
    df.iloc[j, col_mean_5] = mean_d
    df.iloc[j, col_std_5] = std_d
|
53 |
+
|
54 |
+
# Add 5-row rolling means as extra model features. '5 Days Open' is created
# first and shifted down by one row, so each row carries the mean that ends
# on the previous row.
window_rows = 5
df['5 Days Open'] = df['Open'].rolling(window=window_rows, center=False).mean().shift(1)
df['Year'] = df['Date'].apply(lambda ts: ts.year)
for source_col in ('High', 'Low', 'Volume'):
    df[f'5 Days {source_col}'] = df[source_col].rolling(window=window_rows, center=False).mean()

# Remove every row that still has a NaN in any column, then everything dated
# before 1951-01-03.
df = df.dropna(axis=0)
history_start = datetime(year=1951, month=1, day=3)
too_old = df[df["Date"] < history_start].index
df = df.drop(too_old, axis=0)

# Chronological split: rows before 2013 train the model, the rest evaluate it.
split_date = datetime(year=2013, month=1, day=1)
is_test_row = df['Date'] >= split_date
train = df[df['Date'] < split_date]
test = df[is_test_row]
|
67 |
+
|
68 |
+
# Train the model on every column except the raw prices, the volume and the
# date; the target is the same-day 'Close'.
non_feature_cols = ["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']
train_features = train.drop(columns=non_feature_cols)
test_features = test.drop(columns=non_feature_cols)

lr = LinearRegression()
lr.fit(train_features, train["Close"])
pred = lr.predict(test_features)

# Persist the fitted model; predire() reloads it from this file.
with open('linear_regression_model.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

# Compute the errors on the held-out rows.
actual_close = test["Close"]
err = mean_absolute_error(actual_close, pred)
errP = mean_absolute_percentage_error(actual_close, pred)

# Build the DataFrame shown in the results table.
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})
|
80 |
+
|
81 |
+
# Callback that returns the errors and the results table.
def display_results():
    """Return the evaluation summary computed at module load.

    Returns:
        A 3-tuple: the mean absolute error as text, the mean absolute
        percentage error as text, and the predictions-vs-actual DataFrame.
    """
    mae_text = str(err)
    mape_text = str(errP)
    return mae_text, mape_text, result_df
|
84 |
+
|
85 |
+
# Predict the S&P 500 close for a user-selected date.
def predire(date):
    """Predict the S&P 500 close for one date and return it with the real close.

    Downloads ^GSPC history from two years before the selected date up to now,
    rebuilds the feature columns, reloads the pickled model and runs it on the
    row whose date matches the selection.

    Args:
        date: Timestamp accepted by ``datetime.fromtimestamp`` identifying the
            selected day (only its year, month and day are used).

    Returns:
        A ``(prediction, actual_close)`` tuple for the selected date.

    Raises:
        gr.Error: If no feature row exists for the selected date (for example
            a weekend, a market holiday or a date in the future).
    """
    # Ticker symbol of the S&P 500.
    symbole = "^GSPC"

    selected_date = datetime.fromtimestamp(date)
    year, month, day = selected_date.year, selected_date.month, selected_date.day
    print(year, month, day)

    def get_datas(year, month, day):
        """Download the price history from two years before the date until now."""
        # Feb 29 has no counterpart two years earlier (that year is never a
        # leap year), so fall back to Feb 28 instead of raising ValueError.
        start_day = 28 if (month, day) == (2, 29) else day
        date_debut = datetime(year=year - 2, month=month, day=start_day)
        date_fin = datetime.now()
        return yf.download(symbole, start=date_debut, end=date_fin)

    # Keep only the needed columns; copy so adding 'Date' does not write into
    # a selection of the downloaded frame.
    df = get_datas(year, month, day)[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    df['Date'] = df.index

    def add_features(df):
        """Add the trailing-window and rolling-mean columns, then drop NaN rows."""
        # Column creation order is preserved from the original code.
        df['std 5'] = np.nan
        df['mean 5'] = np.nan
        df['mean 365'] = np.nan
        df['std 365'] = np.nan

        # Column locations do not change inside the loop; resolve them once.
        col_std_365 = df.columns.get_loc("std 365")
        col_mean_365 = df.columns.get_loc("mean 365")
        col_mean_5 = df.columns.get_loc("mean 5")
        col_std_5 = df.columns.get_loc("std 5")

        # Start offsets of the two sliding windows; each advances one row per
        # iteration once its time threshold has been crossed.
        year_i = 0
        day_i = 0
        mean_d = np.nan
        std_d = np.nan
        mean_y = np.nan
        std_y = np.nan

        # Row 0 has no history, so the loop starts at the second row.
        for j in range(1, len(df)):
            # The comparison yields a one-element Series here (the original
            # code unwraps it with .iloc[0]), so the same unwrapping is kept.
            elapsed = df.iloc[j - 1]['Date'] - df.iloc[0]['Date']

            if (elapsed > timedelta(days=365)).iloc[0]:
                # The slice stops at j-1 (exclusive): rows j-1 and j are not
                # part of the statistics written on row j.
                mean_y = np.mean(df.iloc[year_i:j - 1]['Open'])
                std_y = np.std(df.iloc[year_i:j - 1]['Open'])
                year_i += 1
            df.iloc[j, col_std_365] = std_y
            df.iloc[j, col_mean_365] = mean_y

            if (elapsed > timedelta(days=5)).iloc[0]:
                mean_d = np.mean(df.iloc[day_i:j - 1]["Open"])
                std_d = np.std(df.iloc[day_i:j - 1]['Open'])
                day_i += 1
            df.iloc[j, col_mean_5] = mean_d
            df.iloc[j, col_std_5] = std_d

        # Add the 5-row rolling means used as model features.
        df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
        df['Year'] = df['Date'].apply(lambda x: x.year)
        df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
        df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
        df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()

        # Shift the column down by one row.
        df['5 Days Open'] = df['5 Days Open'].shift(1)

        df = df.dropna(axis=0)
        print(df.tail())
        return df

    df = add_features(df)

    # Load the model from the pickle file.
    # NOTE(review): pickle.load runs code embedded in the file; this is only
    # acceptable because the file is written by this same script at start-up.
    with open('linear_regression_model.pkl', 'rb') as file:
        lr = pickle.load(file)

    # Select the row for the requested day once and reuse it for both outputs.
    selected_rows = df[df['Date'] == datetime(year=year, month=month, day=day)]
    if selected_rows.empty:
        # Without this guard lr.predict fails on an empty frame with an
        # error message that says nothing about the date.
        raise gr.Error(
            f"No S&P 500 data available for {selected_date:%Y-%m-%d}. "
            "Please select a past trading day."
        )

    features = selected_rows.drop(columns=["Open", 'High', 'Low', 'Volume', 'Close', 'Date'])
    prediction = lr.predict(features)[-1]
    # Take the single value explicitly instead of calling float() on a Series.
    actual_close = float(selected_rows['Close'][symbole].iloc[-1])

    return prediction, actual_close
|
164 |
+
|
165 |
+
|
166 |
+
|
167 |
+
|
168 |
+
# Build the Gradio interface.
# NOTE(review): the source this was recovered from had lost its indentation,
# so the Row/Column nesting below is a reconstruction; confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown(
        "This model was trained on S&P 500 stock price before 2013. "
        "The predictions below are taken between 2013 and 2015.\n"
        "\n"
        "0.4% of average error was reached using LinearRegression."
    )

    # Static evaluation results, filled by the "Show Results" button.
    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")

    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")

    # On-demand prediction for a date chosen by the user.
    gr.Markdown("## Dynamic prediction")
    with gr.Row():
        with gr.Column():
            date_input = gr.DateTime(label="Select Date")
            prediction = gr.Textbox(label="Prediction")
        with gr.Column():
            # The caption must be passed as `label`: the first positional
            # argument of gr.Textbox is the initial value, which this box's
            # output would overwrite, leaving it without a caption.
            true_val = gr.Textbox(label="Real close price")
    with gr.Row():
        with gr.Column():
            btn2 = gr.Button("Predict for your date")

    btn.click(display_results, outputs=[error, errorP, table])
    btn2.click(predire, inputs=date_input, outputs=[prediction, true_val])

# Launch the Gradio interface.
demo.launch()
|