Pierre918 committed on
Commit
868fa55
·
verified ·
1 Parent(s): d51c276

update add feature

Browse files
Files changed (1) hide show
  1. app.py +196 -93
app.py CHANGED
@@ -1,94 +1,197 @@
1
- import pandas as pd
2
- import numpy as np
3
- from datetime import datetime, timedelta
4
- from sklearn.linear_model import LinearRegression
5
- from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
6
- import gradio as gr
7
-
8
- # Charger et préparer les données
9
- df = pd.read_csv("datatset/sphist.csv")
10
- df['Date'] = pd.to_datetime(df["Date"])
11
- df = df.sort_values(by='Date', ascending=True)
12
-
13
- year_i = -1
14
- day_i = -1
15
- mean_d = np.nan
16
- std_d = np.nan
17
- std_d_v = np.nan
18
- df['std 5'] = np.nan
19
- df['mean 5'] = np.nan
20
- mean_y = np.nan
21
- std_y = np.nan
22
- ratio = np.nan
23
- df['mean 365'] = np.nan
24
- df['std 365'] = np.nan
25
- j = 0
26
- for i, elt in df.iterrows():
27
- if df.iloc[j]['Date'] - df.iloc[0]['Date'] > timedelta(days=365):
28
- if year_i == -1:
29
- year_i = 0
30
- mean_y = np.mean(df.iloc[year_i:j]['Open'])
31
- std_y = np.std(df.iloc[year_i:j]['Open'])
32
- year_i += 1
33
- df.iloc[j, df.columns.get_loc("std 365")] = std_y
34
- df.iloc[j, df.columns.get_loc("mean 365")] = mean_y
35
- if df.iloc[j]['Date'] - df.iloc[0]['Date'] > timedelta(days=5):
36
- if day_i == -1:
37
- day_i = 0
38
- mean_d = np.mean(df.iloc[day_i:j]["Open"])
39
- std_d = np.std(df.iloc[day_i:j]['Open'])
40
- std_d_v = np.std(df.iloc[day_i:j]['Volume'])
41
- day_i += 1
42
- df.iloc[j, df.columns.get_loc("mean 5")] = mean_d
43
- df.iloc[j, df.columns.get_loc("std 5")] = std_d
44
- j += 1
45
-
46
- # Ajouter des indicateurs utiles pour notre modèle de ML
47
- df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
48
- df['Year'] = df['Date'].apply(lambda x: x.year)
49
- df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
50
- df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
51
- df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()
52
-
53
- # Déplacer la colonne d'un jour
54
- df['5 Days Open'] = df['5 Days Open'].shift(1)
55
- df = df.dropna(axis=0)
56
- df = df.drop(df[df["Date"] < datetime(year=1951, month=1, day=3)].index, axis=0)
57
- test = df[df['Date'] >= datetime(year=2013, month=1, day=1)]
58
- train = df[df['Date'] < datetime(year=2013, month=1, day=1)]
59
-
60
- # Entraîner le modèle
61
- lr = LinearRegression().fit(train.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']), train["Close"])
62
- pred = lr.predict(test.drop(columns=["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']))
63
-
64
- # Calculer les erreurs
65
- err = mean_absolute_error(test["Close"], pred)
66
- errP = mean_absolute_percentage_error(test["Close"], pred)
67
-
68
- # Créer le DataFrame pour le tableau
69
- result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})
70
-
71
- # Fonction pour afficher l'erreur et le tableau
72
- def display_results():
73
- return str(err), str(errP), result_df
74
-
75
- # Créer l'interface Gradio
76
- with gr.Blocks() as demo:
77
- gr.Markdown("# Linear Regression Model Results")
78
- gr.Markdown("""This model was trained on S&P 500 stock price before 2013. The predictions below are taken betweek 2013 and 2015.
79
-
80
- 0.4% of average error was reached using LinearRegression.""")
81
- with gr.Row():
82
- with gr.Column():
83
- error = gr.Textbox(label="Mean Absolute Error")
84
- errorP = gr.Textbox(label="Mean Absolute Percentage Error")
85
- table = gr.Dataframe(label="Predictions vs Actual Close Prices")
86
-
87
- with gr.Row():
88
- with gr.Column():
89
- btn = gr.Button("Show Results")
90
-
91
- btn.click(display_results, outputs=[error, errorP, table])
92
-
93
- # Lancer l'interface Gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  demo.launch()
 
1
+ import yfinance as yf
2
+ import pickle
3
+ import pandas as pd
4
+ import numpy as np
5
+ from datetime import datetime, timedelta
6
+ from sklearn.linear_model import LinearRegression
7
+ from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
8
+ import gradio as gr
9
+ import pickle
10
+ import warnings
11
+ warnings.filterwarnings("ignore", category=FutureWarning, module="numpy._core.fromnumeric")
12
+
13
# Load the S&P 500 history and put it in chronological order
df = pd.read_csv("datatset/sphist.csv")
df['Date'] = pd.to_datetime(df["Date"])
df = df.sort_values(by='Date', ascending=True)


def _lagged_open_stats(frame, days):
    """Return trailing mean and population std of 'Open' for every row.

    A row gets a value once the row before it lies more than ``days``
    calendar days after the first row of ``frame``. From then on the window
    slides forward by one row per row, so it keeps a fixed number of rows,
    and it stops two rows before the current row (the current and the
    previous rows are never part of their own statistics).

    Args:
        frame: DataFrame sorted by ascending 'Date' with 'Date' and 'Open'
            columns.
        days: calendar-day span that must have elapsed before the first
            statistics are produced.

    Returns:
        tuple: ``(means, stds)`` as float arrays aligned positionally with
        ``frame``; rows without enough history hold NaN.
    """
    size = len(frame)
    means = np.full(size, np.nan)
    stds = np.full(size, np.nan)
    if size == 0:
        return means, stds

    # ready[k] is True when row k is more than `days` days after the first row
    ready = ((frame['Date'] - frame['Date'].iloc[0]) > timedelta(days=days)).to_numpy()
    opens = frame['Open']
    start = 0
    for j in range(1, size):
        if ready[j - 1]:
            window = opens.iloc[start:j - 1]
            means[j] = np.mean(window)
            stds[j] = np.std(window)
            start += 1
    return means, stds


mean_5, std_5 = _lagged_open_stats(df, 5)
mean_365, std_365 = _lagged_open_stats(df, 365)

# Keep this creation order: the remaining columns become the model's
# features, in this order, when the non-feature columns are dropped later.
df['std 5'] = std_5
df['mean 5'] = mean_5
df['mean 365'] = mean_365
df['std 365'] = std_365
53
+
54
# Add indicators that are useful for the ML model
df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
df['Year'] = df['Date'].dt.year
df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()

# Shift the 5-day Open average by one day so a row only sees earlier opens.
# NOTE(review): the High/Low/Volume averages are not shifted, so they still
# include the current row — presumably unintended look-ahead. Left as is
# because predire() builds its features the same way; confirm before changing.
df['5 Days Open'] = df['5 Days Open'].shift(1)
df = df.dropna(axis=0)
df = df[df["Date"] >= datetime(year=1951, month=1, day=3)]

# Rows from this date onwards are held out for evaluation
_SPLIT_DATE = datetime(year=2013, month=1, day=1)
# Raw price columns and the date are not model inputs
_NON_FEATURE_COLUMNS = ["Open", 'High', 'Low', 'Volume', 'Adj Close', 'Close', 'Date']

test = df[df['Date'] >= _SPLIT_DATE]
train = df[df['Date'] < _SPLIT_DATE]

# Train the model
lr = LinearRegression().fit(train.drop(columns=_NON_FEATURE_COLUMNS), train["Close"])
pred = lr.predict(test.drop(columns=_NON_FEATURE_COLUMNS))

# Persist the fitted model; predire() reloads it from this file
with open('linear_regression_model.pkl', 'wb') as file:
    pickle.dump(lr, file)

# Compute the errors on the held-out rows
err = mean_absolute_error(test["Close"], pred)
errP = mean_absolute_percentage_error(test["Close"], pred)

# Build the DataFrame shown in the results table
result_df = pd.DataFrame({'Predictions': pred, 'Actual Close': test['Close']})
80
+
81
# Callback that feeds the error boxes and the results table
def display_results():
    """Return the hold-out metrics and the prediction table for the UI.

    Returns:
        tuple: the mean absolute error as text, the mean absolute percentage
        error as text, and the predictions-vs-actual DataFrame.
    """
    mae_text = str(err)
    mape_text = str(errP)
    return mae_text, mape_text, result_df
84
+
85
# Predict the S&P 500 close for a user-selected day from Yahoo Finance data
def predire(date):
    """Predict the S&P 500 closing price for one day and return it with the real close.

    Quotes are downloaded from two years before the selected day until now,
    the same features as the training script are rebuilt on them, and the
    pickled linear regression is applied to the row of the selected day.

    Args:
        date: POSIX timestamp of the selected moment; it is converted with
            ``datetime.fromtimestamp`` and only the calendar day is kept.

    Returns:
        tuple: ``(predicted_close, actual_close)`` as floats.

    Raises:
        gr.Error: if no date was given, or if no usable row exists for that
            day (weekend, market holiday, future date, failed download, or
            not enough history to build the features).
    """
    symbole = "^GSPC"
    non_feature_columns = ["Open", 'High', 'Low', 'Volume', 'Close', 'Date']

    if date is None:
        raise gr.Error("Please select a date first.")

    selected_date = datetime.fromtimestamp(date)
    year, month, day = selected_date.year, selected_date.month, selected_date.day
    target_day = datetime(year=year, month=month, day=day)

    def get_datas(year, month, day):
        """Download daily quotes from two years before the given day until now."""
        if (month, day) == (2, 29):
            # Two years before a leap year is never a leap year, so
            # 29 February does not exist there: start on the 28th instead.
            day = 28
        date_debut = datetime(year=year - 2, month=month, day=day)
        date_fin = datetime.now()
        return yf.download(symbole, start=date_debut, end=date_fin)

    def trailing_open_stats(frame, days):
        """Return trailing mean and population std of 'Open' for every row.

        A row gets a value once the row before it lies more than ``days``
        calendar days after the first row; the window then slides by one row
        per row and stops two rows before the current row.
        """
        size = len(frame)
        means = np.full(size, np.nan)
        stds = np.full(size, np.nan)
        if size == 0:
            return means, stds
        ready = ((frame['Date'] - frame['Date'].iloc[0]) > timedelta(days=days)).to_numpy()
        opens = frame['Open']
        start = 0
        for j in range(1, size):
            if ready[j - 1]:
                window = opens.iloc[start:j - 1]
                means[j] = np.mean(window)
                stds[j] = np.std(window)
                start += 1
        return means, stds

    def add_features(df):
        """Add the model's input columns and drop rows lacking history."""
        mean_5, std_5 = trailing_open_stats(df, 5)
        mean_365, std_365 = trailing_open_stats(df, 365)
        # Same creation order as in the training script, so the feature
        # columns line up with the ones the model was fitted on.
        df['std 5'] = std_5
        df['mean 5'] = mean_5
        df['mean 365'] = mean_365
        df['std 365'] = std_365

        # Add indicators that are useful for the ML model
        df['5 Days Open'] = df['Open'].rolling(center=False, window=5).mean()
        df['Year'] = df['Date'].dt.year
        df['5 Days High'] = df['High'].rolling(center=False, window=5).mean()
        df['5 Days Low'] = df['Low'].rolling(center=False, window=5).mean()
        df['5 Days Volume'] = df['Volume'].rolling(center=False, window=5).mean()

        # Shift the 5-day Open average by one day
        df['5 Days Open'] = df['5 Days Open'].shift(1)

        return df.dropna(axis=0)

    raw = get_datas(year, month, day)
    if raw is None or raw.empty:
        raise gr.Error("No market data could be downloaded for the selected period.")
    if isinstance(raw.columns, pd.MultiIndex):
        # Recent yfinance releases return (field, ticker) columns; only one
        # ticker is requested, so keep the field level alone.
        raw.columns = raw.columns.get_level_values(0)

    # Work on a copy so adding columns never writes into a slice of `raw`
    df = raw[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    df['Date'] = df.index
    df = add_features(df)

    day_rows = df[df['Date'] == target_day]
    if day_rows.empty:
        raise gr.Error(
            f"No S&P 500 data for {target_day:%Y-%m-%d}: "
            "pick a past trading day (not a weekend or market holiday)."
        )

    # Load the model written by the training step at start-up.
    # NOTE: pickle must only ever be loaded from files this app produced.
    with open('linear_regression_model.pkl', 'rb') as file:
        lr = pickle.load(file)

    predicted_close = float(lr.predict(day_rows.drop(columns=non_feature_columns))[-1])
    actual_close = float(day_rows['Close'].iloc[-1])
    return predicted_close, actual_close
164
+
165
+
166
+
167
+
168
# Build the Gradio interface.
# NOTE(review): the page this was recovered from had lost its indentation, so
# the Row/Column nesting below is reconstructed — confirm against the app.
with gr.Blocks() as demo:
    gr.Markdown("# Linear Regression Model Results")
    gr.Markdown(
        "This model was trained on S&P 500 stock price before 2013. "
        "The predictions below are taken between 2013 and 2015.\n\n"
        "0.4% of average error was reached using LinearRegression."
    )
    with gr.Row():
        with gr.Column():
            error = gr.Textbox(label="Mean Absolute Error")
            errorP = gr.Textbox(label="Mean Absolute Percentage Error")
            table = gr.Dataframe(label="Predictions vs Actual Close Prices")

    with gr.Row():
        with gr.Column():
            btn = gr.Button("Show Results")

    gr.Markdown("## Dynamic prediction")
    with gr.Row():
        with gr.Column():
            date_input = gr.DateTime(label="Select Date")
            prediction = gr.Textbox(label="Prediction")
        with gr.Column():
            # The caption must be passed as `label`; the first positional
            # argument of Textbox is its initial value.
            true_val = gr.Textbox(label="Real close price")
    with gr.Row():
        with gr.Column():
            btn2 = gr.Button("Predict for your date")

    # Event listeners have to be registered inside the Blocks context
    btn.click(display_results, outputs=[error, errorP, table])
    btn2.click(predire, inputs=date_input, outputs=[prediction, true_val])

# Launch the Gradio interface
demo.launch()