James McCool
committed on
Commit
·
680a8ac
1
Parent(s):
db65076
Refactor name matching analysis in app.py to streamline the process of matching portfolio and projection names. This update enhances the accuracy of name matching, improves session state management, and ensures proper handling of unmatched names during user interactions.
Browse files
app.py
CHANGED
@@ -179,117 +179,116 @@ with tab1:
|
|
179 |
|
180 |
projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
|
181 |
st.dataframe(projections.head(10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
if match:
|
233 |
-
projections_match_dict[projections_name] = match[0]
|
234 |
-
if match[1] < 100:
|
235 |
-
st.write(f"{projections_name} matched from projections to site csv {match[0]} with a score of {match[1]}%")
|
236 |
-
else:
|
237 |
-
projections_match_dict[projections_name] = projections_name
|
238 |
-
unmatched_proj_names.append(projections_name)
|
239 |
-
|
240 |
-
# Update projections with matched names
|
241 |
-
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
242 |
-
st.session_state['projections_df'] = projections
|
243 |
-
|
244 |
-
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
245 |
-
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
246 |
-
|
247 |
-
# Create match dictionary for portfolio names to projection names
|
248 |
-
projections_match_dict = {}
|
249 |
-
unmatched_proj_names = []
|
250 |
-
for projections_name in projection_names:
|
251 |
-
match = process.extractOne(
|
252 |
-
projections_name,
|
253 |
-
portfolio_names,
|
254 |
-
score_cutoff=87
|
255 |
-
)
|
256 |
-
if match:
|
257 |
-
projections_match_dict[projections_name] = match[0]
|
258 |
-
if match[1] < 100:
|
259 |
-
st.write(f"{projections_name} matched from portfolio to projections {match[0]} with a score of {match[1]}%")
|
260 |
-
else:
|
261 |
-
projections_match_dict[projections_name] = projections_name
|
262 |
-
unmatched_proj_names.append(projections_name)
|
263 |
-
|
264 |
-
# Update projections with matched names
|
265 |
-
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
266 |
-
st.session_state['projections_df'] = projections
|
267 |
-
|
268 |
-
if sport_var in stacking_sports:
|
269 |
-
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
270 |
-
st.session_state['portfolio']['Stack'] = st.session_state['portfolio'].apply(
|
271 |
-
lambda row: Counter(
|
272 |
-
team_dict.get(player, '') for player in row[2:]
|
273 |
-
if team_dict.get(player, '') != ''
|
274 |
-
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
|
275 |
-
axis=1
|
276 |
-
)
|
277 |
-
st.session_state['portfolio']['Size'] = st.session_state['portfolio'].apply(
|
278 |
-
lambda row: Counter(
|
279 |
-
team_dict.get(player, '') for player in row[2:]
|
280 |
-
if team_dict.get(player, '') != ''
|
281 |
-
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
|
282 |
-
axis=1
|
283 |
-
)
|
284 |
-
stack_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
|
285 |
-
size_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))
|
286 |
-
|
287 |
-
working_frame = st.session_state['portfolio'].copy()
|
288 |
-
try:
|
289 |
-
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
|
290 |
-
except:
|
291 |
-
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
|
292 |
-
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
|
293 |
|
294 |
# with tab2:
|
295 |
# if st.button('Clear data', key='reset2'):
|
|
|
179 |
|
180 |
projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
|
181 |
st.dataframe(projections.head(10))
|
182 |
+
|
183 |
+
if portfolio_file and projections_file:
|
184 |
+
if st.session_state['portfolio'] is not None and projections is not None:
|
185 |
+
st.subheader("Name Matching Analysis")
|
186 |
+
# Initialize projections_df in session state if it doesn't exist
|
187 |
+
# Get unique names from portfolio
|
188 |
+
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
189 |
+
try:
|
190 |
+
csv_names = st.session_state['csv_file']['Name'].tolist()
|
191 |
+
except:
|
192 |
+
csv_names = st.session_state['csv_file']['Nickname'].tolist()
|
193 |
+
projection_names = projections['player_names'].tolist()
|
194 |
+
|
195 |
+
# Create match dictionary for portfolio names to site csv names
|
196 |
+
portfolio_match_dict = {}
|
197 |
+
unmatched_names = []
|
198 |
+
for portfolio_name in portfolio_names:
|
199 |
+
match = process.extractOne(
|
200 |
+
portfolio_name,
|
201 |
+
csv_names,
|
202 |
+
score_cutoff=87
|
203 |
+
)
|
204 |
+
if match:
|
205 |
+
portfolio_match_dict[portfolio_name] = match[0]
|
206 |
+
if match[1] < 100:
|
207 |
+
st.write(f"{portfolio_name} matched from portfolio to site csv {match[0]} with a score of {match[1]}%")
|
208 |
+
else:
|
209 |
+
portfolio_match_dict[portfolio_name] = portfolio_name
|
210 |
+
unmatched_names.append(portfolio_name)
|
211 |
+
|
212 |
+
# Update portfolio with matched names
|
213 |
+
portfolio = st.session_state['portfolio'].copy()
|
214 |
+
player_columns = [col for col in portfolio.columns
|
215 |
+
if col not in ['salary', 'median', 'Own']]
|
216 |
+
|
217 |
+
# For each player column, update names using the match dictionary
|
218 |
+
for col in player_columns:
|
219 |
+
portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
|
220 |
+
st.session_state['portfolio'] = portfolio
|
221 |
+
|
222 |
+
# Create match dictionary for projection names to site csv names
|
223 |
+
projections_match_dict = {}
|
224 |
+
unmatched_proj_names = []
|
225 |
+
for projections_name in projection_names:
|
226 |
+
match = process.extractOne(
|
227 |
+
projections_name,
|
228 |
+
csv_names,
|
229 |
+
score_cutoff=87
|
230 |
+
)
|
231 |
+
if match:
|
232 |
+
projections_match_dict[projections_name] = match[0]
|
233 |
+
if match[1] < 100:
|
234 |
+
st.write(f"{projections_name} matched from projections to site csv {match[0]} with a score of {match[1]}%")
|
235 |
+
else:
|
236 |
+
projections_match_dict[projections_name] = projections_name
|
237 |
+
unmatched_proj_names.append(projections_name)
|
238 |
+
|
239 |
+
# Update projections with matched names
|
240 |
+
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
241 |
+
st.session_state['projections_df'] = projections
|
242 |
|
243 |
+
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
244 |
+
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
245 |
+
|
246 |
+
# Create match dictionary for projection names to portfolio names
|
247 |
+
projections_match_dict = {}
|
248 |
+
unmatched_proj_names = []
|
249 |
+
for projections_name in projection_names:
|
250 |
+
match = process.extractOne(
|
251 |
+
projections_name,
|
252 |
+
portfolio_names,
|
253 |
+
score_cutoff=87
|
254 |
+
)
|
255 |
+
if match:
|
256 |
+
projections_match_dict[projections_name] = match[0]
|
257 |
+
if match[1] < 100:
|
258 |
+
st.write(f"{projections_name} matched from portfolio to projections {match[0]} with a score of {match[1]}%")
|
259 |
+
else:
|
260 |
+
projections_match_dict[projections_name] = projections_name
|
261 |
+
unmatched_proj_names.append(projections_name)
|
262 |
+
|
263 |
+
# Update projections with matched names
|
264 |
+
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
265 |
+
st.session_state['projections_df'] = projections
|
266 |
+
|
267 |
+
if sport_var in stacking_sports:
|
268 |
+
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
269 |
+
st.session_state['portfolio']['Stack'] = st.session_state['portfolio'].apply(
|
270 |
+
lambda row: Counter(
|
271 |
+
team_dict.get(player, '') for player in row[2:]
|
272 |
+
if team_dict.get(player, '') != ''
|
273 |
+
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
|
274 |
+
axis=1
|
275 |
+
)
|
276 |
+
st.session_state['portfolio']['Size'] = st.session_state['portfolio'].apply(
|
277 |
+
lambda row: Counter(
|
278 |
+
team_dict.get(player, '') for player in row[2:]
|
279 |
+
if team_dict.get(player, '') != ''
|
280 |
+
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
|
281 |
+
axis=1
|
282 |
+
)
|
283 |
+
stack_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
|
284 |
+
size_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))
|
285 |
+
|
286 |
+
working_frame = st.session_state['portfolio'].copy()
|
287 |
+
try:
|
288 |
+
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
|
289 |
+
except:
|
290 |
+
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
|
291 |
+
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
# with tab2:
|
294 |
# if st.button('Clear data', key='reset2'):
|