James McCool
committed on
Commit
·
dc1c8da
1
Parent(s):
1604a8f
Enhance player matching interface in `find_name_mismatches.py` and update `app.py` for improved data display
Browse files
- Updated the matching logic to allow for automatic matches with a confidence level of 90% or higher, improving accuracy in identifying players.
- Introduced a form for batch processing of player matches, enhancing user experience during manual selections.
- Added dataframes in `app.py` to display the top 100 entries of `contest_df` and `projections_df`, improving visibility of the data being processed.
- app.py +2 -0
- global_func/find_name_mismatches.py +53 -52
app.py
CHANGED
|
@@ -78,6 +78,8 @@ with tab1:
|
|
| 78 |
|
| 79 |
# Update projections_df with any new matches
|
| 80 |
st.session_state['contest_df'], st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
|
|
|
|
|
|
| 81 |
|
| 82 |
with tab2:
|
| 83 |
if st.button('Clear data', key='reset3'):
|
|
|
|
| 78 |
|
| 79 |
# Update projections_df with any new matches
|
| 80 |
st.session_state['contest_df'], st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
| 81 |
+
st.dataframe(st.session_state['contest_df'].head(100))
|
| 82 |
+
st.dataframe(st.session_state['projections_df'].head(100))
|
| 83 |
|
| 84 |
with tab2:
|
| 85 |
if st.button('Clear data', key='reset3'):
|
global_func/find_name_mismatches.py
CHANGED
|
@@ -24,74 +24,75 @@ def find_name_mismatches(contest_df, projections_df):
|
|
| 24 |
projection_players_list = list(projection_players)
|
| 25 |
|
| 26 |
# Find players in portfolio that are missing from projections
|
| 27 |
-
players_missing_from_projections = list(projection_players -portfolio_players)
|
| 28 |
|
| 29 |
-
# Automatically handle
|
|
|
|
| 30 |
players_to_process = []
|
| 31 |
for player in players_missing_from_projections:
|
| 32 |
if not isinstance(player, str):
|
| 33 |
st.warning(f"Skipping non-string value: {player}")
|
| 34 |
continue
|
| 35 |
closest_matches = process.extract(player, portfolio_players_list, limit=1)
|
| 36 |
-
if closest_matches[0][1]
|
| 37 |
match_name = closest_matches[0][0]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
contest_df[col] = contest_df[col].replace(player, match_name)
|
| 41 |
-
st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
|
| 42 |
else:
|
| 43 |
players_to_process.append(player)
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
st.session_state.current_player_index = 0
|
| 48 |
-
st.session_state.players_to_process = players_to_process
|
| 49 |
-
|
| 50 |
-
# Display results
|
| 51 |
-
if players_missing_from_projections:
|
| 52 |
-
st.warning("Players in portfolio but missing from projections")
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
if st.session_state.current_player_index < len(st.session_state.players_to_process):
|
| 60 |
-
current_player = st.session_state.players_to_process[st.session_state.current_player_index]
|
| 61 |
-
|
| 62 |
-
# Find the top 3 closest matches
|
| 63 |
-
closest_matches = process.extract(current_player, portfolio_players_list, limit=3)
|
| 64 |
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
-
#
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
options,
|
| 74 |
-
key=f"radio_{current_player}"
|
| 75 |
-
)
|
| 76 |
|
| 77 |
-
if
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
# Update all occurrences in contest_df
|
| 81 |
for col in name_columns:
|
| 82 |
-
contest_df[col] = contest_df[col].replace(
|
| 83 |
-
st.success(f"Replaced '{current_player}' with '{selected_name}'")
|
| 84 |
-
st.session_state['contest_df'] = contest_df
|
| 85 |
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
| 94 |
else:
|
| 95 |
-
st.success("All
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
projection_players_list = list(projection_players)
|
| 25 |
|
| 26 |
# Find players in portfolio that are missing from projections
|
| 27 |
+
players_missing_from_projections = list(projection_players - portfolio_players)
|
| 28 |
|
| 29 |
+
# Automatically handle 90%+ matches before starting interactive process
|
| 30 |
+
auto_matches = {}
|
| 31 |
players_to_process = []
|
| 32 |
for player in players_missing_from_projections:
|
| 33 |
if not isinstance(player, str):
|
| 34 |
st.warning(f"Skipping non-string value: {player}")
|
| 35 |
continue
|
| 36 |
closest_matches = process.extract(player, portfolio_players_list, limit=1)
|
| 37 |
+
if closest_matches[0][1] >= 90: # If high confidence match found
|
| 38 |
match_name = closest_matches[0][0]
|
| 39 |
+
auto_matches[player] = match_name
|
| 40 |
+
st.success(f"Automatically matched '{player}' with '{match_name}' ({closest_matches[0][1]}% match)")
|
|
|
|
|
|
|
| 41 |
else:
|
| 42 |
players_to_process.append(player)
|
| 43 |
|
| 44 |
+
if players_to_process:
|
| 45 |
+
st.warning(f"Found {len(players_to_process)} players that need manual matching")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Create a form for batch processing
|
| 48 |
+
with st.form("name_matching_form"):
|
| 49 |
+
# Create tabs for each player
|
| 50 |
+
tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
# Dictionary to store selections
|
| 53 |
+
selections = {}
|
| 54 |
|
| 55 |
+
# Populate each tab
|
| 56 |
+
for i, player in enumerate(players_to_process):
|
| 57 |
+
with tabs[i]:
|
| 58 |
+
st.write(f"**Projection Name:** {player}")
|
| 59 |
+
|
| 60 |
+
# Find the top 3 closest matches
|
| 61 |
+
closest_matches = process.extract(player, portfolio_players_list, limit=3)
|
| 62 |
+
|
| 63 |
+
# Create radio buttons for selection
|
| 64 |
+
options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
|
| 65 |
+
options.append("None of these")
|
| 66 |
+
|
| 67 |
+
selections[player] = st.radio(
|
| 68 |
+
f"Select correct match:",
|
| 69 |
+
options,
|
| 70 |
+
key=f"radio_{player}"
|
| 71 |
+
)
|
| 72 |
|
| 73 |
+
# Submit button for the entire form
|
| 74 |
+
submitted = st.form_submit_button("Apply All Changes")
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
+
if submitted:
|
| 77 |
+
# Process automatic matches
|
| 78 |
+
for projection_name, contest_name in auto_matches.items():
|
|
|
|
| 79 |
for col in name_columns:
|
| 80 |
+
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
# Process manual selections
|
| 83 |
+
for projection_name, selection in selections.items():
|
| 84 |
+
if selection != "None of these":
|
| 85 |
+
selected_name = selection.split(" (")[0]
|
| 86 |
+
for col in name_columns:
|
| 87 |
+
contest_df[col] = contest_df[col].replace(selected_name, projection_name)
|
| 88 |
+
st.success(f"Replaced '{selected_name}' with '{projection_name}'")
|
| 89 |
+
|
| 90 |
+
st.success("All changes applied successfully!")
|
| 91 |
+
return contest_df, projections_df
|
| 92 |
else:
|
| 93 |
+
st.success("All players have been automatically matched!")
|
| 94 |
+
# Apply automatic matches
|
| 95 |
+
for projection_name, contest_name in auto_matches.items():
|
| 96 |
+
for col in name_columns:
|
| 97 |
+
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
| 98 |
+
return contest_df, projections_df
|