Spaces:
Sleeping
Sleeping
James McCool
Refactor import statements across multiple files to replace 'fuzzywuzzy' with 'rapidfuzz' for improved performance and consistency in string matching functionality. Additionally, clean up unused imports in app.py and related global functions to enhance code clarity and maintainability.
d9db89f
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import time | |
from rapidfuzz import process | |
def find_name_mismatches(portfolio_df: pd.DataFrame, projections_df: pd.DataFrame) -> pd.DataFrame:
    """
    Reconcile player names in the projections with the names used in the portfolio.

    Every value found in the portfolio's player columns (all columns except the
    known lineup-metric columns) is compared against
    ``projections_df['player_names']``. For each portfolio name missing from the
    projections:

    * non-string values are skipped with a warning;
    * if the best fuzzy match scores 100, the projection row is renamed to the
      portfolio spelling automatically;
    * otherwise the name is shown in a Streamlit form where the user picks one
      of the top 3 fuzzy matches (or "None of these").

    Args:
        portfolio_df: Portfolio lineups; player names live in the non-metric columns.
        projections_df: Projections with a ``player_names`` column. It is
            modified in place when names are replaced.

    Returns:
        The same ``projections_df`` object, with any replacements applied.

    Side effects:
        Renders Streamlit messages/widgets, and on form submission stores the
        updated dataframe in ``st.session_state['projections_df']``.
    """
    # Portfolio columns that hold lineup metrics rather than player names.
    non_player_columns = {
        'salary', 'median', 'Own', 'Finish_percentile',
        'Dupes', 'Stack', 'Win%', 'Lineup Edge',
    }
    # Score reported for an exact match (the auto-match message promises 100%).
    perfect_score = 100

    # Get all player names from portfolio
    portfolio_players = set()
    for col in portfolio_df.columns:
        if col not in non_player_columns:
            portfolio_players.update(portfolio_df[col].unique())

    # Get all player names from projections
    projection_players_list = projections_df['player_names'].tolist()
    # Set built once so each membership test below is O(1).
    projection_players = set(projection_players_list)

    # Find players in portfolio that are missing from projections
    players_missing_from_projections = [
        player for player in portfolio_players if player not in projection_players
    ]

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        closest_matches = process.extract(player, projection_players_list, limit=1)
        # `closest_matches` is empty when there are no projection names to
        # compare against; such players fall through to manual processing.
        if closest_matches and closest_matches[0][1] >= perfect_score:
            match_name = closest_matches[0][0]
            projections_df.loc[projections_df['player_names'] == match_name, 'player_names'] = player
            st.success(f"Automatically matched '{match_name}' with '{player}' (100% match)")
        else:
            players_to_process.append(player)

    # Display results
    if players_missing_from_projections:
        st.warning("Players in portfolio but missing from projections")

        # Display remaining players
        if players_to_process:
            st.info(f"Players to process ({len(players_to_process)}):\n" +
                    "\n".join(f"- {player}" for player in players_to_process))

            # Create a form for batch processing
            with st.form("player_matching_form"):
                # Create tabs for each player
                tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])

                # Dictionary to store selections (player -> chosen radio label)
                selections = {}

                # Process each tab
                for idx, (tab, player) in enumerate(zip(tabs, players_to_process)):
                    with tab:
                        st.write(f"**Missing Player {idx + 1} of {len(players_to_process)}:** {player}")

                        # Find the top 3 closest matches
                        closest_matches = process.extract(player, projection_players_list, limit=3)

                        # Create radio buttons for selection
                        options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
                        options.append("None of these")

                        selected_option = st.radio(
                            f"Select correct match for {player}:",
                            options,
                            key=f"radio_{player}"
                        )
                        selections[player] = selected_option

                # Submit button for the entire form
                submitted = st.form_submit_button("Submit All Changes")

                if submitted:
                    # Process all selections
                    for player, selection in selections.items():
                        if selection != "None of these":
                            # Strip only the trailing " (score%)" suffix so names
                            # that themselves contain " (" are kept intact.
                            selected_name = selection.rsplit(" (", 1)[0]
                            projections_df.loc[projections_df['player_names'] == selected_name, 'player_names'] = player
                            st.success(f"Replaced '{selected_name}' with '{player}'")

                    # Update session state
                    st.session_state['projections_df'] = projections_df
                    st.success("All player name changes have been applied!")
    else:
        st.success("All portfolio players found in projections!")

    return projections_df