DFS_Portfolio_Manager / global_func /find_name_mismatches.py
James McCool
Refactor import statements across multiple files to replace 'fuzzywuzzy' with 'rapidfuzz' for improved performance and consistency in string matching functionality. Additionally, clean up unused imports in app.py and related global functions to enhance code clarity and maintainability.
d9db89f
import streamlit as st
import numpy as np
import pandas as pd
import time
from rapidfuzz import process
def find_name_mismatches(portfolio_df, projections_df):
    """
    Find and handle name mismatches between portfolio and projections dataframes.

    Every value found in the portfolio's player columns (all columns except
    the known metric columns) is checked against
    ``projections_df['player_names']``. Names that are absent from the
    projections are fuzzy-matched against the projection names:

    * a best candidate scoring 100 is applied automatically;
    * every other name is shown in a Streamlit form, one tab per player,
      where the user picks one of the three closest candidates or
      "None of these".

    Applying a match renames the matching projection row(s) to the spelling
    used in the portfolio.

    Args:
        portfolio_df: DataFrame whose non-metric columns hold player names.
        projections_df: DataFrame with a ``player_names`` column. It is
            modified in place.

    Returns:
        The same ``projections_df`` object, with matched names rewritten.
    """
    # Score at which the best candidate is accepted without asking the user.
    # NOTE(review): if the installed rapidfuzz applies no default
    # preprocessing, a name that is absent from the projections may never
    # reach this score -- confirm whether a processor was intended.
    perfect_score = 100

    # Portfolio columns that hold lineup metrics rather than player names.
    metric_columns = {
        'salary', 'median', 'Own', 'Finish_percentile',
        'Dupes', 'Stack', 'Win%', 'Lineup Edge',
    }

    # Get all player names from portfolio
    portfolio_players = set()
    for col in portfolio_df.columns:
        if col not in metric_columns:
            portfolio_players.update(portfolio_df[col].unique())

    # Get all player names from projections. The list feeds the fuzzy search;
    # the set gives constant-time membership tests.
    projection_players_list = projections_df['player_names'].tolist()
    projection_players = set(projection_players_list)

    # Find players in portfolio that are missing from projections
    players_missing_from_projections = [
        player for player in portfolio_players
        if player not in projection_players
    ]

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        # Guard against an empty result so a projections table without
        # usable names does not raise IndexError.
        if closest_matches and closest_matches[0][1] >= perfect_score:
            match_name = closest_matches[0][0]
            projections_df.loc[projections_df['player_names'] == match_name, 'player_names'] = player
            st.success(f"Automatically matched '{match_name}' with '{player}' (100% match)")
        else:
            players_to_process.append(player)

    # Display results
    if not players_missing_from_projections:
        st.success("All portfolio players found in projections!")
        return projections_df

    st.warning("Players in portfolio but missing from projections")

    # Nothing left for the user to resolve.
    if not players_to_process:
        return projections_df

    # Display remaining players
    st.info(f"Players to process ({len(players_to_process)}):\n" +
            "\n".join(f"- {player}" for player in players_to_process))

    # Create a form for batch processing
    with st.form("player_matching_form"):
        # Create tabs for each player
        tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])

        # Selected radio label per player.
        selections = {}
        # Per player: radio label -> projection name. The chosen name is
        # looked up here instead of being parsed back out of the label, so
        # names that themselves contain " (" are handled correctly.
        candidates_by_player = {}

        for idx, (tab, player) in enumerate(zip(tabs, players_to_process)):
            with tab:
                st.write(f"**Missing Player {idx + 1} of {len(players_to_process)}:** {player}")

                # Find the top 3 closest matches
                closest_matches = process.extract(player, projection_players_list, limit=3)

                # Create radio buttons for selection
                options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
                candidates_by_player[player] = {
                    label: match[0] for label, match in zip(options, closest_matches)
                }
                options.append("None of these")

                selected_option = st.radio(
                    f"Select correct match for {player}:",
                    options,
                    key=f"radio_{player}"
                )
                selections[player] = selected_option

        # Submit button for the entire form
        submitted = st.form_submit_button("Submit All Changes")

        if submitted:
            # Process all selections
            for player, selection in selections.items():
                selected_name = candidates_by_player[player].get(selection)
                if selected_name is None:
                    # "None of these" was chosen; leave this player unmatched.
                    continue
                projections_df.loc[projections_df['player_names'] == selected_name, 'player_names'] = player
                st.success(f"Replaced '{selected_name}' with '{player}'")

            # Update session state
            st.session_state['projections_df'] = projections_df
            st.success("All player name changes have been applied!")

    return projections_df