Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

James McCool committed on Jun 2

Commit

91e473e

1 Parent(s): 5de7ed9

Refactor name matching logic and update dependencies

- Replaced the fuzzywuzzy library with rapidfuzz for improved performance in name matching operations.
- Removed the deprecated find_name_mismatches function and introduced a new get_contest_names function to streamline the retrieval of unique player names from contest data.
- Enhanced the load_contest_file function to utilize the new name matching logic, ensuring consistent player name handling across the application.
- Maintained existing functionality while improving code clarity and efficiency.

Files changed (4) hide show

app.py +1 -1
global_func/find_name_mismatches.py +0 -99
global_func/get_contest_names.py +26 -0
global_func/load_contest_file.py +20 -1

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import streamlit as st
 st.set_page_config(layout="wide")
 import numpy as np
 import pandas as pd
-from fuzzywuzzy import process
 from collections import Counter
 from pymongo.mongo_client import MongoClient
 from pymongo.server_api import ServerApi

 st.set_page_config(layout="wide")
 import numpy as np
 import pandas as pd
+from rapidfuzz import process, fuzz
 from collections import Counter
 from pymongo.mongo_client import MongoClient
 from pymongo.server_api import ServerApi

global_func/find_name_mismatches.py DELETED Viewed

@@ -1,99 +0,0 @@
-import streamlit as st
-from fuzzywuzzy import process
-def find_name_mismatches(contest_df, projections_df, ownership_df, fpts_df):
-    name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
-    if 'player_names' not in projections_df.columns:
-        st.error("No 'player_names' column found in projections file")
-        return contest_df, projections_df  # NOTE(review): 2 values here, but the other return paths yield 4
-    # Collect the unique names found in the contest player columns and in the projections
-    portfolio_players = set()
-    for col in name_columns:
-        portfolio_players.update(contest_df[col].unique())
-    projection_players = set(projections_df['player_names'].unique())
-    portfolio_players_list = list(portfolio_players)
-    projection_players_list = list(projection_players)
-    # Names present in the projections but absent from the contest player columns (projections minus contest)
-    players_missing_from_projections = list(projection_players - portfolio_players)
-    # Automatically accept matches scoring 95 or higher before starting the interactive process
-    auto_matches = {}
-    players_to_process = []
-    for player in players_missing_from_projections:
-        if not isinstance(player, str):
-            st.warning(f"Skipping non-string value: {player}")
-            continue
-        closest_matches = process.extract(player, portfolio_players_list, limit=1)
-        if closest_matches[0][1] >= 95:  # High-confidence match: map projection name -> contest name
-            match_name = closest_matches[0][0]
-            auto_matches[player] = match_name
-            st.success(f"Automatically matched '{player}' with '{match_name}' ({closest_matches[0][1]}% match)")
-        elif closest_matches[0][1] >= 75:  # Scores 75-94 are queued for manual selection
-            players_to_process.append(player)
-        else:
-            st.warning(f"No match found for '{player}'")
-    if players_to_process:
-        st.warning(f"Found {len(players_to_process)} players that need manual matching")
-        # Create a single form so all manual selections are submitted together
-        with st.form("name_matching_form"):
-            # Create one tab per player that needs manual matching
-            tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])
-            # Maps projection name -> the radio option chosen for it
-            selections = {}
-            # Populate each tab
-            for i, player in enumerate(players_to_process):
-                with tabs[i]:
-                    st.write(f"**Projection Name:** {player}")
-                    # Find the top 3 closest contest-side names
-                    closest_matches = process.extract(player, portfolio_players_list, limit=3)
-                    # Radio options are rendered as "name (score%)", plus an opt-out entry
-                    options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
-                    options.append("None of these")
-                    selections[player] = st.radio(
-                        f"Select correct match:",
-                        options,
-                        key=f"radio_{player}"
-                    )
-            # Submit button for the entire form
-            submitted = st.form_submit_button("Apply All Changes")
-            if submitted:  # NOTE(review): when not submitted, the function falls through and returns None
-                # Apply automatic matches: rewrite the contest-side name to the projection name
-                for projection_name, contest_name in auto_matches.items():
-                    for col in name_columns:
-                        contest_df[col] = contest_df[col].replace(contest_name, projection_name)
-                    ownership_df['Player'] = ownership_df['Player'].replace(contest_name, projection_name)
-                    fpts_df['Player'] = fpts_df['Player'].replace(contest_name, projection_name)
-                # Apply manual selections, skipping players left on "None of these"
-                for projection_name, selection in selections.items():
-                    if selection != "None of these":
-                        selected_name = selection.split(" (")[0]  # Keep the text before the first " (" (drops the score suffix)
-                        for col in name_columns:
-                            contest_df[col] = contest_df[col].replace(selected_name, projection_name)
-                        ownership_df['Player'] = ownership_df['Player'].replace(selected_name, projection_name)
-                        fpts_df['Player'] = fpts_df['Player'].replace(selected_name, projection_name)
-                        st.success(f"Replaced '{selected_name}' with '{projection_name}'")
-                st.success("All changes applied successfully!")
-                return contest_df, projections_df, ownership_df, fpts_df
-    else:
-        st.success("All players have been automatically matched!")
-        # No manual work queued: apply the automatic matches (contest-side name -> projection name)
-        for projection_name, contest_name in auto_matches.items():
-            for col in name_columns:
-                contest_df[col] = contest_df[col].replace(contest_name, projection_name)
-            ownership_df['Player'] = ownership_df['Player'].replace(contest_name, projection_name)
-            fpts_df['Player'] = fpts_df['Player'].replace(contest_name, projection_name)
-        return contest_df, projections_df, ownership_df, fpts_df

global_func/get_contest_names.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import time
+from rapidfuzz import process, fuzz
+def get_contest_names(contest_frame):
+    """
+    Get all unique names from the contest dataframe's player columns.
+    Args:
+        contest_frame: DataFrame containing contest data
+    Returns:
+        list: List of unique player names
+    """
+    # Treat every column except 'BaseName' and 'EntryCount' as a player-name column
+    player_columns = [col for col in contest_frame.columns
+                     if col not in ['BaseName', 'EntryCount']]
+    # Flatten the selected columns into a single 1-D array of values
+    unique_names = contest_frame[player_columns].values.flatten()
+    unique_names = pd.unique(unique_names)  # Drop duplicate values
+    unique_names = unique_names[~pd.isna(unique_names)]  # Drop entries that pd.isna flags as missing
+    return list(unique_names)

global_func/load_contest_file.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import streamlit as st
 import pandas as pd
 def load_contest_file(upload, helper = None, sport = None):
     if sport == 'MLB':
@@ -52,6 +54,23 @@ def load_contest_file(upload, helper = None, sport = None):
                 df_helper = helper_df[['Player', 'Salary', 'Team']]
             print('Made it through helper')
             # Create separate dataframes for different player attributes
             if helper is not None:
@@ -95,7 +114,7 @@ def load_contest_file(upload, helper = None, sport = None):
                 cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
             elif sport == 'GOLF':
                 cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
             print('Made it through check_lineups')
             # Get unique entry names

 import streamlit as st
 import pandas as pd
+from get_contest_names import get_contest_names
+from rapidfuzz import process, fuzz
 def load_contest_file(upload, helper = None, sport = None):
     if sport == 'MLB':
                 df_helper = helper_df[['Player', 'Salary', 'Team']]
             print('Made it through helper')
+            contest_names = df.Player.unique()
+            helper_names = helper_df.Player.unique()
+            contest_match_dict = {}
+            for names in helper_names:
+                match = process.extractOne(
+                    names,
+                    contest_names,
+                    score_cutoff = 85
+                )
+                if match:
+                    contest_match_dict[names] = match[0]
+                else:
+                    contest_match_dict[names] = names
+            df_helper['Player'] = df_helper['Player'].map(contest_match_dict)
             # Create separate dataframes for different player attributes
             if helper is not None:
                 cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
             elif sport == 'GOLF':
                 cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
             print('Made it through check_lineups')
             # Get unique entry names