James McCool
Refactor reassess_lineup_edge function in reassess_edge.py to simplify the application of lineup edge calculations, enhancing code clarity and maintainability.
4edaf5f
# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
# This is because the predict_dupes.py function operates only on the lineups it takes in, and is meant to give edge around the median expectation of those lineups
# So what we need to do instead is find the differences between the original set and the post-exposure_spread.py set and calculate new metrics around the diff
# Need to find the diff in salary, median, and Own to calculate new Dupes, Finish_percentile, Win%, Edge, Weighted Own, and Geomean
# Then at the end run the Diversity function to set a new column for Diversity
# This way we only change the metrics where there is a difference in salary, median, or Own and leave the rest of the lineups alone

import pandas as pd
import numpy as np
import math
def calculate_weighted_ownership_vectorized(ownership_array):
    """
    Vectorized version of calculate_weighted_ownership using NumPy operations.

    Args:
        ownership_array: 2D array of ownership values (rows x players)

    Returns:
        array: Calculated weighted ownership values for each row
    """
    # Convert percentages to decimals and handle NaN values
    ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
    # Calculate row means
    row_means = np.mean(ownership_array, axis=1, keepdims=True)
    # Calculate average of each value with the overall mean
    value_means = (ownership_array + row_means) / 2
    # Take average of all those means
    avg_of_means = np.mean(value_means, axis=1)
    # Multiply by count of values
    weighted = avg_of_means * ownership_array.shape[1]
    # Subtract (max - min) for each row
    row_max = np.max(ownership_array, axis=1)
    row_min = np.min(ownership_array, axis=1)
    weighted = weighted - (row_max - row_min)
    # Convert back to percentage form
    return weighted * 10000
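
# Worked example with hypothetical values (not from the source data): a row of [10, 20, 30]
# percent ownership becomes [0.1, 0.2, 0.3]; the row mean is 0.2; the per-value means are
# [0.15, 0.2, 0.25]; their average 0.2 times 3 players gives 0.6; subtracting the spread
# (0.3 - 0.1) leaves 0.4; scaling by 10000 returns 4000.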
def calculate_weighted_ownership_wrapper(row_ownerships):
    """
    Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()

    Args:
        row_ownerships: Series containing ownership values in percentage form

    Returns:
        float: Calculated weighted ownership value
    """
    # Convert Series to 2D array for vectorized function
    ownership_array = row_ownerships.values.reshape(1, -1)
    return calculate_weighted_ownership_vectorized(ownership_array)[0]
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
    # Re-estimate the duplicate count for this lineup from its salary and ownership changes
    return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))
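
# Worked example with hypothetical values (salary_max=50000, salary=49800, salary_diff=-300,
# own_diff=-20, Dupes=10): (-300 / 100) + ((50000 + 200) / 100) = 499; 499 * (1 - (-20 / 100)) = 598.8;
# ceil(10 + 598.8) = 609.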
def reassess_lineup_edge(row: pd.Series, Contest_Size: int) -> float:
    # Base edge: win probability scaled by how far the lineup finishes above the median
    lineup_edge = row['Win%'] * ((.5 - row['Finish_percentile']) * (Contest_Size / 2.5))
    # Discount the edge by the expected number of duplicate entries
    if row['Dupes'] > 0:
        lineup_edge = lineup_edge / (row['Dupes'] + 1)
    # Note: centering against the portfolio-wide mean edge would require the full frame, not a single row
    return lineup_edge
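
# Worked example with hypothetical values (Win%=0.02, Finish_percentile=0.2, Contest_Size=1000,
# Dupes=3): 0.02 * ((0.5 - 0.2) * (1000 / 2.5)) = 0.02 * 120 = 2.4, then 2.4 / (3 + 1) = 0.6.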
def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, salary_max: int) -> pd.DataFrame:
    orig_df = original_frame.copy()
    orig_df = orig_df.reset_index(drop=True)
    refactored_df = refactored_frame.copy()
    refactored_df = refactored_df.reset_index(drop=True)

    # Differences between the post-exposure_spread.py portfolio and the original portfolio
    refactored_df['salary_diff'] = refactored_df['salary'] - orig_df['salary']
    refactored_df['median_diff'] = refactored_df['median'] - orig_df['median']
    refactored_df['own_diff'] = refactored_df['Own'] - orig_df['Own']

    # Only lineups whose median changed need their metrics recalculated
    change_mask = refactored_df[refactored_df['median_diff'] != 0]

    # Infer the number of player ownership columns from the columns that are not lineup-level metrics
    num_players = len([col for col in refactored_df.columns if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Win%', 'Edge', 'Weighted Own', 'Geomean', 'salary_diff', 'median_diff', 'own_diff']])
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]

    for lineups in change_mask.index:
        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
        # Finish_percentile and Win% are carried over unchanged
        refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
        refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
        # Recalculate weighted ownership and the geometric-mean ownership from this lineup's player ownerships
        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))

    return refactored_df
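
# A minimal usage sketch, assuming hypothetical CSV file names and contest settings that are
# not part of this module: compare the post-exposure_spread.py portfolio against the original
# portfolio and recalculate metrics only for the lineups whose salary, median, or Own changed.
if __name__ == '__main__':
    original = pd.read_csv('original_portfolio.csv')
    spread = pd.read_csv('spread_portfolio.csv')
    updated = reassess_edge(spread, original, maps_dict={}, site_var='draftkings',
                            type_var='Classic', Contest_Size=10000, strength_var='Average',
                            sport_var='NFL', salary_max=50000)
    print(updated[['Dupes', 'Edge', 'Weighted Own', 'Geomean']].head())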