# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics.
# This is because predict_dupes.py is scoped only to the set of lineups it takes in, and is meant to give edge around the median expectation of those lineups.
# So what we need to do instead is find the differences between the original set and the post-exposure_spread.py set and calculate new metrics around the diff.
# We need the diff in salary, median, and Own to calculate new Dupes, Finish_percentile, Win%, Edge, Weighted Own, and Geomean.
# Then at the end run the Diversity function to set a new column for Diversity.
# This way we only change the metrics where there is a difference in salary, median, or Own and leave the rest of the lineups alone.
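# Sketch of the idea on a hypothetical 2-lineup portfolio (illustrative numbers only):
#   original medians = [305.0, 298.5]
#   adjusted medians = [305.0, 301.2]  -> median_diff = [0.0, 2.7]
# Only the second lineup gets new Dupes/Edge/Weighted Own/Geomean; the first is left untouched.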
import pandas as pd
import numpy as np
import math
def calculate_weighted_ownership_vectorized(ownership_array):
"""
Vectorized version of calculate_weighted_ownership using NumPy operations.
Args:
ownership_array: 2D array of ownership values (rows x players)
Returns:
array: Calculated weighted ownership values for each row
"""
# Convert percentages to decimals and handle NaN values
ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
# Calculate row means
row_means = np.mean(ownership_array, axis=1, keepdims=True)
# Calculate average of each value with the overall mean
value_means = (ownership_array + row_means) / 2
# Take average of all those means
avg_of_means = np.mean(value_means, axis=1)
# Multiply by count of values
weighted = avg_of_means * ownership_array.shape[1]
# Subtract (max - min) for each row
row_max = np.max(ownership_array, axis=1)
row_min = np.min(ownership_array, axis=1)
weighted = weighted - (row_max - row_min)
    # Scale the result up to the final weighted ownership value
    return weighted * 10000
def calculate_weighted_ownership_wrapper(row_ownerships):
"""
Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
Args:
row_ownerships: Series containing ownership values in percentage form
Returns:
float: Calculated weighted ownership value
"""
# Convert Series to 2D array for vectorized function
ownership_array = row_ownerships.values.reshape(1, -1)
return calculate_weighted_ownership_vectorized(ownership_array)[0]
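
# Example for calculate_weighted_ownership_wrapper (hypothetical lineup with three players owned 20%, 30%, and 50%):
#   calculate_weighted_ownership_wrapper(pd.Series([20.0, 30.0, 50.0]))  # -> 7000.0
# Because averaging each value with the row mean leaves the mean unchanged, the result reduces to
#   (sum of decimal ownerships - (max - min)) * 10000 = (1.0 - 0.3) * 10000 = 7000.0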
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))
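
# Worked example for reassess_dupes (hypothetical values): Dupes=5, salary=49500, salary_diff=200, own_diff=10, salary_max=50000
#   ceil(5 + ((200 / 100) + ((50000 + 500) / 100)) * (1 - 0.10))
#   = ceil(5 + (2 + 505) * 0.9) = ceil(461.3) = 462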
def reassess_lineup_edge(row: pd.Series, Contest_Size: int) -> float:
    # Recompute the edge for a single lineup from its win probability and finish percentile
    lineup_edge = row['Win%'] * ((.5 - row['Finish_percentile']) * (Contest_Size / 2.5))
    # Split the edge across expected duplicate entries
    if row['Dupes'] > 0:
        lineup_edge = lineup_edge / (row['Dupes'] + 1)
    return lineup_edge
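
# Example for reassess_lineup_edge (hypothetical lineup): Win%=0.02, Finish_percentile=0.2, Dupes=3, Contest_Size=10000
#   0.02 * ((0.5 - 0.2) * (10000 / 2.5)) = 24.0, then 24.0 / (3 + 1) = 6.0
# The original version also subtracted the portfolio mean of Lineup Edge; that centering needs the
# full frame, so it is not applied inside this per-row helper.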
def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, salary_max: int) -> pd.DataFrame:
orig_df = original_frame.copy()
orig_df = orig_df.reset_index(drop=True)
refactored_df = refactored_frame.copy()
refactored_df = refactored_df.reset_index(drop=True)
refactored_df['salary_diff'] = refactored_df['salary'] - orig_df['salary']
refactored_df['median_diff'] = refactored_df['median'] - orig_df['median']
refactored_df['own_diff'] = refactored_df['Own'] - orig_df['Own']
    # Lineups whose median changed after exposure_spread.py; only these get recalculated
    change_mask = refactored_df[refactored_df['median_diff'] != 0]
    # Infer the player count by counting the columns that are not known metric or diff columns, then build the per-player ownership column names
    num_players = len([col for col in refactored_df.columns if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Win%', 'Edge', 'Weighted Own', 'Geomean', 'salary_diff', 'median_diff', 'own_diff']])
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
    for lineups in change_mask.index:
        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
        # Finish_percentile and Win% are currently carried over unchanged
        refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
        refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
        # Recompute weighted ownership and the geometric mean of ownership for this lineup only
        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).prod(), 1 / len(own_columns))
return refactored_df
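
# Minimal usage sketch (hypothetical frames and settings; the frame/column names must match those referenced above):
#   original = predict_dupes_output.copy()        # portfolio metrics before exposure_spread.py
#   refactored = exposure_spread_output.copy()    # the same lineups after exposure_spread.py
#   updated = reassess_edge(refactored, original, maps_dict={}, site_var='draftkings',
#                           type_var='Classic', Contest_Size=10000, strength_var='Average',
#                           sport_var='NBA', salary_max=50000)
# Only rows whose median changed are touched; every other lineup keeps its original metrics.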