# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
# This is because the predict_dupes.py function is exclusive of the lineups it takes in, and is meant to give edge around the median expectation of those lineups
# So what we need to do instead is find the differences between the original set and the post-exposure_spread.py set and calculate new metrics around the diff
# Need to find the diff in salary, median, and Own to calculate new Dupes, Finish_percentile, Win%, Edge, Weighted Own, and Geomean
# Then at the end run the Diversity function to set a new column for Diversity
# This way we only change the metrics where we have a difference in salary, median, and Own and leave the rest of the lineups alone
# A minimal usage sketch with made-up lineups is included at the bottom of this file

import pandas as pd
import numpy as np
import math


def calculate_weighted_ownership_vectorized(ownership_array):
    """
    Vectorized version of calculate_weighted_ownership using NumPy operations.

    Args:
        ownership_array: 2D array of ownership values (rows x players)

    Returns:
        array: Calculated weighted ownership values for each row
    """
    # Convert percentages to decimals and handle NaN values
    ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100

    # Calculate row means
    row_means = np.mean(ownership_array, axis=1, keepdims=True)

    # Calculate average of each value with the overall mean
    value_means = (ownership_array + row_means) / 2

    # Take average of all those means
    avg_of_means = np.mean(value_means, axis=1)

    # Multiply by count of values
    weighted = avg_of_means * ownership_array.shape[1]

    # Subtract (max - min) for each row
    row_max = np.max(ownership_array, axis=1)
    row_min = np.min(ownership_array, axis=1)
    weighted = weighted - (row_max - row_min)

    # Scale the result back up (the inputs were divided by 100 above)
    return weighted * 10000


def calculate_weighted_ownership_wrapper(row_ownerships):
    """
    Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()

    Args:
        row_ownerships: Series containing ownership values in percentage form

    Returns:
        float: Calculated weighted ownership value
    """
    # Convert Series to 2D array for vectorized function
    ownership_array = row_ownerships.values.reshape(1, -1)
    return calculate_weighted_ownership_vectorized(ownership_array)[0]
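
# Note: the steps above reduce to a closed form. With ownerships converted to
# decimals (percent / 100), averaging each value with the row mean and then
# averaging those results again just reproduces the row mean, so each row works
# out to:
#
#     weighted = (sum(own) - (max(own) - min(own))) * 10000
#
# Worked example for one row of ownership percentages:
#     [20, 10, 5, 5] -> decimals [0.20, 0.10, 0.05, 0.05]
#     sum = 0.40, max - min = 0.15
#     weighted = (0.40 - 0.15) * 10000 = 2500.0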

def reassess_dupes(row: pd.Series, salary_max: int) -> float:
    # Adjust the dupe estimate using the salary change, the salary left unspent, and the ownership change
    return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))


def reassess_lineup_edge(row: pd.Series, Contest_Size: int) -> float:
    # Raw edge: Win% weighted by how far above the median finish the lineup projects
    lineup_edge = row['Win%'] * ((.5 - row['Finish_percentile']) * (Contest_Size / 2.5))
    # Discount lineups that are expected to be duplicated
    if row['Dupes'] > 0:
        lineup_edge = lineup_edge / (row['Dupes'] + 1)
    # The frame-level version of this calculation also centers edges around the column mean;
    # that step needs the whole frame, so it is not applied per-row here
    return lineup_edge


def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, salary_max: int) -> pd.DataFrame:
    # maps_dict, site_var, type_var, strength_var, and sport_var are accepted for interface
    # consistency but are not used in this function
    orig_df = original_frame.copy()
    orig_df = orig_df.reset_index(drop=True)
    refactored_df = refactored_frame.copy()
    refactored_df = refactored_df.reset_index(drop=True)

    # Differences between the post-exposure_spread.py set and the original set
    refactored_df['salary_diff'] = refactored_df['salary'] - orig_df['salary']
    refactored_df['median_diff'] = refactored_df['median'] - orig_df['median']
    refactored_df['own_diff'] = refactored_df['Own'] - orig_df['Own']

    # Only rework the metrics for lineups whose median actually changed
    change_mask = refactored_df[refactored_df['median_diff'] != 0]

    # Count the player ownership columns by excluding the metric and diff columns
    num_players = len([col for col in refactored_df.columns
                       if col not in ['salary', 'median', 'Own', 'Dupes', 'Finish_percentile', 'Win%', 'Edge',
                                      'Weighted Own', 'Geomean', 'salary_diff', 'median_diff', 'own_diff']])
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]

    for lineups in change_mask.index:
        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
        # Finish_percentile and Win% are carried over unchanged for now
        refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
        refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))

    return refactored_df
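

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The two-lineup frames below are
# made up and contain only the metric columns plus per-slot player_N_own
# ownership columns; a real portfolio coming out of predict_dupes.py and
# exposure_spread.py will carry more columns, and the site/type/strength/sport
# arguments shown here are placeholder values.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    original = pd.DataFrame({
        'salary': [49800, 50000],
        'median': [110.5, 112.0],
        'Own': [95.0, 95.0],
        'Dupes': [3, 5],
        'Finish_percentile': [0.30, 0.25],
        'Win%': [0.02, 0.03],
        'Edge': [0.5, 0.8],
        'Weighted Own': [1200.0, 1400.0],
        'Geomean': [15.0, 18.0],
        'player_1_own': [20.0, 25.0],
        'player_2_own': [10.0, 12.0],
        'player_3_own': [5.0, 8.0],
    })

    # Pretend exposure_spread.py swapped a player in the second lineup
    refactored = original.copy()
    refactored.loc[1, ['salary', 'median', 'Own', 'player_3_own']] = [49600, 109.0, 90.0, 3.0]

    result = reassess_edge(refactored, original, maps_dict={}, site_var='dk', type_var='Classic',
                           Contest_Size=10000, strength_var='average', sport_var='NFL', salary_max=50000)
    # Only the second lineup's metrics are reworked; the first is left alone
    print(result[['Dupes', 'Edge', 'Weighted Own', 'Geomean']])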