James McCool
Refactor reassess_lineup_edge function in reassess_edge.py to simplify the application of lineup edge calculations, enhancing code clarity and maintainability.
4edaf5f
# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
# This is because the predict_dupes.py function operates only on the lineups it takes in, and is meant to give edge around the median expectation of those lineups
# So what we need to do instead is find the differences between the original set and the post-exposure_spread.py set and calculate new metrics around the diff
# Need to find the diff in salary, median, and Own to calculate new Dupes, Finish_percentile, Win%, Edge, Weighted Own, and Geomean
# Then at the end run the Diversity function to set a new column for Diversity
# This way we only change the metrics where there is a difference in salary, median, or Own and leave the rest of the lineups alone

import pandas as pd
import numpy as np
import math
def calculate_weighted_ownership_vectorized(ownership_array):
    """
    Vectorized version of calculate_weighted_ownership using NumPy operations.

    Args:
        ownership_array: 2D array of ownership values (rows x players)

    Returns:
        array: Calculated weighted ownership values for each row
    """
    # Convert percentages to decimals and handle NaN values
    ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
    # Calculate row means
    row_means = np.mean(ownership_array, axis=1, keepdims=True)
    # Calculate average of each value with the overall mean
    value_means = (ownership_array + row_means) / 2
    # Take average of all those means
    avg_of_means = np.mean(value_means, axis=1)
    # Multiply by count of values
    weighted = avg_of_means * ownership_array.shape[1]
    # Subtract (max - min) for each row
    row_max = np.max(ownership_array, axis=1)
    row_min = np.min(ownership_array, axis=1)
    weighted = weighted - (row_max - row_min)
    # Convert back to percentage form
    return weighted * 10000
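
# Worked example with hypothetical values (not from the source data): a row of [10, 20, 30]
# percent ownership becomes [0.1, 0.2, 0.3]; the row mean is 0.2; the per-value means are
# [0.15, 0.2, 0.25]; their average 0.2 times 3 players gives 0.6; subtracting the spread
# (0.3 - 0.1) leaves 0.4; scaling by 10000 returns 4000.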
def calculate_weighted_ownership_wrapper(row_ownerships):
    """
    Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()

    Args:
        row_ownerships: Series containing ownership values in percentage form

    Returns:
        float: Calculated weighted ownership value
    """
    # Convert Series to 2D array for vectorized function
    ownership_array = row_ownerships.values.reshape(1, -1)
    return calculate_weighted_ownership_vectorized(ownership_array)[0]
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
    # Re-estimate the duplicate count for this lineup from its salary and ownership changes
    return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))
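
# Worked example with hypothetical values (salary_max=50000, salary=49800, salary_diff=-300,
# own_diff=-20, Dupes=10): (-300 / 100) + ((50000 + 200) / 100) = 499; 499 * (1 - (-20 / 100)) = 598.8;
# ceil(10 + 598.8) = 609.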
def reassess_lineup_edge(row: pd.Series, Contest_Size: int) -> float:
    # Base edge: win probability scaled by how far the lineup finishes above the median
    lineup_edge = row['Win%'] * ((.5 - row['Finish_percentile']) * (Contest_Size / 2.5))
    # Discount the edge by the expected number of duplicate entries
    if row['Dupes'] > 0:
        lineup_edge = lineup_edge / (row['Dupes'] + 1)
    # Note: centering against the portfolio-wide mean edge would require the full frame, not a single row
    return lineup_edge
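
# Worked example with hypothetical values (Win%=0.02, Finish_percentile=0.2, Contest_Size=1000,
# Dupes=3): 0.02 * ((0.5 - 0.2) * (1000 / 2.5)) = 0.02 * 120 = 2.4, then 2.4 / (3 + 1) = 0.6.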
def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, salary_max: int) -> pd.DataFrame:
    orig_df = original_frame.copy()
    orig_df = orig_df.reset_index(drop=True)
    refactored_df = refactored_frame.copy()
    refactored_df = refactored_df.reset_index(drop=True)

    # Differences between the post-exposure_spread.py portfolio and the original portfolio
    refactored_df['salary_diff'] = refactored_df['salary'] - orig_df['salary']
    refactored_df['median_diff'] = refactored_df['median'] - orig_df['median']
    refactored_df['own_diff'] = refactored_df['Own'] - orig_df['Own']

    # Only lineups whose median changed need their metrics recalculated
    change_mask = refactored_df[refactored_df['median_diff'] != 0]

    # Infer the number of player ownership columns from the columns that are not lineup-level metrics
    num_players = len([col for col in refactored_df.columns if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Win%', 'Edge', 'Weighted Own', 'Geomean', 'salary_diff', 'median_diff', 'own_diff']])
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]

    for lineups in change_mask.index:
        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
        # Finish_percentile and Win% are carried over unchanged
        refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
        refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
        # Recalculate weighted ownership and the geometric-mean ownership from this lineup's player ownerships
        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))

    return refactored_df
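
# A minimal usage sketch, assuming hypothetical CSV file names and contest settings that are
# not part of this module: compare the post-exposure_spread.py portfolio against the original
# portfolio and recalculate metrics only for the lineups whose salary, median, or Own changed.
if __name__ == '__main__':
    original = pd.read_csv('original_portfolio.csv')
    spread = pd.read_csv('spread_portfolio.csv')
    updated = reassess_edge(spread, original, maps_dict={}, site_var='draftkings',
                            type_var='Classic', Contest_Size=10000, strength_var='Average',
                            sport_var='NFL', salary_max=50000)
    print(updated[['Dupes', 'Edge', 'Weighted Own', 'Geomean']].head())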