DFS_Portfolio_Manager / global_func /stratification_function.py
James McCool
Refactor similarity score calculations in stratification_function to use quantiles instead of min/max thresholds, improving accuracy in target similarity score generation.
8b50a4a
raw
history blame
1.76 kB
import pandas as pd
import numpy as np
def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude_cols: list, sport: str, sorting_choice: str, low_threshold: float, high_threshold: float) -> pd.DataFrame:
    """Select lineups spread evenly across a quantile band of ``sorting_choice``.

    ``lineup_target`` evenly spaced target values are generated between the
    ``low_threshold`` and ``high_threshold`` percentiles of the
    ``sorting_choice`` column; for each target the row whose value is closest
    is selected.  A row that is closest to several targets is kept only once,
    so the result can contain fewer than ``lineup_target`` rows.

    Args:
        portfolio: Lineups to choose from; must contain ``sorting_choice``.
            The frame is not modified.
        lineup_target: Number of target values to generate (upper bound on
            the number of rows returned).
        exclude_cols: Accepted for interface compatibility; not used here.
        sport: Accepted for interface compatibility; not used here.
        sorting_choice: Name of the column the selection is based on.
        low_threshold: Lower bound of the band, as a percentile (0-100).
        high_threshold: Upper bound of the band, as a percentile (0-100).

    Returns:
        The selected rows sorted by ``sorting_choice`` in descending order.
        The index is not reset after that final sort, so index labels reflect
        the order in which rows were selected.  An empty frame with the same
        columns is returned when the column holds no non-null values
        (including an empty ``portfolio``).
    """
    # 'Finish_percentile' is ranked ascending, every other metric descending.
    # The ranking only matters for ties: when several rows are equally close
    # to a target, the first one in this order is chosen.
    ranked = portfolio.sort_values(
        by=sorting_choice,
        ascending=(sorting_choice == 'Finish_percentile'),
    ).reset_index(drop=True)
    scores = ranked[sorting_choice]

    # With nothing to measure against, the quantiles are NaN and no closest
    # row exists; return an empty selection instead of failing further down.
    if not scores.notna().any():
        return ranked.iloc[0:0]

    # Thresholds are given as percentages, quantile() expects fractions.
    band_floor = scores.quantile(low_threshold / 100)
    band_ceiling = scores.quantile(high_threshold / 100)

    # Evenly spaced target values across the band (linear progression).
    targets = np.linspace(band_floor, band_ceiling, lineup_target)

    # Closest row per target; dict.fromkeys drops repeats while keeping the
    # first-seen order, without rescanning a list for every target.
    chosen = list(dict.fromkeys(
        (scores - target).abs().idxmin() for target in targets
    ))

    selected = ranked.loc[chosen].reset_index(drop=True)
    return selected.sort_values(by=sorting_choice, ascending=False)