import pandas as pd
import numpy as np


def stratification_function(
    portfolio: pd.DataFrame,
    lineup_target: int,
    exclude_cols: list,
    sport: str,
    sorting_choice: str,
    low_threshold: float,
    high_threshold: float,
) -> pd.DataFrame:
    """Pick rows whose ``sorting_choice`` values are spread evenly across a range.

    The range runs from the ``low_threshold`` percentile to the
    ``high_threshold`` percentile of ``portfolio[sorting_choice]``.
    ``lineup_target`` evenly spaced target values are generated over that
    range and, for each target, the row with the closest value is selected.
    A row is selected at most once, so the result can contain fewer than
    ``lineup_target`` rows when several targets share the same closest row.

    Args:
        portfolio: Frame containing a ``sorting_choice`` column.
        lineup_target: Number of evenly spaced targets to generate.
        exclude_cols: Accepted for interface compatibility; not used here.
        sport: Accepted for interface compatibility; not used here.
        sorting_choice: Name of the column to stratify on.
        low_threshold: Lower percentile bound, expressed on a 0-100 scale.
        high_threshold: Upper percentile bound, expressed on a 0-100 scale.

    Returns:
        A new DataFrame holding the selected rows, sorted by
        ``sorting_choice`` in descending order. It is empty when the column
        has no non-missing values or when ``lineup_target`` is 0.

    Raises:
        KeyError: If ``sorting_choice`` is not a column of ``portfolio``.
        ValueError: If ``lineup_target`` is negative (from ``np.linspace``)
            or a threshold falls outside 0-100 (from ``Series.quantile``).
    """
    scores = portfolio[sorting_choice]

    # With no usable scores there is no "closest" row to find; return an
    # empty frame instead of failing inside the search loop.
    if scores.notna().sum() == 0:
        return portfolio.iloc[[]]

    range_floor = scores.quantile(low_threshold / 100)
    range_ceiling = scores.quantile(high_threshold / 100)
    targets = np.linspace(range_floor, range_ceiling, lineup_target)

    # Track row *positions* rather than index labels: labels may repeat
    # (e.g. after concatenating frames), and a label-based lookup would
    # then pull in every row sharing the label.
    chosen_positions = []
    seen_positions = set()
    for target in targets:
        position = int((scores - target).abs().argmin())
        if position not in seen_positions:
            seen_positions.add(position)
            chosen_positions.append(position)

    return portfolio.iloc[chosen_positions].sort_values(
        by=sorting_choice, ascending=False
    )