Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	James McCool
				
			
		Enhance dupes calculation in predict_dupes function by adjusting the Finish_percentile formula to include ownership ratio and Contest_Size, improving the accuracy of duplicate predictions.
		1046e61
		
		| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import time | |
| import math | |
| from difflib import SequenceMatcher | |
def calculate_weighted_ownership_vectorized(ownership_array):
    """
    Compute one weighted-ownership figure per row of a 2D ownership array.

    Args:
        ownership_array: 2D array of ownership values (rows x players).
            NaN entries are treated as 0. Every value is divided by 100
            before the calculation and the result is scaled by 10000 on
            the way out.

    Returns:
        array: 1D array holding one weighted ownership value per row
    """
    # Zero out NaNs, then apply the /100 scaling
    scaled = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
    player_count = scaled.shape[1]

    # Blend each value with its row mean, then average those blends per row
    per_row_mean = scaled.mean(axis=1, keepdims=True)
    blended = (scaled + per_row_mean) / 2
    blended_mean = blended.mean(axis=1)

    # Scale by the number of players and penalise the row's max-min spread
    spread = scaled.max(axis=1) - scaled.min(axis=1)
    adjusted = blended_mean * player_count - spread

    return adjusted * 10000
def calculate_weighted_ownership_wrapper(row_ownerships):
    """
    Row-wise adapter so the vectorized weighted-ownership routine can be used
    with Pandas .apply().

    Args:
        row_ownerships: Series of ownership values for a single row

    Returns:
        float: weighted ownership value for that row
    """
    # Promote the 1D row to a (1, n) matrix, run the batch routine, unwrap the single result
    as_matrix = row_ownerships.values[None, :]
    per_row = calculate_weighted_ownership_vectorized(as_matrix)
    return per_row[0]
def calculate_player_similarity_score_vectorized(portfolio, player_columns):
    """
    Score how different each lineup is from the rest of the portfolio.

    Every row is turned into the set of players found in ``player_columns``.
    The score of a row is its mean Jaccard distance to all other rows,
    min-max normalised to 0-1 when the scores are not all equal.

    Args:
        portfolio: DataFrame with one lineup per row
        player_columns: column labels of ``portfolio`` that hold player names

    Returns:
        array: 1D float array with one score per row. Portfolios with fewer
        than two rows have nothing to compare against and yield zeros.
    """
    n_rows = len(portfolio)
    if n_rows < 2:
        # Avoids the 0/0 (one row) and max-of-empty (zero rows) failures
        return np.zeros(n_rows, dtype=float)

    # Read raw objects so missing values can be detected *before* string
    # conversion; converting first would turn NaN/None into a fake player name.
    raw_players = portfolio[player_columns].to_numpy(dtype=object)
    lineups = [
        {str(val) for val in row if not pd.isna(val) and str(val).strip() != ''}
        for row in raw_players
    ]

    # Map every distinct player to a column of the presence matrix
    all_players = sorted(set().union(*lineups))
    player_to_id = {player: idx for idx, player in enumerate(all_players)}

    # int8 keeps the matrix small; overlaps stay far below 127 as long as a
    # lineup holds fewer than 128 players
    binary_matrix = np.zeros((n_rows, len(all_players)), dtype=np.int8)
    for i, lineup in enumerate(lineups):
        if lineup:
            binary_matrix[i, [player_to_id[player] for player in lineup]] = 1

    # Pairwise intersection / union sizes
    intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
    row_sums = np.sum(binary_matrix, axis=1)
    union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix

    # Jaccard distance: 1 - (intersection / union); empty-vs-empty pairs keep
    # similarity 0 because the division is skipped where the union is 0
    with np.errstate(divide='ignore', invalid='ignore'):
        jaccard_similarity = np.divide(
            intersection_matrix,
            union_matrix,
            out=np.zeros_like(intersection_matrix, dtype=float),
            where=union_matrix != 0,
        )
    jaccard_distance = 1 - jaccard_similarity

    # Exclude self-comparison and average over the other rows
    np.fill_diagonal(jaccard_distance, 0)
    similarity_scores = np.sum(jaccard_distance, axis=1) / (n_rows - 1)

    # Normalize to 0-1 scale (skipped when every row has the same score)
    lowest = similarity_scores.min()
    score_range = similarity_scores.max() - lowest
    if score_range > 0:
        similarity_scores = (similarity_scores - lowest) / score_range
    return similarity_scores
def predict_dupes_vectorized(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
    """
    Array-based dupes / finish / edge metrics for a lineup portfolio.

    Args:
        portfolio: DataFrame whose leading columns are the lineup's players and
            which also has 'salary', 'median' and 'Own' columns ('Size' optional)
        maps_dict: dict with 'own_map' (player -> ownership) and, for Showdown,
            'cpt_own_map' (captain -> ownership)
        site_var: 'Fanduel' or 'Draftkings'
        type_var: 'Showdown' or 'Classic'
        Contest_Size: number of entries in the contest
        strength_var: 'Weak', 'Average' or 'Sharp'
        sport_var: sport code; 'WNBA', 'CS2' and 'LOL' select special layouts

    Returns:
        DataFrame (fresh RangeIndex) with the metric columns, 'Size' when the
        input has it, and the player columns appended last.

    Raises:
        ValueError: for an unknown strength_var or site_var/type_var pair.

    Note:
        The formulas here are kept exactly as found; they are not the same as
        the ones in predict_dupes.
    """
    # Set multipliers based on strength
    strength_table = {
        'Weak': (0.75, 0.90),
        'Average': (1.00, 1.00),
        'Sharp': (1.25, 1.10),
    }
    if strength_var not in strength_table:
        raise ValueError(f"Unsupported strength_var: {strength_var!r}")
    dupes_multiplier, percentile_multiplier = strength_table[strength_var]

    # Resolve roster size and salary cap for the site / contest type / sport.
    # NOTE(review): the "classic" count treats every column other than
    # salary/median/Own as a player, so an input that already carries 'Size'
    # would count it too - confirm against the caller.
    reserved = ('salary', 'median', 'Own')
    classic_count = len([col for col in portfolio.columns if col not in reserved])
    num_players = None
    salary_cap = None
    if site_var == 'Fanduel':
        if type_var == 'Showdown':
            num_players, salary_cap = 5, 60000
        elif type_var == 'Classic':
            num_players = classic_count
            salary_cap = 40000 if sport_var == 'WNBA' else 60000
    elif site_var == 'Draftkings':
        if type_var == 'Showdown':
            num_players, salary_cap = 6, 50000
        elif type_var == 'Classic':
            num_players = {'CS2': 6, 'LOL': 7}.get(sport_var, classic_count)
            salary_cap = 50000
    if num_players is None:
        raise ValueError(f"Unsupported site_var/type_var combination: {site_var!r}/{type_var!r}")

    n_rows = len(portfolio)
    max_ownership = max(maps_dict['own_map'].values()) / 100

    # Pull the numeric columns as float arrays. Going through portfolio.values
    # would give an object array (players are strings) and object arithmetic
    # does not survive np.round.
    salary = portfolio['salary'].to_numpy(dtype=float)
    median = portfolio['median'].to_numpy(dtype=float)
    own = portfolio['Own'].to_numpy(dtype=float)

    # Ownership lookup per roster slot (missing players count as 0)
    ownership_array = np.zeros((n_rows, num_players), dtype=np.float32)
    ownership_rank_array = np.zeros((n_rows, num_players), dtype=np.float32)
    for i in range(num_players):
        is_captain = i == 0 and type_var == 'Showdown'
        lookup = maps_dict['cpt_own_map'] if is_captain else maps_dict['own_map']
        # Explicit float dtype: np.vectorize would infer the dtype from the
        # first element and silently truncate when that lookup returns int 0
        slot_own = np.array(
            [lookup.get(name, 0) for name in portfolio.iloc[:, i].to_numpy()],
            dtype=np.float64,
        )
        ownership_array[:, i] = slot_own / 100
        ownership_rank_array[:, i] = slot_own

    # Showdown: replace flex ownership with a percentile rank pooled over all flex slots
    if type_var == 'Showdown':
        flex_ownerships = ownership_rank_array[:, 1:].flatten()
        flex_rank = pd.Series(flex_ownerships).rank(pct=True).values.reshape(n_rows, -1)
        ownership_rank_array[:, 1:] = flex_rank
    # NOTE(review): this division is applied to every contest type, so Showdown
    # flex ranks (already 0-1) end up in 0-0.01. The original indentation was
    # lost; confirm whether this was meant to be conditional.
    ownership_rank_array = ownership_rank_array / 100

    own_product = np.prod(ownership_array, axis=1)
    own_average = (own.max() * 0.33) / 100
    own_sum = np.sum(ownership_array, axis=1)
    avg_own_rank = np.mean(ownership_rank_array, axis=1)

    # Dupes formula
    dupes_calc = (own_product * avg_own_rank) * Contest_Size + \
        ((salary - (salary_cap - own)) / 100) - \
        ((salary_cap - salary) / 100)
    dupes_calc = dupes_calc * dupes_multiplier
    # Round and handle negative values
    rounded = np.round(dupes_calc, 0)
    dupes = np.where(rounded <= 0, 0, rounded - 1)

    # own_ratio: rows holding the max-owned player keep the full sum
    max_own_mask = np.any(ownership_array == max_ownership, axis=1)
    own_ratio = np.where(max_own_mask,
                         own_sum / own_average,
                         (own_sum - max_ownership) / own_average)

    # Finish_percentile
    percentile_cut_scalar = median.max()
    own_ratio_nerf = 2 if type_var == 'Classic' and sport_var in ('CS2', 'LOL') else 1.5
    finish_percentile = (own_ratio - own_ratio_nerf) / ((5 * (median / percentile_cut_scalar)) / 3)
    finish_percentile = np.where(finish_percentile < 0.0005, 0.0005, finish_percentile / 2)

    # Win% relative to the best projection in the portfolio
    ref_proj = median.max()
    max_proj = ref_proj + 10
    min_proj = ref_proj - 10
    avg_ref = (max_proj + min_proj) / 2
    win_percent = (((median / avg_ref) - (0.1 + ((ref_proj - median) / 100))) / (Contest_Size / 1000)) / 10
    max_allowed_win = (1 / Contest_Size) * 5
    win_percent = win_percent / win_percent.max() * max_allowed_win
    finish_percentile = finish_percentile + 0.005 + (0.005 * (Contest_Size / 10000))
    finish_percentile = finish_percentile * percentile_multiplier
    win_percent = win_percent * (1 - finish_percentile)

    # Divide by the number of sub-10% players; a count of 0 divides by 1 (no change)
    low_own_count = np.sum(ownership_array < 0.10, axis=1)
    finish_percentile = finish_percentile / np.maximum(low_own_count, 1)

    # Lineup Edge, shared between expected duplicates and centred on the mean
    lineup_edge = win_percent * ((0.5 - finish_percentile) * (Contest_Size / 2.5))
    lineup_edge = np.where(dupes > 0, lineup_edge / (dupes + 1), lineup_edge)
    lineup_edge = lineup_edge - lineup_edge.mean()

    weighted_own = calculate_weighted_ownership_vectorized(ownership_array)
    geomean = np.power(np.prod(ownership_array * 100, axis=1), 1 / num_players)

    # The similarity helper indexes by column label, so hand it labels rather
    # than integer positions
    player_labels = [col for col in portfolio.columns[:num_players] if col not in reserved]
    diversity = calculate_player_similarity_score_vectorized(portfolio, player_labels)

    # Create result DataFrame with optimized data types
    result_data = {
        'Dupes': dupes.astype('uint16'),
        'median': median.astype('float32'),
        'Own': own.astype('float32'),
        'salary': salary.astype('uint16'),
        'Finish_percentile': finish_percentile.astype('float32'),
        'Win%': win_percent.astype('float32'),
        'Lineup Edge': lineup_edge.astype('float32'),
        'Weighted Own': weighted_own.astype('float32'),
        'Geomean': geomean.astype('float32'),
        'Diversity': diversity.astype('float32'),
    }
    # Add Size column if it exists
    if 'Size' in portfolio.columns:
        result_data['Size'] = portfolio['Size'].to_numpy().astype('uint16')
    # Add player columns back
    for i, col_name in enumerate(portfolio.columns[:num_players]):
        result_data[col_name] = portfolio.iloc[:, i].to_numpy()
    return pd.DataFrame(result_data)
| # Keep the original function for backward compatibility | |
def _legacy_lineup_layout(portfolio, site_var, type_var, sport_var):
    """Return the roster/formula settings predict_dupes uses for one site, contest type and sport."""
    classic_count = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])

    def layout(n_players, captain, pooled_rank, salary_cap, product_scale=1, salary_divisor=100, scale_by_own=False):
        return {
            'n_players': n_players,          # leading columns that hold players
            'captain': captain,              # column 0 is looked up in cpt_own_map
            'pooled_rank': pooled_rank,      # rank flex ownership across all flex slots
            'salary_cap': salary_cap,
            'product_scale': product_scale,  # multiplier on the ownership product
            'salary_divisor': salary_divisor,
            'scale_by_own': scale_by_own,    # extra (Own / 100) factor on the first term
        }

    if site_var == 'Fanduel':
        if type_var == 'Showdown':
            return layout(5, True, True, 60000, scale_by_own=True)
        if type_var == 'Classic':
            return layout(classic_count, False, False, 60000)
    elif site_var == 'Draftkings':
        if type_var == 'Showdown':
            # GOLF showdown has no captain: all six slots are flex
            return layout(6, sport_var != 'GOLF', True, 50000)
        if type_var == 'Classic':
            if sport_var == 'CS2':
                return layout(6, True, True, 50000, product_scale=10, salary_divisor=50)
            if sport_var == 'LOL':
                return layout(7, True, True, 50000, product_scale=10, salary_divisor=50)
            return layout(classic_count, False, False, 50000)
    raise ValueError(f"Unsupported site_var/type_var combination: {site_var!r}/{type_var!r}")


def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
    """
    Estimate duplicate counts and related metrics for every lineup in a portfolio.

    Args:
        portfolio: DataFrame whose leading columns are the lineup's players and
            which also has 'salary', 'median' and 'Own' columns
        maps_dict: dict with 'own_map' (player -> ownership), 'cpt_own_map'
            (captain -> ownership, used by captain layouts) and
            'own_percent_rank' (player -> rank, used by the plain Classic layouts)
        site_var: 'Fanduel' or 'Draftkings'
        type_var: 'Showdown' or 'Classic'
        Contest_Size: number of entries in the contest
        strength_var: 'Weak', 'Average' or 'Sharp'
        sport_var: sport code; 'GOLF', 'CS2' and 'LOL' select special layouts

    Returns:
        DataFrame: a copy of ``portfolio`` with 'Dupes', 'Finish_percentile',
        'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean' and 'Diversity'
        appended. The input frame itself is left untouched.

    Raises:
        ValueError: for an unknown strength_var or site_var/type_var pair.
    """
    strength_table = {
        'Weak': (.75, .90),
        'Average': (1.00, 1.00),
        'Sharp': (1.25, 1.10),
    }
    if strength_var not in strength_table:
        raise ValueError(f"Unsupported strength_var: {strength_var!r}")
    dupes_multiplier, percentile_multiplier = strength_table[strength_var]

    layout = _legacy_lineup_layout(portfolio, site_var, type_var, sport_var)
    n_players = layout['n_players']
    salary_cap = layout['salary_cap']
    salary_divisor = layout['salary_divisor']
    max_ownership = max(maps_dict['own_map'].values()) / 100

    # Work on a copy so the caller's frame does not pick up scratch columns
    portfolio = portfolio.copy()
    index = portfolio.index
    player_columns = [col for col in portfolio.columns[:n_players] if col not in ['salary', 'median', 'Own']]

    # Raw ownership per roster slot (captain slot uses the captain map)
    raw_own = pd.DataFrame(
        {
            pos: portfolio.iloc[:, pos].map(
                maps_dict['cpt_own_map'] if layout['captain'] and pos == 0 else maps_dict['own_map']
            ).to_numpy()
            for pos in range(n_players)
        },
        index=index,
    )
    own_frame = raw_own.astype('float32') / 100

    # Ownership ranks per slot
    if layout['pooled_rank']:
        first_flex = 1 if layout['captain'] else 0
        flex_own = raw_own.iloc[:, first_flex:]
        # One percentile rank over every flex slot at once. Ties share a rank,
        # so this matches a value -> rank lookup without the O(n^2) scan and
        # without failing on players missing from own_map.
        pooled = pd.Series(flex_own.to_numpy().ravel()).rank(pct=True).to_numpy()
        rank_frame = pd.DataFrame(pooled.reshape(flex_own.shape), index=index)
        if layout['captain']:
            rank_frame.insert(0, 'CPT', raw_own.iloc[:, 0].rank(pct=True).to_numpy())
    else:
        rank_frame = pd.DataFrame(
            {
                pos: portfolio.iloc[:, pos].map(maps_dict['own_percent_rank']).to_numpy()
                for pos in range(n_players)
            },
            index=index,
        )

    own_product = own_frame.product(axis=1)
    own_average = (portfolio['Own'].max() * .33) / 100
    own_sum = own_frame.sum(axis=1)
    avg_own_rank = rank_frame.mean(axis=1)

    # Calculate dupes formula
    exposure = (own_product * layout['product_scale']) * avg_own_rank
    if layout['scale_by_own']:
        exposure = exposure * (portfolio['Own'] / 100)
    dupes_calc = exposure * Contest_Size \
        + ((portfolio['salary'] - (salary_cap - portfolio['Own'])) / salary_divisor) \
        - ((salary_cap - portfolio['salary']) / salary_divisor)
    dupes_calc = dupes_calc * dupes_multiplier * (portfolio['Own'] / (100 + (Contest_Size / 1000)))
    # Round and handle negative values
    rounded = np.round(dupes_calc, 0)
    dupes = np.round(np.where(rounded <= 0, 0, rounded - 1), 0)

    # NOTE(review): own_frame is float32 while max_ownership is a Python float,
    # so this exact-match test may rarely be true - confirm the intent.
    own_ratio = np.where(
        own_frame.isin([max_ownership]).any(axis=1),
        own_sum / own_average,
        (own_sum - max_ownership) / own_average
    )

    median = portfolio['median']
    percentile_cut_scalar = median.max()
    own_ratio_nerf = 2 if type_var == 'Classic' and sport_var in ('CS2', 'LOL') else 1.5
    raw_percentile = (own_ratio - own_ratio_nerf) / ((5 * (median / percentile_cut_scalar)) / 3)
    finish_percentile = pd.Series(
        np.where(raw_percentile < .0005, .0005, raw_percentile / 2),
        index=index,
    )

    # Win% relative to the best projection in the portfolio
    ref_proj = median.max()
    max_proj = ref_proj + 10
    min_proj = ref_proj - 10
    avg_ref = (max_proj + min_proj) / 2
    win_percent = (((median / avg_ref) - (0.1 + ((ref_proj - median) / 100))) / (Contest_Size / 1000)) / 10
    max_allowed_win = (1 / Contest_Size) * 5
    win_percent = win_percent / win_percent.max() * max_allowed_win

    finish_percentile = finish_percentile + .005 + (.005 * (Contest_Size / 10000))
    finish_percentile = finish_percentile * percentile_multiplier * (portfolio['Own'] / (100 + (Contest_Size / 1000)))
    win_percent = win_percent * (1 - finish_percentile)
    win_percent = win_percent.clip(lower=0, upper=max_allowed_win)

    # Split the finish percentile across the lineup's sub-10% players
    low_own_count = (own_frame < 0.10).sum(axis=1)
    finish_percentile = finish_percentile.where(low_own_count <= 0, finish_percentile / low_own_count)

    # Lineup Edge, shared between expected duplicates and centred on the mean
    lineup_edge = win_percent * ((.5 - finish_percentile) * (Contest_Size / 2.5))
    lineup_edge = lineup_edge.mask(dupes > 0, lineup_edge / (dupes + 1))
    lineup_edge = lineup_edge - lineup_edge.mean()

    weighted_own = calculate_weighted_ownership_vectorized(own_frame.to_numpy())
    geomean = np.power((own_frame * 100).product(axis=1), 1 / n_players)
    # Calculate similarity score based on actual player selection
    diversity = calculate_player_similarity_score_vectorized(portfolio, player_columns)

    # Only the output columns are attached; arrays avoid any index re-alignment
    portfolio['Dupes'] = dupes
    portfolio['Finish_percentile'] = finish_percentile.to_numpy()
    portfolio['Win%'] = win_percent.to_numpy()
    portfolio['Lineup Edge'] = lineup_edge.to_numpy()
    portfolio['Weighted Own'] = weighted_own
    portfolio['Geomean'] = geomean.to_numpy()
    portfolio['Diversity'] = diversity

    # Best-effort downcasts: first with 'Size' (present only for stacked
    # portfolios), then without it. A failed cast leaves the columns as they are.
    for int_columns in (['Dupes', 'Size', 'salary'], ['Dupes', 'salary']):
        try:
            portfolio[int_columns] = portfolio[int_columns].astype('uint16')
        except (KeyError, ValueError, TypeError):
            pass
    if sport_var != 'LOL':
        float32_columns = ['median', 'Own', 'Finish_percentile', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
        try:
            portfolio[float32_columns] = portfolio[float32_columns].astype('float32')
        except (KeyError, ValueError, TypeError):
            pass
    return portfolio
