James McCool
		
	committed on
		
		
					Commit 
							
							·
						
						46a28f1
	
1
								Parent(s):
							
							dd94c84
								
Refactor predict_dupes.py to implement vectorized calculations for ownership and similarity scores, improving performance. Introduce new functions for weighted ownership and player similarity, while maintaining backward compatibility. Update data type handling for portfolio results to optimize memory usage.
Browse files- global_func/predict_dupes.py +290 -83
    	
        global_func/predict_dupes.py
    CHANGED
    
    | @@ -5,59 +5,64 @@ import time | |
| 5 | 
             
            import math
         | 
| 6 | 
             
            from difflib import SequenceMatcher
         | 
| 7 |  | 
| 8 | 
            -
            def  | 
| 9 | 
             
                """
         | 
| 10 | 
            -
                 | 
| 11 | 
            -
                (AVERAGE of (each value's average with overall average)) * count - (max - min)
         | 
| 12 |  | 
| 13 | 
             
                Args:
         | 
| 14 | 
            -
                     | 
| 15 |  | 
| 16 | 
             
                Returns:
         | 
| 17 | 
            -
                     | 
| 18 | 
             
                """
         | 
| 19 | 
            -
                #  | 
| 20 | 
            -
                 | 
| 21 |  | 
| 22 | 
            -
                #  | 
| 23 | 
            -
                 | 
| 24 |  | 
| 25 | 
             
                # Calculate average of each value with the overall mean
         | 
| 26 | 
            -
                value_means =  | 
| 27 |  | 
| 28 | 
             
                # Take average of all those means
         | 
| 29 | 
            -
                avg_of_means =  | 
| 30 |  | 
| 31 | 
             
                # Multiply by count of values
         | 
| 32 | 
            -
                weighted = avg_of_means *  | 
| 33 |  | 
| 34 | 
            -
                # Subtract (max - min)
         | 
| 35 | 
            -
                 | 
|  | |
|  | |
| 36 |  | 
| 37 | 
            -
                # Convert back to percentage form | 
| 38 | 
             
                return weighted * 10000
         | 
| 39 |  | 
| 40 | 
            -
            def  | 
| 41 | 
             
                """
         | 
| 42 | 
            -
                 | 
| 43 | 
            -
                based on actual player selection. Optimized for speed using vectorized operations.
         | 
| 44 | 
            -
                Higher scores indicate more unique/different lineups.
         | 
| 45 |  | 
| 46 | 
             
                Args:
         | 
| 47 | 
            -
                     | 
| 48 | 
            -
                    player_columns: List of column names containing player names
         | 
| 49 |  | 
| 50 | 
             
                Returns:
         | 
| 51 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 52 | 
             
                """
         | 
| 53 | 
            -
                 | 
| 54 | 
            -
                 | 
|  | |
|  | |
| 55 |  | 
| 56 | 
             
                # Get all unique players and create a mapping to numeric IDs
         | 
| 57 | 
             
                all_players = set()
         | 
| 58 | 
            -
                for  | 
| 59 | 
            -
                     | 
| 60 | 
            -
                    for val in unique_vals:
         | 
| 61 | 
             
                        if isinstance(val, str) and val.strip() != '':
         | 
| 62 | 
             
                            all_players.add(val)
         | 
| 63 |  | 
| @@ -69,46 +74,207 @@ def calculate_player_similarity_score(portfolio, player_columns): | |
| 69 | 
             
                n_rows = len(portfolio)
         | 
| 70 | 
             
                binary_matrix = np.zeros((n_rows, n_players), dtype=np.int8)
         | 
| 71 |  | 
| 72 | 
            -
                 | 
| 73 | 
            -
             | 
|  | |
| 74 | 
             
                        if isinstance(val, str) and str(val).strip() != '' and str(val) in player_to_id:
         | 
| 75 | 
             
                            binary_matrix[i, player_to_id[str(val)]] = 1
         | 
| 76 |  | 
| 77 | 
             
                # Vectorized Jaccard distance calculation
         | 
| 78 | 
            -
                # Use matrix operations to compute all pairwise distances at once
         | 
| 79 | 
            -
                similarity_scores = np.zeros(n_rows)
         | 
| 80 | 
            -
                
         | 
| 81 | 
            -
                # Compute intersection and union matrices
         | 
| 82 | 
            -
                # intersection[i,j] = number of players in common between row i and row j
         | 
| 83 | 
            -
                # union[i,j] = total number of unique players between row i and row j
         | 
| 84 | 
             
                intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
         | 
| 85 | 
            -
                
         | 
| 86 | 
            -
                # For union, we need: |A ∪ B| = |A| + |B| - |A ∩ B|
         | 
| 87 | 
             
                row_sums = np.sum(binary_matrix, axis=1)
         | 
| 88 | 
             
                union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix
         | 
| 89 |  | 
| 90 | 
             
                # Calculate Jaccard distance: 1 - (intersection / union)
         | 
| 91 | 
            -
                # Avoid division by zero
         | 
| 92 | 
             
                with np.errstate(divide='ignore', invalid='ignore'):
         | 
| 93 | 
             
                    jaccard_similarity = np.divide(intersection_matrix, union_matrix, 
         | 
| 94 | 
             
                                                 out=np.zeros_like(intersection_matrix, dtype=float), 
         | 
| 95 | 
             
                                                 where=union_matrix != 0)
         | 
| 96 |  | 
| 97 | 
            -
                # Convert similarity to distance and calculate average distance for each row
         | 
| 98 | 
             
                jaccard_distance = 1 - jaccard_similarity
         | 
| 99 |  | 
| 100 | 
            -
                #  | 
| 101 | 
            -
                # Exclude self-comparison (diagonal elements)
         | 
| 102 | 
             
                np.fill_diagonal(jaccard_distance, 0)
         | 
| 103 | 
            -
                row_counts = n_rows - 1 | 
| 104 | 
             
                similarity_scores = np.sum(jaccard_distance, axis=1) / row_counts
         | 
| 105 |  | 
| 106 | 
            -
                # Normalize to 0-1 scale | 
| 107 | 
            -
                 | 
| 108 | 
            -
             | 
|  | |
| 109 |  | 
| 110 | 
             
                return similarity_scores
         | 
| 111 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 112 | 
             
            def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
         | 
| 113 | 
             
                if strength_var == 'Weak':
         | 
| 114 | 
             
                    dupes_multiplier = .75
         | 
| @@ -143,12 +309,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 143 | 
             
                        portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 144 | 
             
                        portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 145 | 
             
                        portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
|  | |
| 146 |  | 
| 147 | 
            -
                        portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
         | 
| 148 | 
            -
                        portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
         | 
| 149 | 
            -
                        portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
         | 
| 150 | 
            -
                        portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
         | 
| 151 | 
            -
                        portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
         | 
|  | |
| 152 |  | 
| 153 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 154 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
| @@ -175,7 +343,7 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 175 |  | 
| 176 | 
             
                        for i in range(1, num_players + 1):
         | 
| 177 | 
             
                            portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
         | 
| 178 | 
            -
                            portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
         | 
| 179 |  | 
| 180 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 181 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
| @@ -193,35 +361,63 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 193 |  | 
| 194 | 
             
                elif site_var == 'Draftkings':
         | 
| 195 | 
             
                    if type_var == 'Showdown':
         | 
| 196 | 
            -
                         | 
| 197 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 198 | 
             
                        calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
         | 
| 199 | 
             
                        # Get the original player columns (first 6 columns excluding salary, median, Own)
         | 
| 200 | 
             
                        player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
         | 
| 201 | 
            -
                        
         | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
             | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 209 | 
             
                        flex_rank = flex_ownerships.rank(pct=True)
         | 
| 210 |  | 
| 211 | 
             
                        # Assign ranks back to individual columns using the same rank scale
         | 
| 212 | 
            -
                         | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 218 |  | 
| 219 | 
            -
             | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 223 | 
            -
             | 
| 224 | 
            -
             | 
| 225 |  | 
| 226 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 227 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
| @@ -263,12 +459,12 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 263 | 
             
                            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 264 | 
             
                            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 265 |  | 
| 266 | 
            -
                            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
         | 
| 267 | 
            -
                            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
         | 
| 268 | 
            -
                            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
         | 
| 269 | 
            -
                            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
         | 
| 270 | 
            -
                            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
         | 
| 271 | 
            -
                            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100
         | 
| 272 |  | 
| 273 | 
             
                            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 274 | 
             
                            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
| @@ -295,7 +491,7 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 295 |  | 
| 296 | 
             
                            for i in range(1, num_players + 1):
         | 
| 297 | 
             
                                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
         | 
| 298 | 
            -
                                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
         | 
| 299 |  | 
| 300 | 
             
                            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 301 | 
             
                            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
| @@ -348,14 +544,25 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng | |
| 348 | 
             
                portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
         | 
| 349 | 
             
                portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
         | 
| 350 | 
             
                portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
         | 
| 351 | 
            -
                portfolio['Weighted Own'] = portfolio[own_columns].apply( | 
| 352 | 
             
                portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
         | 
| 353 |  | 
| 354 | 
             
                # Calculate similarity score based on actual player selection
         | 
| 355 | 
            -
                portfolio['Diversity'] =  | 
| 356 |  | 
| 357 | 
             
                portfolio = portfolio.drop(columns=dup_count_columns)
         | 
| 358 | 
             
                portfolio = portfolio.drop(columns=own_columns)
         | 
| 359 | 
             
                portfolio = portfolio.drop(columns=calc_columns)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 360 |  | 
| 361 | 
            -
                return portfolio
         | 
|  | |
| 5 | 
             
            import math
         | 
| 6 | 
             
            from difflib import SequenceMatcher
         | 
| 7 |  | 
| 8 | 
            +
            def calculate_weighted_ownership_vectorized(ownership_array):
         | 
| 9 | 
             
                """
         | 
| 10 | 
            +
                Vectorized version of calculate_weighted_ownership using NumPy operations.
         | 
|  | |
| 11 |  | 
| 12 | 
             
                Args:
         | 
| 13 | 
            +
                    ownership_array: 2D array of ownership values (rows x players)
         | 
| 14 |  | 
| 15 | 
             
                Returns:
         | 
| 16 | 
            +
                    array: Calculated weighted ownership values for each row
         | 
| 17 | 
             
                """
         | 
| 18 | 
            +
                # Convert percentages to decimals and handle NaN values
         | 
| 19 | 
            +
                ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
         | 
| 20 |  | 
| 21 | 
            +
                # Calculate row means
         | 
| 22 | 
            +
                row_means = np.mean(ownership_array, axis=1, keepdims=True)
         | 
| 23 |  | 
| 24 | 
             
                # Calculate average of each value with the overall mean
         | 
| 25 | 
            +
                value_means = (ownership_array + row_means) / 2
         | 
| 26 |  | 
| 27 | 
             
                # Take average of all those means
         | 
| 28 | 
            +
                avg_of_means = np.mean(value_means, axis=1)
         | 
| 29 |  | 
| 30 | 
             
                # Multiply by count of values
         | 
| 31 | 
            +
                weighted = avg_of_means * ownership_array.shape[1]
         | 
| 32 |  | 
| 33 | 
            +
                # Subtract (max - min) for each row
         | 
| 34 | 
            +
                row_max = np.max(ownership_array, axis=1)
         | 
| 35 | 
            +
                row_min = np.min(ownership_array, axis=1)
         | 
| 36 | 
            +
                weighted = weighted - (row_max - row_min)
         | 
| 37 |  | 
| 38 | 
            +
                # Convert back to percentage form
         | 
| 39 | 
             
                return weighted * 10000
         | 
| 40 |  | 
| 41 | 
            +
            def calculate_weighted_ownership_wrapper(row_ownerships):
         | 
| 42 | 
             
                """
         | 
| 43 | 
            +
                Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
         | 
|  | |
|  | |
| 44 |  | 
| 45 | 
             
                Args:
         | 
| 46 | 
            +
                    row_ownerships: Series containing ownership values in percentage form
         | 
|  | |
| 47 |  | 
| 48 | 
             
                Returns:
         | 
| 49 | 
            +
                    float: Calculated weighted ownership value
         | 
| 50 | 
            +
                """
         | 
| 51 | 
            +
                # Convert Series to 2D array for vectorized function
         | 
| 52 | 
            +
                ownership_array = row_ownerships.values.reshape(1, -1)
         | 
| 53 | 
            +
                return calculate_weighted_ownership_vectorized(ownership_array)[0]
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            def calculate_player_similarity_score_vectorized(portfolio, player_columns):
         | 
| 56 | 
             
                """
         | 
| 57 | 
            +
                Vectorized version of calculate_player_similarity_score using NumPy operations.
         | 
| 58 | 
            +
                """
         | 
| 59 | 
            +
                # Extract player data and convert to string array
         | 
| 60 | 
            +
                player_data = portfolio[player_columns].astype(str).fillna('').values
         | 
| 61 |  | 
| 62 | 
             
                # Get all unique players and create a mapping to numeric IDs
         | 
| 63 | 
             
                all_players = set()
         | 
| 64 | 
            +
                for row in player_data:
         | 
| 65 | 
            +
                    for val in row:
         | 
|  | |
| 66 | 
             
                        if isinstance(val, str) and val.strip() != '':
         | 
| 67 | 
             
                            all_players.add(val)
         | 
| 68 |  | 
|  | |
| 74 | 
             
                n_rows = len(portfolio)
         | 
| 75 | 
             
                binary_matrix = np.zeros((n_rows, n_players), dtype=np.int8)
         | 
| 76 |  | 
| 77 | 
            +
                # Vectorized binary matrix creation
         | 
| 78 | 
            +
                for i, row in enumerate(player_data):
         | 
| 79 | 
            +
                    for val in row:
         | 
| 80 | 
             
                        if isinstance(val, str) and str(val).strip() != '' and str(val) in player_to_id:
         | 
| 81 | 
             
                            binary_matrix[i, player_to_id[str(val)]] = 1
         | 
| 82 |  | 
| 83 | 
             
                # Vectorized Jaccard distance calculation
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 84 | 
             
                intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
         | 
|  | |
|  | |
| 85 | 
             
                row_sums = np.sum(binary_matrix, axis=1)
         | 
| 86 | 
             
                union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix
         | 
| 87 |  | 
| 88 | 
             
                # Calculate Jaccard distance: 1 - (intersection / union)
         | 
|  | |
| 89 | 
             
                with np.errstate(divide='ignore', invalid='ignore'):
         | 
| 90 | 
             
                    jaccard_similarity = np.divide(intersection_matrix, union_matrix, 
         | 
| 91 | 
             
                                                 out=np.zeros_like(intersection_matrix, dtype=float), 
         | 
| 92 | 
             
                                                 where=union_matrix != 0)
         | 
| 93 |  | 
|  | |
| 94 | 
             
                jaccard_distance = 1 - jaccard_similarity
         | 
| 95 |  | 
| 96 | 
            +
                # Exclude self-comparison and calculate average distance for each row
         | 
|  | |
| 97 | 
             
                np.fill_diagonal(jaccard_distance, 0)
         | 
| 98 | 
            +
                row_counts = n_rows - 1
         | 
| 99 | 
             
                similarity_scores = np.sum(jaccard_distance, axis=1) / row_counts
         | 
| 100 |  | 
| 101 | 
            +
                # Normalize to 0-1 scale
         | 
| 102 | 
            +
                score_range = similarity_scores.max() - similarity_scores.min()
         | 
| 103 | 
            +
                if score_range > 0:
         | 
| 104 | 
            +
                    similarity_scores = (similarity_scores - similarity_scores.min()) / score_range
         | 
| 105 |  | 
| 106 | 
             
                return similarity_scores
         | 
| 107 |  | 
| 108 | 
            +
            def predict_dupes_vectorized(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
         | 
| 109 | 
            +
                """
         | 
| 110 | 
            +
                Vectorized version of predict_dupes using NumPy arrays for better performance.
         | 
| 111 | 
            +
                """
         | 
| 112 | 
            +
                # Set multipliers based on strength
         | 
| 113 | 
            +
                if strength_var == 'Weak':
         | 
| 114 | 
            +
                    dupes_multiplier = 0.75
         | 
| 115 | 
            +
                    percentile_multiplier = 0.90
         | 
| 116 | 
            +
                elif strength_var == 'Average':
         | 
| 117 | 
            +
                    dupes_multiplier = 1.00
         | 
| 118 | 
            +
                    percentile_multiplier = 1.00
         | 
| 119 | 
            +
                elif strength_var == 'Sharp':
         | 
| 120 | 
            +
                    dupes_multiplier = 1.25
         | 
| 121 | 
            +
                    percentile_multiplier = 1.10
         | 
| 122 | 
            +
                
         | 
| 123 | 
            +
                max_ownership = max(maps_dict['own_map'].values()) / 100
         | 
| 124 | 
            +
                average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100
         | 
| 125 | 
            +
                
         | 
| 126 | 
            +
                # Convert portfolio to NumPy arrays for faster operations
         | 
| 127 | 
            +
                portfolio_values = portfolio.values
         | 
| 128 | 
            +
                n_rows = len(portfolio)
         | 
| 129 | 
            +
                
         | 
| 130 | 
            +
                # Pre-allocate arrays for ownership data
         | 
| 131 | 
            +
                if site_var == 'Fanduel':
         | 
| 132 | 
            +
                    if type_var == 'Showdown':
         | 
| 133 | 
            +
                        num_players = 5
         | 
| 134 | 
            +
                        salary_cap = 60000
         | 
| 135 | 
            +
                        player_cols = list(range(5))  # First 5 columns are players
         | 
| 136 | 
            +
                    elif type_var == 'Classic':
         | 
| 137 | 
            +
                        if sport_var == 'WNBA':
         | 
| 138 | 
            +
                            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
         | 
| 139 | 
            +
                            salary_cap = 40000
         | 
| 140 | 
            +
                            player_cols = list(range(num_players))
         | 
| 141 | 
            +
                        else:
         | 
| 142 | 
            +
                            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
         | 
| 143 | 
            +
                            salary_cap = 60000
         | 
| 144 | 
            +
                            player_cols = list(range(num_players))
         | 
| 145 | 
            +
                elif site_var == 'Draftkings':
         | 
| 146 | 
            +
                    if type_var == 'Showdown':
         | 
| 147 | 
            +
                        num_players = 6
         | 
| 148 | 
            +
                        salary_cap = 50000
         | 
| 149 | 
            +
                        player_cols = list(range(6))
         | 
| 150 | 
            +
                    elif type_var == 'Classic':
         | 
| 151 | 
            +
                        if sport_var == 'CS2':
         | 
| 152 | 
            +
                            num_players = 6
         | 
| 153 | 
            +
                            salary_cap = 50000
         | 
| 154 | 
            +
                            player_cols = list(range(6))
         | 
| 155 | 
            +
                        else:
         | 
| 156 | 
            +
                            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
         | 
| 157 | 
            +
                            salary_cap = 50000
         | 
| 158 | 
            +
                            player_cols = list(range(num_players))
         | 
| 159 | 
            +
                
         | 
| 160 | 
            +
                # Pre-allocate ownership arrays
         | 
| 161 | 
            +
                ownership_array = np.zeros((n_rows, num_players), dtype=np.float32)
         | 
| 162 | 
            +
                ownership_rank_array = np.zeros((n_rows, num_players), dtype=np.float32)
         | 
| 163 | 
            +
                
         | 
| 164 | 
            +
                # Vectorized ownership mapping
         | 
| 165 | 
            +
                for i, col_idx in enumerate(player_cols):
         | 
| 166 | 
            +
                    if i == 0 and type_var == 'Showdown':  # Captain
         | 
| 167 | 
            +
                        ownership_array[:, i] = np.vectorize(lambda x: maps_dict['cpt_own_map'].get(x, 0))(portfolio_values[:, col_idx]) / 100
         | 
| 168 | 
            +
                        ownership_rank_array[:, i] = np.vectorize(lambda x: maps_dict['cpt_own_map'].get(x, 0))(portfolio_values[:, col_idx])
         | 
| 169 | 
            +
                    else:  # Flex players
         | 
| 170 | 
            +
                        ownership_array[:, i] = np.vectorize(lambda x: maps_dict['own_map'].get(x, 0))(portfolio_values[:, col_idx]) / 100
         | 
| 171 | 
            +
                        ownership_rank_array[:, i] = np.vectorize(lambda x: maps_dict['own_map'].get(x, 0))(portfolio_values[:, col_idx])
         | 
| 172 | 
            +
                
         | 
| 173 | 
            +
                # Calculate ranks for flex players (excluding captain)
         | 
| 174 | 
            +
                if type_var == 'Showdown':
         | 
| 175 | 
            +
                    flex_ownerships = ownership_rank_array[:, 1:].flatten()
         | 
| 176 | 
            +
                    flex_rank = pd.Series(flex_ownerships).rank(pct=True).values.reshape(n_rows, -1)
         | 
| 177 | 
            +
                    ownership_rank_array[:, 1:] = flex_rank
         | 
| 178 | 
            +
                
         | 
| 179 | 
            +
                # Convert to percentile ranks
         | 
| 180 | 
            +
                ownership_rank_array = ownership_rank_array / 100
         | 
| 181 | 
            +
                
         | 
| 182 | 
            +
                # Vectorized calculations
         | 
| 183 | 
            +
                own_product = np.prod(ownership_array, axis=1)
         | 
| 184 | 
            +
                own_average = (portfolio_values[:, portfolio.columns.get_loc('Own')].max() * 0.33) / 100
         | 
| 185 | 
            +
                own_sum = np.sum(ownership_array, axis=1)
         | 
| 186 | 
            +
                avg_own_rank = np.mean(ownership_rank_array, axis=1)
         | 
| 187 | 
            +
                
         | 
| 188 | 
            +
                # Calculate dupes formula vectorized
         | 
| 189 | 
            +
                salary_col = portfolio.columns.get_loc('salary')
         | 
| 190 | 
            +
                own_col = portfolio.columns.get_loc('Own')
         | 
| 191 | 
            +
                
         | 
| 192 | 
            +
                dupes_calc = (own_product * avg_own_rank) * Contest_Size + \
         | 
| 193 | 
            +
                             ((portfolio_values[:, salary_col] - (salary_cap - portfolio_values[:, own_col])) / 100) - \
         | 
| 194 | 
            +
                             ((salary_cap - portfolio_values[:, salary_col]) / 100)
         | 
| 195 | 
            +
                
         | 
| 196 | 
            +
                dupes_calc *= dupes_multiplier
         | 
| 197 | 
            +
                
         | 
| 198 | 
            +
                # Round and handle negative values
         | 
| 199 | 
            +
                dupes = np.where(np.round(dupes_calc, 0) <= 0, 0, np.round(dupes_calc, 0) - 1)
         | 
| 200 | 
            +
                
         | 
| 201 | 
            +
                # Calculate own_ratio vectorized
         | 
| 202 | 
            +
                max_own_mask = np.any(ownership_array == max_ownership, axis=1)
         | 
| 203 | 
            +
                own_ratio = np.where(max_own_mask, 
         | 
| 204 | 
            +
                                     own_sum / own_average,
         | 
| 205 | 
            +
                                     (own_sum - max_ownership) / own_average)
         | 
| 206 | 
            +
                
         | 
| 207 | 
            +
                # Calculate Finish_percentile vectorized
         | 
| 208 | 
            +
                percentile_cut_scalar = portfolio_values[:, portfolio.columns.get_loc('median')].max()
         | 
| 209 | 
            +
                
         | 
| 210 | 
            +
                if type_var == 'Classic':
         | 
| 211 | 
            +
                    own_ratio_nerf = 2 if sport_var == 'CS2' else 1.5
         | 
| 212 | 
            +
                elif type_var == 'Showdown':
         | 
| 213 | 
            +
                    own_ratio_nerf = 1.5
         | 
| 214 | 
            +
                
         | 
| 215 | 
            +
                median_col = portfolio.columns.get_loc('median')
         | 
| 216 | 
            +
                finish_percentile = (own_ratio - own_ratio_nerf) / ((5 * (portfolio_values[:, median_col] / percentile_cut_scalar)) / 3)
         | 
| 217 | 
            +
                finish_percentile = np.where(finish_percentile < 0.0005, 0.0005, finish_percentile / 2)
         | 
| 218 | 
            +
                
         | 
| 219 | 
            +
                # Calculate other metrics vectorized
         | 
| 220 | 
            +
                ref_proj = portfolio_values[:, median_col].max()
         | 
| 221 | 
            +
                max_proj = ref_proj + 10
         | 
| 222 | 
            +
                min_proj = ref_proj - 10
         | 
| 223 | 
            +
                avg_ref = (max_proj + min_proj) / 2
         | 
| 224 | 
            +
                
         | 
| 225 | 
            +
                win_percent = (((portfolio_values[:, median_col] / avg_ref) - (0.1 + ((ref_proj - portfolio_values[:, median_col])/100))) / (Contest_Size / 1000)) / 10
         | 
| 226 | 
            +
                max_allowed_win = (1 / Contest_Size) * 5
         | 
| 227 | 
            +
                win_percent = win_percent / win_percent.max() * max_allowed_win
         | 
| 228 | 
            +
                
         | 
| 229 | 
            +
                finish_percentile = finish_percentile + 0.005 + (0.005 * (Contest_Size / 10000))
         | 
| 230 | 
            +
                finish_percentile *= percentile_multiplier
         | 
| 231 | 
            +
                win_percent *= (1 - finish_percentile)
         | 
| 232 | 
            +
                
         | 
| 233 | 
            +
                # Calculate low ownership count vectorized
         | 
| 234 | 
            +
                low_own_count = np.sum(ownership_array < 0.10, axis=1)
         | 
| 235 | 
            +
                finish_percentile = np.where(low_own_count <= 0, 
         | 
| 236 | 
            +
                                            finish_percentile, 
         | 
| 237 | 
            +
                                            finish_percentile / low_own_count)
         | 
| 238 | 
            +
                
         | 
| 239 | 
            +
                # Calculate Lineup Edge vectorized
         | 
| 240 | 
            +
                lineup_edge = win_percent * ((0.5 - finish_percentile) * (Contest_Size / 2.5))
         | 
| 241 | 
            +
                lineup_edge = np.where(dupes > 0, lineup_edge / (dupes + 1), lineup_edge)
         | 
| 242 | 
            +
                lineup_edge = lineup_edge - lineup_edge.mean()
         | 
| 243 | 
            +
                
         | 
| 244 | 
            +
                # Calculate Weighted Own vectorized
         | 
| 245 | 
            +
                weighted_own = calculate_weighted_ownership_vectorized(ownership_array)
         | 
| 246 | 
            +
                
         | 
| 247 | 
            +
                # Calculate Geomean vectorized
         | 
| 248 | 
            +
                geomean = np.power(np.prod(ownership_array * 100, axis=1), 1 / num_players)
         | 
| 249 | 
            +
                
         | 
| 250 | 
            +
                # Calculate Diversity vectorized
         | 
| 251 | 
            +
                diversity = calculate_player_similarity_score_vectorized(portfolio, player_cols)
         | 
| 252 | 
            +
                
         | 
| 253 | 
            +
                # Create result DataFrame with optimized data types
         | 
| 254 | 
            +
                result_data = {
         | 
| 255 | 
            +
                    'Dupes': dupes.astype('uint16'),
         | 
| 256 | 
            +
                    'median': portfolio_values[:, portfolio.columns.get_loc('median')].astype('float32'),
         | 
| 257 | 
            +
                    'Own': portfolio_values[:, portfolio.columns.get_loc('Own')].astype('float32'),
         | 
| 258 | 
            +
                    'salary': portfolio_values[:, portfolio.columns.get_loc('salary')].astype('uint16'),
         | 
| 259 | 
            +
                    'Finish_percentile': finish_percentile.astype('float32'),
         | 
| 260 | 
            +
                    'Win%': win_percent.astype('float32'),
         | 
| 261 | 
            +
                    'Lineup Edge': lineup_edge.astype('float32'),
         | 
| 262 | 
            +
                    'Weighted Own': weighted_own.astype('float32'),
         | 
| 263 | 
            +
                    'Geomean': geomean.astype('float32'),
         | 
| 264 | 
            +
                    'Diversity': diversity.astype('float32')
         | 
| 265 | 
            +
                }
         | 
| 266 | 
            +
                
         | 
| 267 | 
            +
                # Add Size column if it exists
         | 
| 268 | 
            +
                if 'Size' in portfolio.columns:
         | 
| 269 | 
            +
                    result_data['Size'] = portfolio_values[:, portfolio.columns.get_loc('Size')].astype('uint16')
         | 
| 270 | 
            +
                
         | 
| 271 | 
            +
                # Add player columns back
         | 
| 272 | 
            +
                for i, col_name in enumerate(portfolio.columns[:num_players]):
         | 
| 273 | 
            +
                    result_data[col_name] = portfolio_values[:, i]
         | 
| 274 | 
            +
                
         | 
| 275 | 
            +
                return pd.DataFrame(result_data)
         | 
| 276 | 
            +
             | 
| 277 | 
            +
            # Keep the original function for backward compatibility
         | 
| 278 | 
             
            def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
         | 
| 279 | 
             
                if strength_var == 'Weak':
         | 
| 280 | 
             
                    dupes_multiplier = .75
         | 
|  | |
| 309 | 
             
                        portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 310 | 
             
                        portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 311 | 
             
                        portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 312 | 
            +
                        portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 313 |  | 
| 314 | 
            +
                        portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
         | 
| 315 | 
            +
                        portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 316 | 
            +
                        portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 317 | 
            +
                        portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 318 | 
            +
                        portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 319 | 
            +
                        portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 320 |  | 
| 321 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 322 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
|  | |
| 343 |  | 
| 344 | 
             
                        for i in range(1, num_players + 1):
         | 
| 345 | 
             
                            portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
         | 
| 346 | 
            +
                            portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 347 |  | 
| 348 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 349 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
|  | |
| 361 |  | 
| 362 | 
             
                elif site_var == 'Draftkings':
         | 
| 363 | 
             
                    if type_var == 'Showdown':
         | 
| 364 | 
            +
                        if sport_var == 'GOLF':
         | 
| 365 | 
            +
                            dup_count_columns = ['FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank', 'FLEX6_Own_percent_rank']
         | 
| 366 | 
            +
                            own_columns = ['FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own', 'FLEX6_Own']
         | 
| 367 | 
            +
                        else:
         | 
| 368 | 
            +
                            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
         | 
| 369 | 
            +
                            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
         | 
| 370 | 
             
                        calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
         | 
| 371 | 
             
                        # Get the original player columns (first 6 columns excluding salary, median, Own)
         | 
| 372 | 
             
                        player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
         | 
| 373 | 
            +
                        if sport_var == 'GOLF':
         | 
| 374 | 
            +
                            flex_ownerships = pd.concat([
         | 
| 375 | 
            +
                                portfolio.iloc[:,0].map(maps_dict['own_map']),
         | 
| 376 | 
            +
                                portfolio.iloc[:,1].map(maps_dict['own_map']),
         | 
| 377 | 
            +
                                portfolio.iloc[:,2].map(maps_dict['own_map']),
         | 
| 378 | 
            +
                                portfolio.iloc[:,3].map(maps_dict['own_map']),
         | 
| 379 | 
            +
                                portfolio.iloc[:,4].map(maps_dict['own_map']),
         | 
| 380 | 
            +
                                portfolio.iloc[:,5].map(maps_dict['own_map'])
         | 
| 381 | 
            +
                            ])
         | 
| 382 | 
            +
                        else:
         | 
| 383 | 
            +
                            flex_ownerships = pd.concat([
         | 
| 384 | 
            +
                                portfolio.iloc[:,1].map(maps_dict['own_map']),
         | 
| 385 | 
            +
                                portfolio.iloc[:,2].map(maps_dict['own_map']),
         | 
| 386 | 
            +
                                portfolio.iloc[:,3].map(maps_dict['own_map']),
         | 
| 387 | 
            +
                                portfolio.iloc[:,4].map(maps_dict['own_map']),
         | 
| 388 | 
            +
                                portfolio.iloc[:,5].map(maps_dict['own_map'])
         | 
| 389 | 
            +
                            ])
         | 
| 390 | 
             
                        flex_rank = flex_ownerships.rank(pct=True)
         | 
| 391 |  | 
| 392 | 
             
                        # Assign ranks back to individual columns using the same rank scale
         | 
| 393 | 
            +
                        if sport_var == 'GOLF':
         | 
| 394 | 
            +
                            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 395 | 
            +
                            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 396 | 
            +
                            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 397 | 
            +
                            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 398 | 
            +
                            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 399 | 
            +
                            portfolio['FLEX6_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 400 | 
            +
             | 
| 401 | 
            +
                            portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 402 | 
            +
                            portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 403 | 
            +
                            portfolio['FLEX3_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 404 | 
            +
                            portfolio['FLEX4_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 405 | 
            +
                            portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 406 | 
            +
                            portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 407 | 
            +
                        else:    
         | 
| 408 | 
            +
                            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
         | 
| 409 | 
            +
                            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 410 | 
            +
                            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 411 | 
            +
                            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 412 | 
            +
                            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 413 | 
            +
                            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 414 |  | 
| 415 | 
            +
                            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
         | 
| 416 | 
            +
                            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 417 | 
            +
                            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 418 | 
            +
                            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 419 | 
            +
                            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 420 | 
            +
                            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 421 |  | 
| 422 | 
             
                        portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 423 | 
             
                        portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
|  | |
| 459 | 
             
                            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 460 | 
             
                            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
         | 
| 461 |  | 
| 462 | 
            +
                            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
         | 
| 463 | 
            +
                            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 464 | 
            +
                            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 465 | 
            +
                            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 466 | 
            +
                            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 467 | 
            +
                            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 468 |  | 
| 469 | 
             
                            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 470 | 
             
                            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
|  | |
| 491 |  | 
| 492 | 
             
                            for i in range(1, num_players + 1):
         | 
| 493 | 
             
                                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
         | 
| 494 | 
            +
                                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']).astype('float32') / 100
         | 
| 495 |  | 
| 496 | 
             
                            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
         | 
| 497 | 
             
                            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
         | 
|  | |
| 544 | 
             
                portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
         | 
| 545 | 
             
                portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
         | 
| 546 | 
             
                portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
         | 
| 547 | 
            +
                portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership_wrapper, axis=1)
         | 
| 548 | 
             
                portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
         | 
| 549 |  | 
| 550 | 
             
                # Calculate similarity score based on actual player selection
         | 
| 551 | 
            +
                portfolio['Diversity'] = calculate_player_similarity_score_vectorized(portfolio, player_columns)
         | 
| 552 |  | 
| 553 | 
             
                portfolio = portfolio.drop(columns=dup_count_columns)
         | 
| 554 | 
             
                portfolio = portfolio.drop(columns=own_columns)
         | 
| 555 | 
             
                portfolio = portfolio.drop(columns=calc_columns)
         | 
| 556 | 
            +
                
         | 
| 557 | 
            +
                int16_columns_stacks = ['Dupes', 'Size', 'salary']
         | 
| 558 | 
            +
                int16_columns_nstacks = ['Dupes', 'salary']
         | 
| 559 | 
            +
                float32_columns = ['median', 'Own', 'Finish_percentile', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
         | 
| 560 | 
            +
             | 
| 561 | 
            +
                try:
         | 
| 562 | 
            +
                    portfolio[int16_columns_stacks] = portfolio[int16_columns_stacks].astype('uint16')
         | 
| 563 | 
            +
                except:
         | 
| 564 | 
            +
                    portfolio[int16_columns_nstacks] = portfolio[int16_columns_nstacks].astype('uint16')
         | 
| 565 | 
            +
             | 
| 566 | 
            +
                portfolio[float32_columns] = portfolio[float32_columns].astype('float32')
         | 
| 567 |  | 
| 568 | 
            +
                return portfolio
         | 
