James McCool
committed on
Commit
·
197d1b2
1
Parent(s):
b4a377f
Add memory-efficient ranking calculation in predict_dupes.py
Browse files
Introduced a new function, calculate_flex_ranks_efficient, to replace pd.concat and rank operations, enhancing memory efficiency. Updated the predict_dupes function to use this new ranking method for FLEX and position ownership percent ranks.
- global_func/predict_dupes.py +53 -56
global_func/predict_dupes.py
CHANGED
|
@@ -4,6 +4,7 @@ import pandas as pd
|
|
| 4 |
import time
|
| 5 |
import math
|
| 6 |
from difflib import SequenceMatcher
|
|
|
|
| 7 |
|
| 8 |
def calculate_weighted_ownership_vectorized(ownership_array):
|
| 9 |
"""
|
|
@@ -38,6 +39,32 @@ def calculate_weighted_ownership_vectorized(ownership_array):
|
|
| 38 |
# Convert back to percentage form
|
| 39 |
return weighted * 10000
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def calculate_weighted_ownership_wrapper(row_ownerships):
|
| 42 |
"""
|
| 43 |
Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
|
|
@@ -144,22 +171,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 144 |
|
| 145 |
# Assign ranks back to individual columns using the same rank scale
|
| 146 |
if sport_var == 'GOLF':
|
| 147 |
-
|
| 148 |
-
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 149 |
-
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
| 150 |
-
portfolio.iloc[:,3].map(maps_dict['own_map']),
|
| 151 |
-
portfolio.iloc[:,4].map(maps_dict['own_map']),
|
| 152 |
-
portfolio.iloc[:,5].map(maps_dict['own_map']),
|
| 153 |
-
portfolio.iloc[:,6].map(maps_dict['own_map'])
|
| 154 |
-
])
|
| 155 |
-
flex_rank = flex_ownerships.rank(pct=True)
|
| 156 |
|
| 157 |
-
portfolio['FLEX1_Own_percent_rank'] =
|
| 158 |
-
portfolio['FLEX2_Own_percent_rank'] =
|
| 159 |
-
portfolio['FLEX3_Own_percent_rank'] =
|
| 160 |
-
portfolio['FLEX4_Own_percent_rank'] =
|
| 161 |
-
portfolio['FLEX5_Own_percent_rank'] =
|
| 162 |
-
portfolio['FLEX6_Own_percent_rank'] =
|
| 163 |
|
| 164 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
|
| 165 |
portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
@@ -168,21 +187,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 168 |
portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
|
| 169 |
portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
|
| 170 |
else:
|
| 171 |
-
|
| 172 |
-
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 173 |
-
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
| 174 |
-
portfolio.iloc[:,3].map(maps_dict['own_map']),
|
| 175 |
-
portfolio.iloc[:,4].map(maps_dict['own_map']),
|
| 176 |
-
portfolio.iloc[:,5].map(maps_dict['own_map'])
|
| 177 |
-
])
|
| 178 |
-
flex_rank = flex_ownerships.rank(pct=True)
|
| 179 |
|
| 180 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 181 |
-
portfolio['FLEX1_Own_percent_rank'] =
|
| 182 |
-
portfolio['FLEX2_Own_percent_rank'] =
|
| 183 |
-
portfolio['FLEX3_Own_percent_rank'] =
|
| 184 |
-
portfolio['FLEX4_Own_percent_rank'] =
|
| 185 |
-
portfolio['FLEX5_Own_percent_rank'] =
|
| 186 |
|
| 187 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 188 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
@@ -223,22 +235,15 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 223 |
|
| 224 |
n_rows = len(portfolio)
|
| 225 |
|
| 226 |
-
|
| 227 |
-
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 228 |
-
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
| 229 |
-
portfolio.iloc[:,3].map(maps_dict['own_map']),
|
| 230 |
-
portfolio.iloc[:,4].map(maps_dict['own_map']),
|
| 231 |
-
portfolio.iloc[:,5].map(maps_dict['own_map'])
|
| 232 |
-
])
|
| 233 |
-
flex_rank = flex_ownerships.rank(pct=True)
|
| 234 |
|
| 235 |
# Assign ranks back to individual columns using the same rank scale
|
| 236 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 237 |
-
portfolio['FLEX1_Own_percent_rank'] =
|
| 238 |
-
portfolio['FLEX2_Own_percent_rank'] =
|
| 239 |
-
portfolio['FLEX3_Own_percent_rank'] =
|
| 240 |
-
portfolio['FLEX4_Own_percent_rank'] =
|
| 241 |
-
portfolio['FLEX5_Own_percent_rank'] =
|
| 242 |
|
| 243 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 244 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
@@ -276,24 +281,16 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 276 |
|
| 277 |
n_rows = len(portfolio)
|
| 278 |
|
| 279 |
-
|
| 280 |
-
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 281 |
-
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
| 282 |
-
portfolio.iloc[:,3].map(maps_dict['own_map']),
|
| 283 |
-
portfolio.iloc[:,4].map(maps_dict['own_map']),
|
| 284 |
-
portfolio.iloc[:,5].map(maps_dict['own_map']),
|
| 285 |
-
portfolio.iloc[:,6].map(maps_dict['own_map'])
|
| 286 |
-
])
|
| 287 |
-
flex_rank = flex_ownerships.rank(pct=True)
|
| 288 |
|
| 289 |
# Assign ranks back to individual columns using the same rank scale
|
| 290 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 291 |
-
portfolio['TOP_Own_percent_rank'] =
|
| 292 |
-
portfolio['JNG_Own_percent_rank'] =
|
| 293 |
-
portfolio['MID_Own_percent_rank'] =
|
| 294 |
-
portfolio['ADC_Own_percent_rank'] =
|
| 295 |
-
portfolio['SUP_Own_percent_rank'] =
|
| 296 |
-
portfolio['Team_Own_percent_rank'] =
|
| 297 |
|
| 298 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 299 |
portfolio['TOP_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
|
|
| 4 |
import time
|
| 5 |
import math
|
| 6 |
from difflib import SequenceMatcher
|
| 7 |
+
import scipy.stats
|
| 8 |
|
| 9 |
def calculate_weighted_ownership_vectorized(ownership_array):
|
| 10 |
"""
|
|
|
|
| 39 |
# Convert back to percentage form
|
| 40 |
return weighted * 10000
|
| 41 |
|
| 42 |
+
def calculate_flex_ranks_efficient(portfolio, start_col, end_col, maps_dict, map_key='own_map'):
    """Compute pooled percentile ranks for a range of portfolio columns.

    Each column in ``portfolio.iloc[:, start_col:end_col]`` is mapped through
    ``maps_dict[map_key]`` and the mapped values of all columns are ranked
    together on a single scale (ties receive their average rank), mirroring
    ``pd.concat([...]).rank(pct=True)`` without building the concatenated
    Series.

    Args:
        portfolio: DataFrame whose columns are addressed by position.
        start_col: Positional index of the first column to include.
        end_col: Positional index one past the last column to include.
        maps_dict: Dictionary holding the mapping to apply under ``map_key``.
        map_key: Key of the mapping inside ``maps_dict``.

    Returns:
        dict mapping the 0-based column offset (0 for ``start_col``) to a
        float64 NumPy array of length ``len(portfolio)`` with the percentile
        rank of every row of that column. Entries whose mapped value is NaN
        (for example a name missing from a dict mapping) keep a NaN rank and
        are excluded from the denominator, as ``Series.rank(pct=True)`` does.
        An empty or negative column range yields an empty dict.
    """
    n_rows = len(portfolio)
    n_cols = end_col - start_col
    if n_cols <= 0:
        return {}

    mapping = maps_dict[map_key]

    # One flat float32 buffer holds every mapped column back to back; this is
    # the memory saving over concatenating full Series objects.
    pooled = np.empty(n_rows * n_cols, dtype=np.float32)
    for offset, col_idx in enumerate(range(start_col, end_col)):
        lo = offset * n_rows
        pooled[lo:lo + n_rows] = portfolio.iloc[:, col_idx].map(mapping).values

    # Rank only the non-NaN entries. Feeding NaN to rankdata either poisons
    # every rank or ranks NaN as the largest value (SciPy-version dependent),
    # and dividing by the full length would skew the percentiles.
    ranks = np.full(pooled.shape, np.nan, dtype=np.float64)
    valid = ~np.isnan(pooled)
    n_valid = int(valid.sum())
    if n_valid:
        ranks[valid] = scipy.stats.rankdata(pooled[valid], method='average') / n_valid

    # Row i of the reshaped array is the rank vector of column start_col + i.
    per_column = ranks.reshape(n_cols, n_rows)
    return {offset: per_column[offset] for offset in range(n_cols)}
|
| 67 |
+
|
| 68 |
def calculate_weighted_ownership_wrapper(row_ownerships):
|
| 69 |
"""
|
| 70 |
Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
|
|
|
|
| 171 |
|
| 172 |
# Assign ranks back to individual columns using the same rank scale
|
| 173 |
if sport_var == 'GOLF':
|
| 174 |
+
flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 7, maps_dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
+
portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
|
| 177 |
+
portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
|
| 178 |
+
portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
|
| 179 |
+
portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
|
| 180 |
+
portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
|
| 181 |
+
portfolio['FLEX6_Own_percent_rank'] = flex_ranks[5]
|
| 182 |
|
| 183 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
|
| 184 |
portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
|
|
| 187 |
portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
|
| 188 |
portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
|
| 189 |
else:
|
| 190 |
+
flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 6, maps_dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 193 |
+
portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
|
| 194 |
+
portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
|
| 195 |
+
portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
|
| 196 |
+
portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
|
| 197 |
+
portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
|
| 198 |
|
| 199 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 200 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
|
|
| 235 |
|
| 236 |
n_rows = len(portfolio)
|
| 237 |
|
| 238 |
+
flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 6, maps_dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
# Assign ranks back to individual columns using the same rank scale
|
| 241 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 242 |
+
portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
|
| 243 |
+
portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
|
| 244 |
+
portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
|
| 245 |
+
portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
|
| 246 |
+
portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
|
| 247 |
|
| 248 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 249 |
portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|
|
|
|
| 281 |
|
| 282 |
n_rows = len(portfolio)
|
| 283 |
|
| 284 |
+
flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 7, maps_dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
# Assign ranks back to individual columns using the same rank scale
|
| 287 |
portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
|
| 288 |
+
portfolio['TOP_Own_percent_rank'] = flex_ranks[0]
|
| 289 |
+
portfolio['JNG_Own_percent_rank'] = flex_ranks[1]
|
| 290 |
+
portfolio['MID_Own_percent_rank'] = flex_ranks[2]
|
| 291 |
+
portfolio['ADC_Own_percent_rank'] = flex_ranks[3]
|
| 292 |
+
portfolio['SUP_Own_percent_rank'] = flex_ranks[4]
|
| 293 |
+
portfolio['Team_Own_percent_rank'] = flex_ranks[5]
|
| 294 |
|
| 295 |
portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
|
| 296 |
portfolio['TOP_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
|