James McCool committed on
Commit
197d1b2
·
1 Parent(s): b4a377f

Add memory-efficient ranking calculation in predict_dupes.py

Browse files

Introduced a new function, calculate_flex_ranks_efficient, to replace pd.concat and rank operations, enhancing memory efficiency. Updated the predict_dupes function to utilize this new ranking method for FLEX and position ownership percent ranks.

Files changed (1) hide show
  1. global_func/predict_dupes.py +53 -56
global_func/predict_dupes.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import time
5
  import math
6
  from difflib import SequenceMatcher
 
7
 
8
  def calculate_weighted_ownership_vectorized(ownership_array):
9
  """
@@ -38,6 +39,32 @@ def calculate_weighted_ownership_vectorized(ownership_array):
38
  # Convert back to percentage form
39
  return weighted * 10000
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def calculate_weighted_ownership_wrapper(row_ownerships):
42
  """
43
  Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
@@ -144,22 +171,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
144
 
145
  # Assign ranks back to individual columns using the same rank scale
146
  if sport_var == 'GOLF':
147
- flex_ownerships = pd.concat([
148
- portfolio.iloc[:,1].map(maps_dict['own_map']),
149
- portfolio.iloc[:,2].map(maps_dict['own_map']),
150
- portfolio.iloc[:,3].map(maps_dict['own_map']),
151
- portfolio.iloc[:,4].map(maps_dict['own_map']),
152
- portfolio.iloc[:,5].map(maps_dict['own_map']),
153
- portfolio.iloc[:,6].map(maps_dict['own_map'])
154
- ])
155
- flex_rank = flex_ownerships.rank(pct=True)
156
 
157
- portfolio['FLEX1_Own_percent_rank'] = flex_rank.iloc[0:n_rows].values
158
- portfolio['FLEX2_Own_percent_rank'] = flex_rank.iloc[n_rows:2*n_rows].values
159
- portfolio['FLEX3_Own_percent_rank'] = flex_rank.iloc[2*n_rows:3*n_rows].values
160
- portfolio['FLEX4_Own_percent_rank'] = flex_rank.iloc[3*n_rows:4*n_rows].values
161
- portfolio['FLEX5_Own_percent_rank'] = flex_rank.iloc[4*n_rows:5*n_rows].values
162
- portfolio['FLEX6_Own_percent_rank'] = flex_rank.iloc[5*n_rows:6*n_rows].values
163
 
164
  portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
165
  portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
@@ -168,21 +187,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
168
  portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
169
  portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
170
  else:
171
- flex_ownerships = pd.concat([
172
- portfolio.iloc[:,1].map(maps_dict['own_map']),
173
- portfolio.iloc[:,2].map(maps_dict['own_map']),
174
- portfolio.iloc[:,3].map(maps_dict['own_map']),
175
- portfolio.iloc[:,4].map(maps_dict['own_map']),
176
- portfolio.iloc[:,5].map(maps_dict['own_map'])
177
- ])
178
- flex_rank = flex_ownerships.rank(pct=True)
179
 
180
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
181
- portfolio['FLEX1_Own_percent_rank'] = flex_rank.iloc[0:n_rows].values
182
- portfolio['FLEX2_Own_percent_rank'] = flex_rank.iloc[n_rows:2*n_rows].values
183
- portfolio['FLEX3_Own_percent_rank'] = flex_rank.iloc[2*n_rows:3*n_rows].values
184
- portfolio['FLEX4_Own_percent_rank'] = flex_rank.iloc[3*n_rows:4*n_rows].values
185
- portfolio['FLEX5_Own_percent_rank'] = flex_rank.iloc[4*n_rows:5*n_rows].values
186
 
187
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
188
  portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
@@ -223,22 +235,15 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
223
 
224
  n_rows = len(portfolio)
225
 
226
- flex_ownerships = pd.concat([
227
- portfolio.iloc[:,1].map(maps_dict['own_map']),
228
- portfolio.iloc[:,2].map(maps_dict['own_map']),
229
- portfolio.iloc[:,3].map(maps_dict['own_map']),
230
- portfolio.iloc[:,4].map(maps_dict['own_map']),
231
- portfolio.iloc[:,5].map(maps_dict['own_map'])
232
- ])
233
- flex_rank = flex_ownerships.rank(pct=True)
234
 
235
  # Assign ranks back to individual columns using the same rank scale
236
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
237
- portfolio['FLEX1_Own_percent_rank'] = flex_rank.iloc[0:n_rows].values
238
- portfolio['FLEX2_Own_percent_rank'] = flex_rank.iloc[n_rows:2*n_rows].values
239
- portfolio['FLEX3_Own_percent_rank'] = flex_rank.iloc[2*n_rows:3*n_rows].values
240
- portfolio['FLEX4_Own_percent_rank'] = flex_rank.iloc[3*n_rows:4*n_rows].values
241
- portfolio['FLEX5_Own_percent_rank'] = flex_rank.iloc[4*n_rows:5*n_rows].values
242
 
243
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
244
  portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
@@ -276,24 +281,16 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
276
 
277
  n_rows = len(portfolio)
278
 
279
- flex_ownerships = pd.concat([
280
- portfolio.iloc[:,1].map(maps_dict['own_map']),
281
- portfolio.iloc[:,2].map(maps_dict['own_map']),
282
- portfolio.iloc[:,3].map(maps_dict['own_map']),
283
- portfolio.iloc[:,4].map(maps_dict['own_map']),
284
- portfolio.iloc[:,5].map(maps_dict['own_map']),
285
- portfolio.iloc[:,6].map(maps_dict['own_map'])
286
- ])
287
- flex_rank = flex_ownerships.rank(pct=True)
288
 
289
  # Assign ranks back to individual columns using the same rank scale
290
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
291
- portfolio['TOP_Own_percent_rank'] = flex_rank.iloc[0:n_rows].values
292
- portfolio['JNG_Own_percent_rank'] = flex_rank.iloc[n_rows:2*n_rows].values
293
- portfolio['MID_Own_percent_rank'] = flex_rank.iloc[2*n_rows:3*n_rows].values
294
- portfolio['ADC_Own_percent_rank'] = flex_rank.iloc[3*n_rows:4*n_rows].values
295
- portfolio['SUP_Own_percent_rank'] = flex_rank.iloc[4*n_rows:5*n_rows].values
296
- portfolio['Team_Own_percent_rank'] = flex_rank.iloc[5*n_rows:6*n_rows].values
297
 
298
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
299
  portfolio['TOP_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
 
4
  import time
5
  import math
6
  from difflib import SequenceMatcher
7
+ import scipy.stats
8
 
9
  def calculate_weighted_ownership_vectorized(ownership_array):
10
  """
 
39
  # Convert back to percentage form
40
  return weighted * 10000
41
 
42
def calculate_flex_ranks_efficient(portfolio, start_col, end_col, maps_dict, map_key='own_map'):
    """
    Rank the mapped values of several portfolio columns on one shared percentile scale.

    Memory-efficient replacement for ``pd.concat([...]).rank(pct=True)``: the
    mapped values of every selected column are written into a single
    pre-allocated float32 buffer and ranked together.

    Args:
        portfolio: DataFrame whose columns ``start_col`` .. ``end_col - 1``
            (positional, via ``iloc``) hold the keys to look up.
        start_col: Position of the first column to include.
        end_col: Position one past the last column to include.
        maps_dict: Dict of lookup maps; ``maps_dict[map_key]`` is passed to
            ``Series.map`` for every selected column.
        map_key: Key of the lookup map inside ``maps_dict``.

    Returns:
        dict mapping ``0 .. n_cols - 1`` (column offset from ``start_col``) to a
        1-D float array of length ``len(portfolio)`` with that column's
        percentile ranks. Tied values share their average rank. Keys missing
        from the lookup map produce NaN and keep a NaN rank, and ranks are
        divided by the number of non-NaN values, matching the defaults of
        ``Series.rank(pct=True)``.
    """
    n_rows = len(portfolio)
    n_cols = end_col - start_col
    lookup = maps_dict[map_key]

    # One flat buffer, filled column by column (same layout pd.concat produced).
    # Every slot is overwritten below, so it does not need zero-initialising.
    all_values = np.empty(n_rows * n_cols, dtype=np.float32)
    for i, col_idx in enumerate(range(start_col, end_col)):
        all_values[i * n_rows:(i + 1) * n_rows] = portfolio.iloc[:, col_idx].map(lookup).values

    # rankdata does not skip NaN the way Series.rank does, so unmapped entries
    # are masked out here: they must neither receive a rank nor inflate the
    # denominator used to turn ranks into percentiles.
    valid = ~np.isnan(all_values)
    n_valid = int(np.count_nonzero(valid))

    if n_valid == 0:
        # Empty portfolio or nothing mapped: nothing to rank, avoid dividing by zero.
        ranks = np.full(all_values.size, np.nan, dtype=np.float64)
    elif n_valid == all_values.size:
        # Common case, no missing values: rank in place without an extra copy.
        ranks = scipy.stats.rankdata(all_values, method='average') / n_valid
    else:
        ranks = np.full(all_values.size, np.nan, dtype=np.float64)
        ranks[valid] = scipy.stats.rankdata(all_values[valid], method='average') / n_valid

    # Split the flat rank vector back into one slice per source column.
    return {i: ranks[i * n_rows:(i + 1) * n_rows] for i in range(n_cols)}
67
+
68
  def calculate_weighted_ownership_wrapper(row_ownerships):
69
  """
70
  Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
 
171
 
172
  # Assign ranks back to individual columns using the same rank scale
173
  if sport_var == 'GOLF':
174
+ flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 7, maps_dict)
 
 
 
 
 
 
 
 
175
 
176
+ portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
177
+ portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
178
+ portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
179
+ portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
180
+ portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
181
+ portfolio['FLEX6_Own_percent_rank'] = flex_ranks[5]
182
 
183
  portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
184
  portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
 
187
  portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
188
  portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
189
  else:
190
+ flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 6, maps_dict)
 
 
 
 
 
 
 
191
 
192
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
193
+ portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
194
+ portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
195
+ portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
196
+ portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
197
+ portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
198
 
199
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
200
  portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
 
235
 
236
  n_rows = len(portfolio)
237
 
238
+ flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 6, maps_dict)
 
 
 
 
 
 
 
239
 
240
  # Assign ranks back to individual columns using the same rank scale
241
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
242
+ portfolio['FLEX1_Own_percent_rank'] = flex_ranks[0]
243
+ portfolio['FLEX2_Own_percent_rank'] = flex_ranks[1]
244
+ portfolio['FLEX3_Own_percent_rank'] = flex_ranks[2]
245
+ portfolio['FLEX4_Own_percent_rank'] = flex_ranks[3]
246
+ portfolio['FLEX5_Own_percent_rank'] = flex_ranks[4]
247
 
248
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
249
  portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
 
281
 
282
  n_rows = len(portfolio)
283
 
284
+ flex_ranks = calculate_flex_ranks_efficient(portfolio, 1, 7, maps_dict)
 
 
 
 
 
 
 
 
285
 
286
  # Assign ranks back to individual columns using the same rank scale
287
  portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
288
+ portfolio['TOP_Own_percent_rank'] = flex_ranks[0]
289
+ portfolio['JNG_Own_percent_rank'] = flex_ranks[1]
290
+ portfolio['MID_Own_percent_rank'] = flex_ranks[2]
291
+ portfolio['ADC_Own_percent_rank'] = flex_ranks[3]
292
+ portfolio['SUP_Own_percent_rank'] = flex_ranks[4]
293
+ portfolio['Team_Own_percent_rank'] = flex_ranks[5]
294
 
295
  portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
296
  portfolio['TOP_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100