James McCool committed on
Commit
579a535
·
1 Parent(s): 7eef51a

Enhance DataFrame optimization and vectorized calculations in app.py. Refine category conversion logic to exclude specific player columns and improve memory efficiency. Introduce a safe mapping function to handle NaN values for salary, median, and ownership calculations, ensuring better performance across various scenarios.

Browse files
Files changed (1) hide show
  1. app.py +46 -20
app.py CHANGED
@@ -133,8 +133,10 @@ def optimize_dataframe_dtypes(df):
133
  """Optimize DataFrame data types for memory efficiency"""
134
  for col in df.columns:
135
  if df[col].dtype == 'object':
136
- # Try to convert to category if many duplicates
137
- if df[col].nunique() / len(df) < 0.5:
 
 
138
  df[col] = df[col].astype('category')
139
  return df
140
 
@@ -206,53 +208,77 @@ def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_v
206
 
207
  def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
208
  """Vectorized salary calculation to replace expensive apply operations"""
 
 
 
 
 
 
 
 
209
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
210
  # Captain + flex calculations
211
- cpt_salaries = df.iloc[:, 0].map(map_dict['cpt_salary_map']).fillna(0)
212
- flex_salaries = sum(df.iloc[:, i].map(map_dict['salary_map']).fillna(0) for i in range(1, len(player_columns)))
213
  return cpt_salaries + flex_salaries
214
  elif type_var == 'Showdown':
215
  if sport_var == 'GOLF':
216
- return sum(df[col].map(map_dict['salary_map']).fillna(0) for col in player_columns)
217
  else:
218
- cpt_salaries = df.iloc[:, 0].map(map_dict['cpt_salary_map']).fillna(0)
219
- flex_salaries = sum(df.iloc[:, i].map(map_dict['salary_map']).fillna(0) for i in range(1, len(player_columns)))
220
  return cpt_salaries + flex_salaries
221
  else:
222
  # Classic non-CS2/LOL
223
- return sum(df[col].map(map_dict['salary_map']).fillna(0) for col in player_columns)
224
 
225
  def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
226
  """Vectorized median calculation to replace expensive apply operations"""
 
 
 
 
 
 
 
 
227
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
228
- cpt_medians = df.iloc[:, 0].map(map_dict['cpt_proj_map']).fillna(0)
229
- flex_medians = sum(df.iloc[:, i].map(map_dict['proj_map']).fillna(0) for i in range(1, len(player_columns)))
230
  return cpt_medians + flex_medians
231
  elif type_var == 'Showdown':
232
  if sport_var == 'GOLF':
233
- return sum(df[col].map(map_dict['proj_map']).fillna(0) for col in player_columns)
234
  else:
235
- cpt_medians = df.iloc[:, 0].map(map_dict['cpt_proj_map']).fillna(0)
236
- flex_medians = sum(df.iloc[:, i].map(map_dict['proj_map']).fillna(0) for i in range(1, len(player_columns)))
237
  return cpt_medians + flex_medians
238
  else:
239
- return sum(df[col].map(map_dict['proj_map']).fillna(0) for col in player_columns)
240
 
241
  def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
242
  """Vectorized ownership calculation to replace expensive apply operations"""
 
 
 
 
 
 
 
 
243
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
244
- cpt_own = df.iloc[:, 0].map(map_dict['cpt_own_map']).fillna(0)
245
- flex_own = sum(df.iloc[:, i].map(map_dict['own_map']).fillna(0) for i in range(1, len(player_columns)))
246
  return cpt_own + flex_own
247
  elif type_var == 'Showdown':
248
  if sport_var == 'GOLF':
249
- return sum(df[col].map(map_dict['own_map']).fillna(0) for col in player_columns)
250
  else:
251
- cpt_own = df.iloc[:, 0].map(map_dict['cpt_own_map']).fillna(0)
252
- flex_own = sum(df.iloc[:, i].map(map_dict['own_map']).fillna(0) for i in range(1, len(player_columns)))
253
  return cpt_own + flex_own
254
  else:
255
- return sum(df[col].map(map_dict['own_map']).fillna(0) for col in player_columns)
256
 
257
  def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
258
  """Centralized function to calculate salary, median, and ownership efficiently"""
 
133
  """Optimize DataFrame data types for memory efficiency"""
134
  for col in df.columns:
135
  if df[col].dtype == 'object':
136
+ # Only convert to category if there are many duplicates AND it's not a player column
137
+ # Player columns need to stay as object for mapping operations
138
+ excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
139
+ if col not in excluded_cols and df[col].nunique() / len(df) < 0.3:
140
  df[col] = df[col].astype('category')
141
  return df
142
 
 
208
 
209
  def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
210
  """Vectorized salary calculation to replace expensive apply operations"""
211
+ def safe_map_and_fill(series, mapping, fill_value=0):
212
+ """Safely map values and fill NaN, handling categorical columns"""
213
+ mapped = series.map(mapping)
214
+ if hasattr(series, 'cat'):
215
+ # Handle categorical columns by converting to object first
216
+ mapped = mapped.astype('object')
217
+ return mapped.fillna(fill_value)
218
+
219
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
220
  # Captain + flex calculations
221
+ cpt_salaries = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_salary_map'])
222
+ flex_salaries = sum(safe_map_and_fill(df.iloc[:, i], map_dict['salary_map']) for i in range(1, len(player_columns)))
223
  return cpt_salaries + flex_salaries
224
  elif type_var == 'Showdown':
225
  if sport_var == 'GOLF':
226
+ return sum(safe_map_and_fill(df[col], map_dict['salary_map']) for col in player_columns)
227
  else:
228
+ cpt_salaries = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_salary_map'])
229
+ flex_salaries = sum(safe_map_and_fill(df.iloc[:, i], map_dict['salary_map']) for i in range(1, len(player_columns)))
230
  return cpt_salaries + flex_salaries
231
  else:
232
  # Classic non-CS2/LOL
233
+ return sum(safe_map_and_fill(df[col], map_dict['salary_map']) for col in player_columns)
234
 
235
  def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
236
  """Vectorized median calculation to replace expensive apply operations"""
237
+ def safe_map_and_fill(series, mapping, fill_value=0):
238
+ """Safely map values and fill NaN, handling categorical columns"""
239
+ mapped = series.map(mapping)
240
+ if hasattr(series, 'cat'):
241
+ # Handle categorical columns by converting to object first
242
+ mapped = mapped.astype('object')
243
+ return mapped.fillna(fill_value)
244
+
245
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
246
+ cpt_medians = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_proj_map'])
247
+ flex_medians = sum(safe_map_and_fill(df.iloc[:, i], map_dict['proj_map']) for i in range(1, len(player_columns)))
248
  return cpt_medians + flex_medians
249
  elif type_var == 'Showdown':
250
  if sport_var == 'GOLF':
251
+ return sum(safe_map_and_fill(df[col], map_dict['proj_map']) for col in player_columns)
252
  else:
253
+ cpt_medians = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_proj_map'])
254
+ flex_medians = sum(safe_map_and_fill(df.iloc[:, i], map_dict['proj_map']) for i in range(1, len(player_columns)))
255
  return cpt_medians + flex_medians
256
  else:
257
+ return sum(safe_map_and_fill(df[col], map_dict['proj_map']) for col in player_columns)
258
 
259
  def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
260
  """Vectorized ownership calculation to replace expensive apply operations"""
261
+ def safe_map_and_fill(series, mapping, fill_value=0):
262
+ """Safely map values and fill NaN, handling categorical columns"""
263
+ mapped = series.map(mapping)
264
+ if hasattr(series, 'cat'):
265
+ # Handle categorical columns by converting to object first
266
+ mapped = mapped.astype('object')
267
+ return mapped.fillna(fill_value)
268
+
269
  if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
270
+ cpt_own = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_own_map'])
271
+ flex_own = sum(safe_map_and_fill(df.iloc[:, i], map_dict['own_map']) for i in range(1, len(player_columns)))
272
  return cpt_own + flex_own
273
  elif type_var == 'Showdown':
274
  if sport_var == 'GOLF':
275
+ return sum(safe_map_and_fill(df[col], map_dict['own_map']) for col in player_columns)
276
  else:
277
+ cpt_own = safe_map_and_fill(df.iloc[:, 0], map_dict['cpt_own_map'])
278
+ flex_own = sum(safe_map_and_fill(df.iloc[:, i], map_dict['own_map']) for i in range(1, len(player_columns)))
279
  return cpt_own + flex_own
280
  else:
281
+ return sum(safe_map_and_fill(df[col], map_dict['own_map']) for col in player_columns)
282
 
283
  def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
284
  """Centralized function to calculate salary, median, and ownership efficiently"""