James McCool
committed on
Commit
·
579a535
1
Parent(s):
7eef51a
Enhance DataFrame optimization and vectorized calculations in app.py. Refine category conversion logic to exclude specific player columns and improve memory efficiency. Introduce a safe mapping function to handle NaN values for salary, median, and ownership calculations, ensuring better performance across various scenarios.
Browse files
app.py
CHANGED
@@ -133,8 +133,10 @@ def optimize_dataframe_dtypes(df):
|
|
133 |
"""Optimize DataFrame data types for memory efficiency"""
|
134 |
for col in df.columns:
|
135 |
if df[col].dtype == 'object':
|
136 |
-
#
|
137 |
-
|
|
|
|
|
138 |
df[col] = df[col].astype('category')
|
139 |
return df
|
140 |
|
@@ -206,53 +208,77 @@ def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_v
|
|
206 |
|
207 |
def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
208 |
"""Vectorized salary calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
210 |
# Captain + flex calculations
|
211 |
-
cpt_salaries = df.iloc[:, 0]
|
212 |
-
flex_salaries = sum(df.iloc[:, i]
|
213 |
return cpt_salaries + flex_salaries
|
214 |
elif type_var == 'Showdown':
|
215 |
if sport_var == 'GOLF':
|
216 |
-
return sum(df[col]
|
217 |
else:
|
218 |
-
cpt_salaries = df.iloc[:, 0]
|
219 |
-
flex_salaries = sum(df.iloc[:, i]
|
220 |
return cpt_salaries + flex_salaries
|
221 |
else:
|
222 |
# Classic non-CS2/LOL
|
223 |
-
return sum(df[col]
|
224 |
|
225 |
def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
226 |
"""Vectorized median calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
228 |
-
cpt_medians = df.iloc[:, 0]
|
229 |
-
flex_medians = sum(df.iloc[:, i]
|
230 |
return cpt_medians + flex_medians
|
231 |
elif type_var == 'Showdown':
|
232 |
if sport_var == 'GOLF':
|
233 |
-
return sum(df[col]
|
234 |
else:
|
235 |
-
cpt_medians = df.iloc[:, 0]
|
236 |
-
flex_medians = sum(df.iloc[:, i]
|
237 |
return cpt_medians + flex_medians
|
238 |
else:
|
239 |
-
return sum(df[col]
|
240 |
|
241 |
def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
|
242 |
"""Vectorized ownership calculation to replace expensive apply operations"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
|
244 |
-
cpt_own = df.iloc[:, 0]
|
245 |
-
flex_own = sum(df.iloc[:, i]
|
246 |
return cpt_own + flex_own
|
247 |
elif type_var == 'Showdown':
|
248 |
if sport_var == 'GOLF':
|
249 |
-
return sum(df[col]
|
250 |
else:
|
251 |
-
cpt_own = df.iloc[:, 0]
|
252 |
-
flex_own = sum(df.iloc[:, i]
|
253 |
return cpt_own + flex_own
|
254 |
else:
|
255 |
-
return sum(df[col]
|
256 |
|
257 |
def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
|
258 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
|
|
133 |
"""Optimize DataFrame data types for memory efficiency"""
|
134 |
for col in df.columns:
|
135 |
if df[col].dtype == 'object':
|
136 |
+
# Only convert to category if there are many duplicates AND it's not a player column
|
137 |
+
# Player columns need to stay as object for mapping operations
|
138 |
+
excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
|
139 |
+
if col not in excluded_cols and df[col].nunique() / len(df) < 0.3:
|
140 |
df[col] = df[col].astype('category')
|
141 |
return df
|
142 |
|
|
|
208 |
|
209 |
def _safe_map_and_fill(series, mapping, fill_value=0):
    """Map each value of ``series`` through ``mapping`` and fill unmapped entries.

    Args:
        series: pandas Series of lookup keys (object or categorical dtype).
        mapping: dict (or other ``Series.map``-compatible mapper) from key to
            a numeric value.
        fill_value: Value substituted for keys missing from ``mapping``.

    Returns:
        A Series aligned with ``series`` holding the mapped values, with
        missing entries replaced by ``fill_value``.
    """
    # Cast categorical input to object *before* mapping. Mapping a categorical
    # can yield another categorical, which cannot be filled with a value that
    # is not one of its categories; casting the mapped result to object instead
    # (as was done previously) leaves an object-dtype Series whose numeric
    # dtype then depends on pandas' deprecated implicit downcasting in fillna.
    if series.dtype.name == 'category':
        series = series.astype('object')
    return series.map(mapping).fillna(fill_value)


def _has_captain_slot(type_var, sport_var):
    """Return True when the first lineup column uses the captain mapping."""
    if type_var == 'Classic':
        return sport_var in ('CS2', 'LOL')
    if type_var == 'Showdown':
        return sport_var != 'GOLF'
    return False


def _sum_lineup_values(df, player_columns, map_dict, flex_key, cpt_key, type_var, sport_var):
    """Sum one mapped per-player metric across every lineup row of ``df``.

    For captain formats the first column of ``df`` is looked up in
    ``map_dict[cpt_key]`` and the columns at positions
    ``1 .. len(player_columns) - 1`` in ``map_dict[flex_key]``. Otherwise
    every column named in ``player_columns`` is looked up in
    ``map_dict[flex_key]`` and ``cpt_key`` is never read.
    """
    flex_map = map_dict[flex_key]
    if _has_captain_slot(type_var, sport_var):
        # Positional access is kept on purpose: the captain is taken from the
        # first column of df and the flex slots from the columns after it.
        captain_total = _safe_map_and_fill(df.iloc[:, 0], map_dict[cpt_key])
        flex_total = sum(
            _safe_map_and_fill(df.iloc[:, i], flex_map)
            for i in range(1, len(player_columns))
        )
        return captain_total + flex_total
    return sum(_safe_map_and_fill(df[col], flex_map) for col in player_columns)


def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized salary calculation to replace expensive apply operations.

    Args:
        df: DataFrame of lineups whose cells are player keys.
        player_columns: Names of the player columns in ``df``.
        map_dict: Dict providing ``'salary_map'`` and, for captain formats,
            ``'cpt_salary_map'``.
        type_var: Contest type, e.g. ``'Classic'`` or ``'Showdown'``.
        sport_var: Sport code, e.g. ``'CS2'``, ``'LOL'`` or ``'GOLF'``.

    Returns:
        Per-row salary totals; players missing from the maps count as 0.
    """
    return _sum_lineup_values(
        df, player_columns, map_dict, 'salary_map', 'cpt_salary_map', type_var, sport_var
    )


def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized median calculation to replace expensive apply operations.

    Args:
        df: DataFrame of lineups whose cells are player keys.
        player_columns: Names of the player columns in ``df``.
        map_dict: Dict providing ``'proj_map'`` and, for captain formats,
            ``'cpt_proj_map'``.
        type_var: Contest type, e.g. ``'Classic'`` or ``'Showdown'``.
        sport_var: Sport code, e.g. ``'CS2'``, ``'LOL'`` or ``'GOLF'``.

    Returns:
        Per-row projection totals; players missing from the maps count as 0.
    """
    return _sum_lineup_values(
        df, player_columns, map_dict, 'proj_map', 'cpt_proj_map', type_var, sport_var
    )


def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Vectorized ownership calculation to replace expensive apply operations.

    Args:
        df: DataFrame of lineups whose cells are player keys.
        player_columns: Names of the player columns in ``df``.
        map_dict: Dict providing ``'own_map'`` and, for captain formats,
            ``'cpt_own_map'``.
        type_var: Contest type, e.g. ``'Classic'`` or ``'Showdown'``.
        sport_var: Sport code, e.g. ``'CS2'``, ``'LOL'`` or ``'GOLF'``.

    Returns:
        Per-row ownership totals; players missing from the maps count as 0.
    """
    return _sum_lineup_values(
        df, player_columns, map_dict, 'own_map', 'cpt_own_map', type_var, sport_var
    )
|
282 |
|
283 |
def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
|
284 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|