James McCool
committed on
Commit
·
3cfe4c4
1
Parent(s):
579a535
Refactor DataFrame optimization in app.py to enhance memory efficiency. Disable categorical conversion for specific columns to avoid issues with exposure_spread, while implementing smarter handling for numeric types. Introduce a new function to prepare DataFrames for exposure_spread, ensuring compatibility and improved performance during calculations.
Browse files
app.py
CHANGED
@@ -131,13 +131,29 @@ def chunk_name_matching(portfolio_names, csv_names, chunk_size=1000):
|
|
131 |
|
132 |
def optimize_dataframe_dtypes(df):
|
133 |
"""Optimize DataFrame data types for memory efficiency"""
|
|
|
|
|
|
|
|
|
|
|
134 |
for col in df.columns:
|
135 |
-
if df[col].dtype == '
|
136 |
-
#
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
return df
|
142 |
|
143 |
def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_var):
|
@@ -284,6 +300,11 @@ def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var,
|
|
284 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
285 |
df = df.copy() # Work on a copy to avoid modifying original
|
286 |
|
|
|
|
|
|
|
|
|
|
|
287 |
# Vectorized calculations
|
288 |
df['salary'] = calculate_salary_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
289 |
df['median'] = calculate_median_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
@@ -358,6 +379,17 @@ def create_team_filter_mask(df, player_columns, team_map, teams_to_filter, focus
|
|
358 |
|
359 |
return mask
|
360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
362 |
try:
|
363 |
# Remove any numbers from the column name to get the position
|
@@ -2037,7 +2069,9 @@ if selected_tab == 'Manage Portfolio':
|
|
2037 |
exp_submitted = st.form_submit_button("Export")
|
2038 |
if reg_submitted:
|
2039 |
st.session_state['settings_base'] = False
|
2040 |
-
|
|
|
|
|
2041 |
|
2042 |
# Use consolidated calculation function
|
2043 |
parsed_frame = calculate_lineup_metrics(
|
@@ -2056,7 +2090,9 @@ if selected_tab == 'Manage Portfolio':
|
|
2056 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
2057 |
elif exp_submitted:
|
2058 |
st.session_state['settings_base'] = False
|
2059 |
-
|
|
|
|
|
2060 |
|
2061 |
# Use consolidated calculation function for export
|
2062 |
parsed_frame = calculate_lineup_metrics(
|
|
|
131 |
|
132 |
def optimize_dataframe_dtypes(df):
    """Downcast 64-bit numeric columns to smaller dtypes for memory efficiency.

    ``float64`` columns become ``float32`` when every value lies strictly
    inside the float32 range. ``int64`` columns become ``int16`` or ``int32``
    when their min/max fit. All other columns are left untouched.

    Note: the float check is range-only, so values may lose precision when
    narrowed to float32.

    Args:
        df: DataFrame to optimize. It is modified in place.

    Returns:
        The same ``df`` object, with eligible columns downcast.
    """
    # For now, categorical conversion is disabled entirely to avoid issues
    # with exposure_spread and other operations. This maintains compatibility
    # while still providing other memory optimizations.
    # Future enhancement: implement smarter categorical handling that
    # preserves mutability.

    # Only optimize numeric columns to more efficient dtypes
    for col in df.columns:
        is_float = df[col].dtype == 'float64'
        is_int = df[col].dtype == 'int64'
        if not (is_float or is_int):
            continue

        try:
            # Compute the bounds once instead of once per comparison.
            col_max = df[col].max()
            col_min = df[col].min()

            if is_float:
                # float32 range; comparisons are False for NaN/inf bounds,
                # so such columns stay float64.
                if col_max < 3.4e+38 and col_min > -3.4e+38:
                    df[col] = df[col].astype('float32')
            elif col_max <= 32767 and col_min >= -32768:
                df[col] = df[col].astype('int16')
            elif col_max <= 2147483647 and col_min >= -2147483648:
                df[col] = df[col].astype('int32')
        except (TypeError, ValueError, OverflowError):
            # Best effort: leave the column as-is if it cannot be compared
            # or converted, but do not swallow unrelated errors such as
            # KeyboardInterrupt.
            continue

    return df
|
158 |
|
159 |
def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_var):
|
|
|
300 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
301 |
df = df.copy() # Work on a copy to avoid modifying original
|
302 |
|
303 |
+
# Ensure player columns are object type to avoid categorical issues with exposure_spread
|
304 |
+
for col in player_columns:
|
305 |
+
if df[col].dtype.name == 'category':
|
306 |
+
df[col] = df[col].astype('object')
|
307 |
+
|
308 |
# Vectorized calculations
|
309 |
df['salary'] = calculate_salary_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
310 |
df['median'] = calculate_median_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
|
|
379 |
|
380 |
return mask
|
381 |
|
382 |
+
def prepare_dataframe_for_exposure_spread(df, player_columns):
    """Return a copy of ``df`` whose categorical player columns are object dtype.

    Args:
        df: Source DataFrame; it is copied and never modified.
        player_columns: Column names to check. Names absent from ``df`` are
            ignored.

    Returns:
        A copy of ``df`` in which every listed column that had ``category``
        dtype has been cast to ``object``. Other columns are unchanged.
    """
    result = df.copy()

    for name in player_columns:
        # Skip names that are not present in the frame.
        if name not in result.columns:
            continue
        # Only categorical columns need converting back to object.
        if result[name].dtype.name != 'category':
            continue
        result[name] = result[name].astype('object')

    return result
|
392 |
+
|
393 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
394 |
try:
|
395 |
# Remove any numbers from the column name to get the position
|
|
|
2069 |
exp_submitted = st.form_submit_button("Export")
|
2070 |
if reg_submitted:
|
2071 |
st.session_state['settings_base'] = False
|
2072 |
+
# Prepare DataFrame for exposure_spread to avoid categorical issues
|
2073 |
+
working_frame_prepared = prepare_dataframe_for_exposure_spread(st.session_state['working_frame'], st.session_state['player_columns'])
|
2074 |
+
parsed_frame = exposure_spread(working_frame_prepared, st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
2075 |
|
2076 |
# Use consolidated calculation function
|
2077 |
parsed_frame = calculate_lineup_metrics(
|
|
|
2090 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
2091 |
elif exp_submitted:
|
2092 |
st.session_state['settings_base'] = False
|
2093 |
+
# Prepare DataFrame for exposure_spread to avoid categorical issues
|
2094 |
+
export_base_prepared = prepare_dataframe_for_exposure_spread(st.session_state['export_base'], st.session_state['player_columns'])
|
2095 |
+
parsed_frame = exposure_spread(export_base_prepared, st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
2096 |
|
2097 |
# Use consolidated calculation function for export
|
2098 |
parsed_frame = calculate_lineup_metrics(
|