James McCool
Implement vectorized calculations for salary, median, and ownership in app.py to enhance performance and memory efficiency. Refactor reassess_edge and stratification_function to minimize DataFrame copies and improve memory management. Update filtering logic to use boolean masks for better efficiency.
7eef51a
raw
history blame
2.2 kB
import pandas as pd
from global_func.predict_dupes import predict_dupes
def reassess_edge(
    modified_frame: pd.DataFrame,
    base_frame: pd.DataFrame,
    maps_dict: dict,
    site_var: str,
    type_var: str,
    Contest_Size: int,
    strength_var: str,
    sport_var: str,
    max_salary: int,
) -> pd.DataFrame:
    """Recalculate metrics for the modified lineups against the base frame.

    The modified rows are stacked on top of the base rows, the combined frame
    is passed through ``predict_dupes``, and the leading
    ``len(modified_frame)`` rows of its output are returned.

    Args:
        modified_frame: DataFrame with rows that were modified by
            exposure_spread.
        base_frame: Original base frame (base_frame for Portfolio, original
            export_base for Export).
        maps_dict: Dictionary containing player mappings.
        site_var: Site variable (Draftkings/Fanduel).
        type_var: Type variable (Classic/Showdown).
        Contest_Size: Contest size for calculations.
        strength_var: Strength variable (Weak/Average/Sharp).
        sport_var: Sport variable.
        max_salary: Maximum salary for the contest.

    Returns:
        A copy of the first ``len(modified_frame)`` rows produced by
        ``predict_dupes`` for the combined frame.
    """
    # Metric columns removed from both inputs before they are combined;
    # presumably predict_dupes regenerates them -- confirm against that helper.
    stale_metric_cols = [
        'Dupes',
        'Finish_percentile',
        'Lineup Edge',
        'Win%',
        'Weighted Own',
        'Geomean',
        'Diversity',
    ]

    # Remember how many leading rows of the combined frame are "ours".
    num_modified_rows = len(modified_frame)

    # errors="ignore" skips any listed column that a frame does not have,
    # replacing the manual membership filter.
    modified_clean = modified_frame.drop(columns=stale_metric_cols, errors="ignore")
    base_clean = base_frame.drop(columns=stale_metric_cols, errors="ignore")

    # Modified rows go first so they can be sliced back out by position.
    # `copy=False` is intentionally omitted: a row-wise concat has to allocate
    # the combined data anyway, and the keyword is deprecated in recent pandas.
    combined_frame = pd.concat([modified_clean, base_clean], ignore_index=True)

    updated_combined_frame = predict_dupes(
        combined_frame,
        maps_dict,
        site_var,
        type_var,
        Contest_Size,
        strength_var,
        sport_var,
        max_salary,
    )

    # NOTE(review): this positional slice assumes predict_dupes keeps the rows
    # in their input order -- confirm, otherwise the wrong rows are returned.
    # .copy() detaches the result from the larger combined frame.
    return updated_combined_frame.iloc[:num_modified_rows].copy()