James McCool
Implement vectorized calculations for salary, median, and ownership in app.py to enhance performance and memory efficiency. Refactor reassess_edge and stratification_function to minimize DataFrame copies and improve memory management. Update filtering logic to use boolean masks for better efficiency.
7eef51a
import pandas as pd
from global_func.predict_dupes import predict_dupes
def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, max_salary: int) -> pd.DataFrame:
    """
    Recalculate metrics for ``modified_frame`` in the context of ``base_frame``.

    The previously computed metric columns are dropped from both frames, the
    modified rows are stacked on top of the base rows, ``predict_dupes`` is run
    on the combined frame, and the first ``len(modified_frame)`` rows of its
    output are returned as an independent copy.

    NOTE(review): taking the leading rows assumes ``predict_dupes`` returns the
    rows in the same order (and without dropping any) as it received them;
    that helper is defined elsewhere -- confirm.

    Args:
        modified_frame: DataFrame with rows that were modified by exposure_spread
        base_frame: Original base frame (base_frame for Portfolio, original export_base for Export)
        maps_dict: Dictionary containing player mappings (passed through to predict_dupes)
        site_var: Site variable (Draftkings/Fanduel)
        type_var: Type variable (Classic/Showdown)
        Contest_Size: Contest size for calculations
        strength_var: Strength variable (Weak/Average/Sharp)
        sport_var: Sport variable
        max_salary: Maximum salary for the contest

    Returns:
        DataFrame: The first ``len(modified_frame)`` rows of the predict_dupes
        output for the combined frame, i.e. the modified rows with
        recalculated metrics.
    """
    # Metric columns are removed up front so that predict_dupes receives both
    # frames without the stale values.
    cols_to_drop = ['Dupes', 'Finish_percentile', 'Lineup Edge', 'Win%', 'Weighted Own', 'Geomean', 'Diversity']

    # Remember how many leading rows of the combined frame belong to the caller.
    num_modified_rows = len(modified_frame)

    # errors="ignore" skips any listed column that a frame does not have, so no
    # manual membership filtering is needed. drop() returns a new frame; the
    # inputs are not modified.
    modified_clean = modified_frame.drop(columns=cols_to_drop, errors="ignore")
    base_clean = base_frame.drop(columns=cols_to_drop, errors="ignore")

    # Modified rows go first so they can be sliced back out positionally.
    # The `copy` keyword is deliberately not passed: it is deprecated (and
    # ignored) in pandas 3.x.
    combined_frame = pd.concat([modified_clean, base_clean], ignore_index=True)

    updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var, max_salary)

    # .copy() detaches the result from the (much larger) combined frame.
    return updated_combined_frame.iloc[:num_modified_rows].copy()