# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
# This is because the predict_dupes.py function is exclusive of the lineups it takes in, and is meant to give edge around the median expectation of those lineups
# So what we need to do instead is find the differences between the original set and the post-exposure_spread.py set and calculate new metrics around the diff
# Need to find the diff in salary, median, and Own to calculate new Dupes, Finish_percentile, Win%, Edge, Weighted Own, and Geomean
# Then at the end run the Diversity function to set a new column for Diversity
# This way we only change the metrics where we have a difference in salary, median, and Own and leave the rest of the lineups alone
# A minimal usage sketch with made-up lineups is included at the bottom of this file

import pandas as pd
import numpy as np
import math


def calculate_weighted_ownership_vectorized(ownership_array):
    """
    Vectorized version of calculate_weighted_ownership using NumPy operations.

    Args:
        ownership_array: 2D array of ownership values (rows x players)

    Returns:
        array: Calculated weighted ownership values for each row
    """
    # Convert percentages to decimals and handle NaN values
    ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100

    # Calculate row means
    row_means = np.mean(ownership_array, axis=1, keepdims=True)

    # Calculate average of each value with the overall mean
    value_means = (ownership_array + row_means) / 2

    # Take average of all those means
    avg_of_means = np.mean(value_means, axis=1)

    # Multiply by count of values
    weighted = avg_of_means * ownership_array.shape[1]

    # Subtract (max - min) for each row
    row_max = np.max(ownership_array, axis=1)
    row_min = np.min(ownership_array, axis=1)
    weighted = weighted - (row_max - row_min)

    # Scale the result back up (the inputs were divided by 100 above)
    return weighted * 10000


def calculate_weighted_ownership_wrapper(row_ownerships):
    """
    Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()

    Args:
        row_ownerships: Series containing ownership values in percentage form

    Returns:
        float: Calculated weighted ownership value
    """
    # Convert Series to 2D array for vectorized function
    ownership_array = row_ownerships.values.reshape(1, -1)
    return calculate_weighted_ownership_vectorized(ownership_array)[0]
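
# Note: the steps above reduce to a closed form. With ownerships converted to
# decimals (percent / 100), averaging each value with the row mean and then
# averaging those results again just reproduces the row mean, so each row works
# out to:
#
#     weighted = (sum(own) - (max(own) - min(own))) * 10000
#
# Worked example for one row of ownership percentages:
#     [20, 10, 5, 5] -> decimals [0.20, 0.10, 0.05, 0.05]
#     sum = 0.40, max - min = 0.15
#     weighted = (0.40 - 0.15) * 10000 = 2500.0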

def reassess_dupes(row: pd.Series, salary_max: int) -> float:
    # Adjust the dupe estimate using the salary change, the salary left unspent, and the ownership change
    return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))


def reassess_lineup_edge(row: pd.Series, Contest_Size: int) -> float:
    # Raw edge: Win% weighted by how far above the median finish the lineup projects
    lineup_edge = row['Win%'] * ((.5 - row['Finish_percentile']) * (Contest_Size / 2.5))
    # Discount lineups that are expected to be duplicated
    if row['Dupes'] > 0:
        lineup_edge = lineup_edge / (row['Dupes'] + 1)
    # The frame-level version of this calculation also centers edges around the column mean;
    # that step needs the whole frame, so it is not applied per-row here
    return lineup_edge


def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str, salary_max: int) -> pd.DataFrame:
    # maps_dict, site_var, type_var, strength_var, and sport_var are accepted for interface
    # consistency but are not used in this function
    orig_df = original_frame.copy()
    orig_df = orig_df.reset_index(drop=True)
    refactored_df = refactored_frame.copy()
    refactored_df = refactored_df.reset_index(drop=True)

    # Differences between the post-exposure_spread.py set and the original set
    refactored_df['salary_diff'] = refactored_df['salary'] - orig_df['salary']
    refactored_df['median_diff'] = refactored_df['median'] - orig_df['median']
    refactored_df['own_diff'] = refactored_df['Own'] - orig_df['Own']

    # Only rework the metrics for lineups whose median actually changed
    change_mask = refactored_df[refactored_df['median_diff'] != 0]

    # Count the player ownership columns by excluding the metric and diff columns
    num_players = len([col for col in refactored_df.columns
                       if col not in ['salary', 'median', 'Own', 'Dupes', 'Finish_percentile', 'Win%', 'Edge',
                                      'Weighted Own', 'Geomean', 'salary_diff', 'median_diff', 'own_diff']])
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]

    for lineups in change_mask.index:
        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
        # Finish_percentile and Win% are carried over unchanged for now
        refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
        refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))

    return refactored_df
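

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The two-lineup frames below are
# made up and contain only the metric columns plus per-slot player_N_own
# ownership columns; a real portfolio coming out of predict_dupes.py and
# exposure_spread.py will carry more columns, and the site/type/strength/sport
# arguments shown here are placeholder values.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    original = pd.DataFrame({
        'salary': [49800, 50000],
        'median': [110.5, 112.0],
        'Own': [95.0, 95.0],
        'Dupes': [3, 5],
        'Finish_percentile': [0.30, 0.25],
        'Win%': [0.02, 0.03],
        'Edge': [0.5, 0.8],
        'Weighted Own': [1200.0, 1400.0],
        'Geomean': [15.0, 18.0],
        'player_1_own': [20.0, 25.0],
        'player_2_own': [10.0, 12.0],
        'player_3_own': [5.0, 8.0],
    })

    # Pretend exposure_spread.py swapped a player in the second lineup
    refactored = original.copy()
    refactored.loc[1, ['salary', 'median', 'Own', 'player_3_own']] = [49600, 109.0, 90.0, 3.0]

    result = reassess_edge(refactored, original, maps_dict={}, site_var='dk', type_var='Classic',
                           Contest_Size=10000, strength_var='average', sport_var='NFL', salary_max=50000)
    # Only the second lineup's metrics are reworked; the first is left alone
    print(result[['Dupes', 'Edge', 'Weighted Own', 'Geomean']])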