|
import pandas as pd |
|
import numpy as np |
|
from collections import Counter |
|
|
|
def analyze_player_combos(display_frame, excluded_cols, combo_size=2, top_n=100):
    """Summarise the most frequent player combinations across lineups.

    Every column of ``display_frame`` whose name is not in ``excluded_cols``
    is treated as a player slot. For each row, the non-null, non-blank slot
    values are collected (as strings) into a set, so a player repeated in
    one lineup is counted once. All ``combo_size``-player combinations are
    then counted across rows and the most frequent ones are reported with
    the mean of each lineup statistic over the rows containing the combo.

    Rows are processed with Python-level loops; the work grows with the
    number of rows and with the number of combinations per row.

    Args:
        display_frame: DataFrame containing lineup data. It must provide
            the statistic columns 'median', 'Own', 'Dupes',
            'Finish_percentile', 'Lineup Edge' and 'Diversity'.
        excluded_cols: Column names that are not player slots. The
            statistic columns should be listed here, otherwise their
            values are counted as players.
        combo_size: Number of players per combination. Any size >= 1 is
            supported; a size below 1 produces an empty result.
        top_n: Maximum number of combinations to report, most frequent
            first. ``None`` reports every combination.

    Returns:
        DataFrame with one row per combination and the columns 'Combo',
        'Lineup Count', 'Exposure', 'Avg Median', 'Avg Own', 'Avg Dupes',
        'Avg Finish %', 'Avg Lineup Edge' and 'Avg Diversity'. The frame is
        empty (but keeps these columns) when no combination exists.
    """
    # Function-scope import: the module header does not import itertools.
    from itertools import combinations

    # Output label -> source column averaged over the matching lineups.
    stat_sources = {
        'Avg Median': 'median',
        'Avg Own': 'Own',
        'Avg Dupes': 'Dupes',
        'Avg Finish %': 'Finish_percentile',
        'Avg Lineup Edge': 'Lineup Edge',
        'Avg Diversity': 'Diversity',
    }
    result_columns = ['Combo', 'Lineup Count', 'Exposure', *stat_sources]

    if combo_size < 1:
        return pd.DataFrame(columns=result_columns)

    player_columns = [
        col for col in display_frame.columns if col not in excluded_cols
    ]

    # itertuples over just the player slots avoids building a Series per
    # row and keeps each cell's own type (iterrows upcasts mixed numeric
    # rows, which would change how numeric player ids are stringified).
    player_sets = [
        {
            str(value)
            for value in row
            if pd.notna(value) and str(value).strip() != ''
        }
        for row in display_frame[player_columns].itertuples(
            index=False, name=None
        )
    ]

    # Sorting first makes each combination a canonical tuple, so the same
    # group of players always maps to the same Counter key.
    combo_counter = Counter()
    for players in player_sets:
        combo_counter.update(combinations(sorted(players), combo_size))

    total_lineups = len(display_frame)
    combo_stats = []

    for combo, count in combo_counter.most_common(top_n):
        # A counted combo came from at least one row, so the mask always
        # selects at least one lineup and the means are well defined.
        combo_mask = np.fromiter(
            (players.issuperset(combo) for players in player_sets),
            dtype=bool,
            count=len(player_sets),
        )

        stats = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        for label, source in stat_sources.items():
            stats[label] = display_frame.loc[combo_mask, source].mean()
        combo_stats.append(stats)

    return pd.DataFrame(combo_stats, columns=result_columns)