DFS_Portfolio_Manager / global_func /analyze_player_combos.py
James McCool
adding diversity to analysis tables
d23b397
raw
history blame
3.77 kB
from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd
def analyze_player_combos(display_frame, excluded_cols, combo_size=2, top_n=100):
    """
    Rank the most frequent player combinations across a set of lineups.

    Each row of ``display_frame`` is treated as one lineup. Every column not
    listed in ``excluded_cols`` is read as a player slot; blank and missing
    cells are ignored and a player appearing twice in a row is counted once.

    Args:
        display_frame: DataFrame containing lineup data. When at least one
            combination is found it must also provide the columns 'median',
            'Own', 'Dupes', 'Finish_percentile', 'Lineup Edge' and
            'Diversity', which are averaged per combination.
        excluded_cols: Columns that are not player slots.
        combo_size: Number of players per combination. Any positive size is
            supported; sizes below 1 produce an empty result.
        top_n: Maximum number of combinations to report, most frequent
            first. ``None`` reports every combination.

    Returns:
        DataFrame with one row per combination and the columns 'Combo',
        'Lineup Count', 'Exposure', 'Avg Median', 'Avg Own', 'Avg Dupes',
        'Avg Finish %', 'Avg Lineup Edge' and 'Avg Diversity'. The frame is
        empty (but keeps these columns) when no combination exists.

    Raises:
        KeyError: If combinations were found but a required stat column is
            missing from ``display_frame``.
    """
    # Maps each source stat column to the label used in the result table.
    stat_labels = {
        'median': 'Avg Median',
        'Own': 'Avg Own',
        'Dupes': 'Avg Dupes',
        'Finish_percentile': 'Avg Finish %',
        'Lineup Edge': 'Avg Lineup Edge',
        'Diversity': 'Avg Diversity',
    }
    result_columns = ['Combo', 'Lineup Count', 'Exposure', *stat_labels.values()]

    # Accept integral floats such as 2.0, which the equality checks in the
    # previous implementation also accepted.
    combo_size = int(combo_size)

    player_columns = [col for col in display_frame.columns if col not in excluded_cols]

    # One set of distinct, non-blank player names per lineup, in row order so
    # the boolean masks built below line up positionally with display_frame.
    player_sets = [
        {
            str(player)
            for player in row
            if pd.notna(player) and str(player).strip() != ''
        }
        for row in display_frame[player_columns].itertuples(index=False, name=None)
    ]

    # Sorting first makes every combination a canonical, order-independent
    # tuple. combinations() yields nothing when a lineup has fewer players
    # than combo_size, so no explicit length check is needed.
    combo_counter = Counter()
    if combo_size >= 1:
        for players in player_sets:
            combo_counter.update(combinations(sorted(players), combo_size))

    top_combos = combo_counter.most_common(top_n)
    if not top_combos:
        return pd.DataFrame(columns=result_columns)

    total_lineups = len(display_frame)
    # Look the stat columns up once instead of once per combination.
    stat_series = {
        label: display_frame[source] for source, label in stat_labels.items()
    }

    combo_stats = []
    for combo, count in top_combos:
        required = frozenset(combo)
        # Every counted combo occurs in at least one lineup, so the mask is
        # never all-False and the means are always defined.
        combo_mask = np.fromiter(
            (required <= players for players in player_sets),
            dtype=bool,
            count=len(player_sets),
        )
        record = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        for label, series in stat_series.items():
            record[label] = series[combo_mask].mean()
        combo_stats.append(record)

    return pd.DataFrame(combo_stats, columns=result_columns)