DFS_Portfolio_Manager / global_func /analyze_player_combos.py
James McCool
Increase the number of top combos analyzed from 50 to 100 in the analyze_player_combos function, enhancing the depth of player combination statistics available for analysis.
d42af28
raw
history blame
3.63 kB
from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd
def analyze_player_combos(display_frame: pd.DataFrame, excluded_cols, combo_size=2, top_n=100) -> pd.DataFrame:
    """Summarize the most frequent player combinations across lineups.

    Every row of ``display_frame`` is treated as one lineup. The distinct,
    non-blank values found in the player columns of a row form that lineup's
    player set; every ``combo_size``-sized subset of the set is counted once
    per lineup. For the ``top_n`` most frequent combos, the lineup stats of
    the rows containing the combo are averaged.

    Note: the per-row extraction and counting run as plain Python loops, and
    the number of combos per lineup grows combinatorially with ``combo_size``.

    Args:
        display_frame: DataFrame containing lineup data. When at least one
            combo is found it must provide the columns ``'median'``,
            ``'Own'``, ``'Dupes'``, ``'Finish_percentile'`` and
            ``'Lineup Edge'``.
        excluded_cols: Columns to exclude from analysis. Every column NOT
            listed here is read as a player slot, so the stat columns above
            presumably need to be listed here -- confirm against callers.
        combo_size: Number of players per combination. Any size >= 1 is
            supported; values below 1 produce an empty result.
        top_n: Maximum number of combos to report, most frequent first
            (ties keep first-seen order). ``None`` reports every combo.

    Returns:
        DataFrame with one row per combo and the columns ``'Combo'``
        (player names joined with ``' + '``), ``'Lineup Count'``,
        ``'Exposure'`` (count divided by the total number of rows),
        ``'Avg Median'``, ``'Avg Own'``, ``'Avg Dupes'``, ``'Avg Finish %'``
        and ``'Avg Lineup Edge'``. When no combo exists the frame is empty
        but still carries these columns.

    Raises:
        KeyError: If a combo is found but one of the required stat columns
            is missing from ``display_frame``.
    """
    # Output label -> source column in display_frame, in output order.
    stat_sources = (
        ('Avg Median', 'median'),
        ('Avg Own', 'Own'),
        ('Avg Dupes', 'Dupes'),
        ('Avg Finish %', 'Finish_percentile'),
        ('Avg Lineup Edge', 'Lineup Edge'),
    )
    result_columns = ['Combo', 'Lineup Count', 'Exposure'] + [label for label, _ in stat_sources]

    # Accept integral floats such as 2.0 (e.g. from UI widgets), which the
    # previous equality-based dispatch also accepted.
    combo_size = int(combo_size)
    if combo_size < 1:
        return pd.DataFrame(columns=result_columns)

    # Get player columns
    player_columns = [col for col in display_frame.columns if col not in excluded_cols]

    # One set of player names per row; a set collapses a player that shows up
    # in several slots of the same lineup.
    player_sets = []
    for _, row in display_frame.iterrows():
        players = set()
        for col in player_columns:
            player = row[col]
            if pd.notna(player) and str(player).strip() != '':
                players.add(str(player))
        player_sets.append(players)

    # Sorting first gives every combo one canonical tuple regardless of the
    # slot order in the lineup. combinations() yields nothing when a lineup
    # has fewer than combo_size players, so no explicit length check is needed.
    combo_counter = Counter()
    for players in player_sets:
        combo_counter.update(combinations(sorted(players), combo_size))

    total_lineups = len(display_frame)
    lineup_total = len(player_sets)
    combo_stats = []
    for combo, count in combo_counter.most_common(top_n):
        members = set(combo)
        # Positional boolean mask of the lineups containing every combo member.
        # Each counted combo occurs in at least one lineup, so the mask is
        # never all-False here.
        combo_mask = np.fromiter(
            (members <= players for players in player_sets),
            dtype=bool,
            count=lineup_total,
        )
        record = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        for label, source in stat_sources:
            record[label] = display_frame.loc[combo_mask, source].mean()
        combo_stats.append(record)

    # Passing columns keeps the schema stable even when no combo was found.
    return pd.DataFrame(combo_stats, columns=result_columns)