# Page header captured with this source: "Spaces: Sleeping".
from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd
def analyze_player_combos(display_frame, excluded_cols, combo_size=2, top_n=100):
    """Summarize the most frequent player combinations across lineups.

    Every column of ``display_frame`` that is not listed in
    ``excluded_cols`` is treated as a player slot. For each row the
    distinct, non-blank slot values (compared as strings) form that
    lineup's player set; every ``combo_size``-sized subset of that set
    counts once for the lineup.

    Args:
        display_frame: DataFrame with one lineup per row. Whenever at
            least one combo is found it must also contain the columns
            'median', 'Own', 'Dupes', 'Finish_percentile', 'Lineup Edge'
            and 'Diversity' (a missing one raises ``KeyError``).
        excluded_cols: Column names that are not player slots.
        combo_size: Number of players per combination. Any positive
            integer is supported; values below 1 give an empty result.
        top_n: Maximum number of combinations reported, most frequent
            first. ``None`` reports all of them.

    Returns:
        DataFrame with one row per reported combo and the columns
        'Combo' (player names joined by ' + '), 'Lineup Count',
        'Exposure' (count divided by the number of rows) and the mean of
        each stat column over the lineups containing the combo. When no
        combo exists the frame is empty but still carries these columns.
    """
    # Output column -> source column averaged over the matching lineups.
    stat_sources = {
        'Avg Median': 'median',
        'Avg Own': 'Own',
        'Avg Dupes': 'Dupes',
        'Avg Finish %': 'Finish_percentile',
        'Avg Lineup Edge': 'Lineup Edge',
        'Avg Diversity': 'Diversity',
    }
    result_columns = ['Combo', 'Lineup Count', 'Exposure', *stat_sources]

    if combo_size < 1:
        return pd.DataFrame(columns=result_columns)

    player_columns = [
        col for col in display_frame.columns if col not in excluded_cols
    ]

    # One object-array conversion of just the player columns replaces a
    # per-row iterrows() pass over the whole frame. A set per row collapses
    # duplicate players; missing and blank cells are skipped.
    slot_values = display_frame[player_columns].to_numpy(dtype=object)
    player_sets = [
        {
            str(player)
            for player in row
            if pd.notna(player) and str(player).strip() != ''
        }
        for row in slot_values
    ]

    # Sorting first makes each combo a canonical tuple, so the same group
    # of players always maps to the same key. combinations() yields nothing
    # when a lineup has fewer than combo_size players.
    combo_counter = Counter()
    for players in player_sets:
        combo_counter.update(combinations(sorted(players), combo_size))

    total_lineups = len(display_frame)
    combo_stats = []
    for combo, count in combo_counter.most_common(top_n):
        members = frozenset(combo)
        # Boolean mask of the lineups that contain every player of the
        # combo; it always selects at least one row because count >= 1.
        combo_mask = np.fromiter(
            (members <= players for players in player_sets),
            dtype=bool,
            count=len(player_sets),
        )
        stats_row = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        for out_name, source_col in stat_sources.items():
            stats_row[out_name] = display_frame.loc[combo_mask, source_col].mean()
        combo_stats.append(stats_row)

    return pd.DataFrame(combo_stats, columns=result_columns)