import itertools
from collections import Counter

import numpy as np
import pandas as pd


def analyze_player_combos(display_frame, excluded_cols, combo_size=2):
    """
    Analyze the most common player combinations in a lineup DataFrame.

    Args:
        display_frame: DataFrame with one lineup per row; player-name columns
            plus the stat columns 'median', 'Own', 'Dupes', 'Finish_percentile',
            'Lineup Edge', and 'Diversity'.
        excluded_cols: Columns to exclude when identifying player columns.
        combo_size: Size of combinations to analyze. Any value >= 2 is
            supported (previously only 2 and 3 produced results).

    Returns:
        DataFrame with one row per combo (top 100 by lineup count), containing
        the combo label, lineup count, exposure, and per-stat averages over the
        lineups that contain the combo.
    """
    # Player columns are everything not explicitly excluded.
    player_columns = [c for c in display_frame.columns if c not in excluded_cols]

    # One set of players per lineup row; deduplicates within a row and drops
    # NaN / blank cells.
    player_sets = []
    for _, row in display_frame.iterrows():
        players = {
            str(row[col])
            for col in player_columns
            if pd.notna(row[col]) and str(row[col]).strip() != ''
        }
        player_sets.append(players)

    # Count every combo of the requested size. Sorting the players first gives
    # each combo a canonical tuple key so (a, b) and (b, a) count together.
    combo_counter = Counter()
    for players in player_sets:
        if len(players) >= combo_size:
            combo_counter.update(itertools.combinations(sorted(players), combo_size))

    # Output-column name -> source-column name for the per-combo averages.
    stat_columns = {
        'Avg Median': 'median',
        'Avg Own': 'Own',
        'Avg Dupes': 'Dupes',
        'Avg Finish %': 'Finish_percentile',
        'Avg Lineup Edge': 'Lineup Edge',
        'Avg Diversity': 'Diversity',
    }

    combo_stats = []
    total_lineups = len(display_frame)
    for combo, count in combo_counter.most_common(100):  # top 100 combos
        combo_set = set(combo)
        # Lineups containing every player of this combo (subset test).
        mask = np.array([combo_set <= ps for ps in player_sets])

        record = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        if mask.any():
            for out_name, src_col in stat_columns.items():
                record[out_name] = display_frame.loc[mask, src_col].mean()
        else:
            # Defensive only: a counted combo always appears in >= 1 lineup.
            for out_name in stat_columns:
                record[out_name] = 0
        combo_stats.append(record)

    return pd.DataFrame(combo_stats)