Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from collections import Counter | |
def analyze_player_combos(display_frame, excluded_cols, combo_size=2, top_n=100):
    """
    Analyze the most common player combinations in a lineup DataFrame.

    Counts every distinct ``combo_size``-player combination across all
    lineups, then reports the ``top_n`` most frequent combos together with
    the mean of each stat column over the lineups containing that combo.

    Args:
        display_frame: DataFrame of lineups. Must contain the stat columns
            'median', 'Own', 'Dupes', 'Finish_percentile', 'Lineup Edge',
            and 'Diversity' in addition to the player columns.
        excluded_cols: Columns to exclude when collecting player names
            (i.e. the non-player columns).
        combo_size: Size of the player combinations to count. Any size >= 2
            is supported (the original implementation handled only 2 and 3).
        top_n: How many of the most common combos to include in the result
            (default 100, matching the previous hard-coded limit).

    Returns:
        DataFrame with one row per combo: 'Combo', 'Lineup Count',
        'Exposure', and the per-stat averages.
    """
    from itertools import combinations  # local import: only needed here

    player_columns = [c for c in display_frame.columns if c not in excluded_cols]

    # Build the set of unique, non-empty player names for each lineup row
    # (set() also collapses duplicate players within a row).
    player_sets = []
    for _, row in display_frame.iterrows():
        players = set()
        for col in player_columns:
            player = row[col]
            if pd.notna(player) and str(player).strip() != '':
                players.add(str(player))
        player_sets.append(players)

    # Count combos. combinations() over a sorted list yields sorted tuples,
    # giving a consistent hashable key for any combo_size.
    combo_counter = Counter()
    for player_set in player_sets:
        if len(player_set) >= combo_size:
            combo_counter.update(combinations(sorted(player_set), combo_size))

    stat_cols = ['median', 'Own', 'Dupes', 'Finish_percentile',
                 'Lineup Edge', 'Diversity']
    out_names = ['Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %',
                 'Avg Lineup Edge', 'Avg Diversity']

    combo_stats = []
    total_lineups = len(display_frame)
    for combo, count in combo_counter.most_common(top_n):
        exposure = count / total_lineups
        combo_set = set(combo)
        # Boolean mask over rows whose lineup contains every player in the combo.
        combo_mask = np.fromiter(
            (combo_set.issubset(ps) for ps in player_sets),
            dtype=bool, count=total_lineups)
        if combo_mask.any():
            averages = [display_frame.loc[combo_mask, c].mean() for c in stat_cols]
        else:
            # Defensive: cannot happen for combos produced above, but keeps
            # the function safe if the counting logic ever changes.
            averages = [0] * len(stat_cols)
        record = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': exposure,
        }
        record.update(zip(out_names, averages))
        combo_stats.append(record)
    return pd.DataFrame(combo_stats)