# DFS_Portfolio_Manager/global_func/recalc_diversity.py
# James McCool
# Introduced a button to recalculate diversity
# 872a007
import streamlit as st
import numpy as np
import pandas as pd
import time
import math
from difflib import SequenceMatcher
def recalc_diversity(portfolio, player_columns):
    """
    Score each lineup by its average Jaccard distance to every other lineup.

    Every row of ``portfolio[player_columns]`` is treated as a *set* of player
    names (values are compared as strings). The Jaccard distance between two
    rows is ``1 - |A & B| / |A | B|``; a row's score is the mean of its
    distances to all other rows, so higher means more different from the rest
    of the portfolio.

    Args:
        portfolio: pandas DataFrame with one lineup per row.
        player_columns: list of column labels in ``portfolio`` that hold the
            player names.

    Returns:
        1-D float NumPy array with one score per row, in row order. When the
        raw scores are not all equal they are min-max scaled to [0, 1]; when
        they are all equal the raw average distance is returned unchanged.
        With fewer than two rows there is nothing to compare against, so an
        array of zeros (possibly empty) is returned.
    """
    player_frame = portfolio[player_columns]
    n_rows = len(player_frame)
    if n_rows < 2:
        # Avoids dividing by (n_rows - 1) == 0 and calling max() on an
        # empty array.
        return np.zeros(n_rows, dtype=float)

    # Blank out missing cells explicitly. Converting to str first would turn
    # NaN/None into the literal names 'nan'/'None', which every lineup with
    # an empty slot would then appear to share.
    names = np.array(player_frame.astype(str), dtype=object)
    names[player_frame.isna().to_numpy()] = ''

    # Work on the row-major flattened cells; blank / whitespace-only cells
    # are not players.
    flat_names = names.ravel()
    is_player = np.fromiter(
        (isinstance(name, str) and name.strip() != '' for name in flat_names),
        dtype=bool,
        count=flat_names.size,
    )
    # Row index that owns each flattened cell, restricted to real players.
    owner_rows = np.repeat(np.arange(n_rows), names.shape[1])[is_player]
    # np.unique assigns every distinct name a column id via return_inverse.
    unique_players, player_ids = np.unique(
        flat_names[is_player].astype(str), return_inverse=True
    )

    # presence[i, j] == 1 when lineup i contains player j. int32 (rather
    # than int8) keeps the pairwise counts below from wrapping around when
    # two lineups share more than 127 players.
    presence = np.zeros((n_rows, unique_players.size), dtype=np.int32)
    presence[owner_rows, player_ids] = 1

    # Pairwise |A & B| and |A | B| = |A| + |B| - |A & B|.
    intersection = presence @ presence.T
    lineup_sizes = presence.sum(axis=1)
    union = lineup_sizes[:, np.newaxis] + lineup_sizes - intersection

    # Two empty lineups have an empty union; their similarity is left at 0.
    similarity = np.divide(
        intersection,
        union,
        out=np.zeros(intersection.shape, dtype=float),
        where=union != 0,
    )
    distance = 1.0 - similarity

    # A row is not compared with itself: zero the diagonal and average over
    # the remaining n_rows - 1 rows.
    np.fill_diagonal(distance, 0)
    scores = distance.sum(axis=1) / (n_rows - 1)

    # Min-max scale to 0-1 unless every row has the same score.
    lowest = scores.min()
    spread = scores.max() - lowest
    if spread > 0:
        scores = (scores - lowest) / spread
    return scores