import numpy as np
from scipy.spatial.distance import squareform
from fastcluster import linkage
import umap

# ------------------------------------------------------------------------------------------------
#
# Sim Matrix Ordering
#
# ------------------------------------------------------------------------------------------------


def seriation(Z, N, cur_index):
    '''
        input:
            - Z is a hierarchical tree (dendrogram) in linkage-matrix form:
              row i holds, in columns 0 and 1, the two node ids merged to
              form node N + i
            - N is the number of points given to the clustering process
            - cur_index is the node id at which the traversal starts
              (N + N - 2 for the root of a full tree)
        output:
            - list of leaf indices in the left-to-right order implied by
              the hierarchical tree Z

        seriation computes the order implied by a hierarchical tree (dendrogram).

        The traversal uses an explicit stack instead of recursion so that
        deep, chain-like trees do not hit Python's recursion limit.
    '''
    order = []
    pending = [cur_index]
    while pending:
        node = pending.pop()
        if node < N:
            # Node ids below N are the original observations (leaves).
            order.append(node)
        else:
            row = node - N
            # Push the right child first so the left child is popped (and
            # therefore emitted) first, matching a left-then-right walk.
            pending.append(int(Z[row, 1]))
            pending.append(int(Z[row, 0]))
    return order


def compute_serial_matrix(dist_mat, method="ward"):
    '''
        input:
            - dist_mat is a square distance matrix (indexed as a NumPy array)
            - method = ["ward","single","average","complete"]
        output:
            - seriated_dist is the input dist_mat, but with re-ordered rows
              and columns according to the seriation, i.e. the order implied
              by the hierarchical tree
            - res_order is the order implied by the hierarchical tree
            - res_linkage is the hierarchical tree (dendrogram)

        compute_serial_matrix transforms a distance matrix into a sorted
        distance matrix according to the order implied by the hierarchical
        tree (dendrogram).
    '''
    N = len(dist_mat)
    # linkage works on the condensed (flattened upper-triangle) form.
    flat_dist_mat = squareform(dist_mat)
    res_linkage = linkage(flat_dist_mat, method=method, preserve_input=True)
    # A full tree over N points has N - 1 merges, so its root is node 2N - 2.
    res_order = seriation(res_linkage, N, N + N - 2)

    order = np.asarray(res_order, dtype=np.intp)
    seriated_dist = np.zeros((N, N))
    a, b = np.triu_indices(N, k=1)
    # Fill the strict upper triangle from the permuted input, then mirror it;
    # the diagonal stays zero.
    seriated_dist[a, b] = dist_mat[order[a], order[b]]
    seriated_dist[b, a] = seriated_dist[a, b]

    return seriated_dist, res_order, res_linkage


def compute_ordered_matrix(sim_matrix, dist_matrix, model_names):
    '''
        input:
            - sim_matrix is a square similarity matrix
            - dist_matrix is the distance matrix used for the clustering
            - model_names is a sequence of labels, one per row of sim_matrix
        output:
            - ordered_sim_matrix is sim_matrix with rows and columns permuted
              by the seriation order computed from dist_matrix
            - ordered_model_names is model_names permuted the same way

        With fewer than two entries there is nothing to cluster, so the
        inputs are returned unchanged.
    '''
    if len(sim_matrix) >= 2:
        # Compute serial matrix (hierarchical clustering) for tab1; only the
        # leaf order is needed here.
        _, order, _ = compute_serial_matrix(dist_matrix)
        ordered_sim_matrix = sim_matrix[order][:, order]
        ordered_model_names = [model_names[i] for i in order]
    else:
        ordered_sim_matrix = sim_matrix
        ordered_model_names = model_names

    return ordered_sim_matrix, ordered_model_names


# ------------------------------------------------------------------------------------------------
#
# UMAP computation
#
# ------------------------------------------------------------------------------------------------


def compute_umap(dist_matrix, d=2):
    '''
        input:
            - dist_matrix is a precomputed distance matrix
              (passed to UMAP with metric='precomputed')
            - d is the number of embedding components
        output:
            - embedding returned by UMAP.fit_transform

        compute_umap runs UMAP with densMAP enabled and a fixed
        random_state of 42.
    '''
    reducer = umap.UMAP(
        densmap=True,
        n_components=d,
        metric='precomputed',
        random_state=42,
    )
    embedding = reducer.fit_transform(dist_matrix)
    return embedding