|
import numpy as np |
|
from scipy.spatial.distance import squareform |
|
from fastcluster import linkage |
|
import umap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def seriation(Z,N,cur_index): |
|
''' |
|
input: |
|
- Z is a hierarchical tree (dendrogram) |
|
- N is the number of points given to the clustering process |
|
- cur_index is the position in the tree for the recursive traversal |
|
output: |
|
- order implied by the hierarchical tree Z |
|
|
|
seriation computes the order implied by a hierarchical tree (dendrogram) |
|
''' |
|
if cur_index < N: |
|
return [cur_index] |
|
else: |
|
left = int(Z[cur_index-N,0]) |
|
right = int(Z[cur_index-N,1]) |
|
return (seriation(Z,N,left) + seriation(Z,N,right)) |
|
|
|
def compute_serial_matrix(dist_mat,method="ward"): |
|
''' |
|
input: |
|
- dist_mat is a distance matrix |
|
- method = ["ward","single","average","complete"] |
|
output: |
|
- seriated_dist is the input dist_mat, |
|
but with re-ordered rows and columns |
|
according to the seriation, i.e. the |
|
order implied by the hierarchical tree |
|
- res_order is the order implied by |
|
the hierarhical tree |
|
- res_linkage is the hierarhical tree (dendrogram) |
|
|
|
compute_serial_matrix transforms a distance matrix into |
|
a sorted distance matrix according to the order implied |
|
by the hierarchical tree (dendrogram) |
|
''' |
|
N = len(dist_mat) |
|
flat_dist_mat = squareform(dist_mat) |
|
res_linkage = linkage(flat_dist_mat, method=method,preserve_input=True) |
|
res_order = seriation(res_linkage, N, N + N-2) |
|
seriated_dist = np.zeros((N,N)) |
|
a,b = np.triu_indices(N,k=1) |
|
seriated_dist[a,b] = dist_mat[ [res_order[i] for i in a], [res_order[j] for j in b]] |
|
seriated_dist[b,a] = seriated_dist[a,b] |
|
|
|
return seriated_dist, res_order, res_linkage |
|
|
|
def compute_ordered_matrix(sim_matrix,dist_matrix, model_names): |
|
if len(sim_matrix) >= 2: |
|
|
|
ordered_dist_matrix, order, Z = compute_serial_matrix(dist_matrix) |
|
ordered_sim_matrix = sim_matrix[order][:, order] |
|
ordered_model_names = [model_names[i] for i in order] |
|
else: |
|
ordered_sim_matrix = sim_matrix |
|
ordered_model_names = model_names |
|
|
|
return ordered_sim_matrix, ordered_model_names |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_umap(dist_matrix,d=2): |
|
embedding = umap.UMAP(densmap=True,n_components=d, metric='precomputed',random_state=42).fit_transform(dist_matrix) |
|
return embedding |