Kuautli committed on
Commit
d1607d8
·
verified ·
1 Parent(s): e22fec3

Update clustering.py

Browse files
Files changed (1) hide show
  1. clustering.py +2 -2
clustering.py CHANGED
@@ -626,7 +626,7 @@ def perform_clustering(
626
  embeddings_matrix = np.array(data[embeddings_col].tolist())
627
 
628
  if not threshold_values:
629
- threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6)
630
  log_message(f"perform_clustering {threshold_values}")
631
  # threshold_values = np.linspace(min_eps, max_eps, n)
632
 
@@ -637,7 +637,6 @@ def perform_clustering(
637
  most_similar_comments = {}
638
 
639
  for distance_threshold in threshold_values:
640
- distance_threshold = round(distance_threshold, 6)
641
  log_message(distance_threshold)
642
  clustering = AgglomerativeClustering(
643
  n_clusters=None,
@@ -645,6 +644,7 @@ def perform_clustering(
645
  linkage="complete",
646
  metric="cosine",
647
  )
 
648
  data[f"cluster_{distance_threshold}"] = clustering.fit_predict(
649
  embeddings_matrix
650
  )
 
626
  embeddings_matrix = np.array(data[embeddings_col].tolist())
627
 
628
  if not threshold_values:
629
+ threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
630
  log_message(f"perform_clustering {threshold_values}")
631
  # threshold_values = np.linspace(min_eps, max_eps, n)
632
 
 
637
  most_similar_comments = {}
638
 
639
  for distance_threshold in threshold_values:
 
640
  log_message(distance_threshold)
641
  clustering = AgglomerativeClustering(
642
  n_clusters=None,
 
644
  linkage="complete",
645
  metric="cosine",
646
  )
647
+
648
  data[f"cluster_{distance_threshold}"] = clustering.fit_predict(
649
  embeddings_matrix
650
  )