Kuautli committed on
Commit
3722726
·
verified ·
1 Parent(s): 9e7bae5

Update clustering.py

Browse files
Files changed (1) hide show
  1. clustering.py +0 -16
clustering.py CHANGED
@@ -28,18 +28,6 @@ from wordcloud import WordCloud
28
 
29
  from concurrent.futures import ThreadPoolExecutor
30
 
31
- import logging
32
-
33
- logging.basicConfig(
34
- level=logging.INFO,
35
- format='%(asctime)s - %(levelname)s - %(message)s',
36
- datefmt='%Y-%m-%d %H:%M:%S'
37
- )
38
-
39
- def log_message(message):
40
- """"""
41
- logging.info(message)
42
-
43
  try:
44
  import torch
45
  device = 0 if torch.cuda.is_available() else -1
@@ -627,7 +615,6 @@ def perform_clustering(
627
 
628
  if threshold_values is None:
629
  threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
630
- log_message(f"perform_clustering {threshold_values}")
631
 
632
  cluster_assignments = {}
633
  cluster_counts = {}
@@ -636,7 +623,6 @@ def perform_clustering(
636
  most_similar_comments = {}
637
 
638
  for distance_threshold in threshold_values:
639
- log_message(distance_threshold)
640
  rounded_distance_threshold = round(distance_threshold, 6)
641
  clustering = AgglomerativeClustering(
642
  n_clusters=None,
@@ -704,11 +690,9 @@ def build_sankey_data(
704
  comments = []
705
 
706
  sorted_threshold_values = sorted(cluster_assignments.keys())
707
- log_message(f"build_sankey_data {sorted_threshold_values}")
708
  valid_clusters = {}
709
 
710
  for threshold in sorted_threshold_values:
711
- print(threshold)
712
  valid_clusters[threshold] = [
713
  j
714
  for j in np.unique(cluster_assignments[threshold])
 
28
 
29
  from concurrent.futures import ThreadPoolExecutor
30
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
  import torch
33
  device = 0 if torch.cuda.is_available() else -1
 
615
 
616
  if threshold_values is None:
617
  threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
 
618
 
619
  cluster_assignments = {}
620
  cluster_counts = {}
 
623
  most_similar_comments = {}
624
 
625
  for distance_threshold in threshold_values:
 
626
  rounded_distance_threshold = round(distance_threshold, 6)
627
  clustering = AgglomerativeClustering(
628
  n_clusters=None,
 
690
  comments = []
691
 
692
  sorted_threshold_values = sorted(cluster_assignments.keys())
 
693
  valid_clusters = {}
694
 
695
  for threshold in sorted_threshold_values:
 
696
  valid_clusters[threshold] = [
697
  j
698
  for j in np.unique(cluster_assignments[threshold])