Kuautli committed on
Commit
674dbba
·
verified ·
1 Parent(s): 73db4fc

Update clustering.py

Browse files
Files changed (1) hide show
  1. clustering.py +15 -3
clustering.py CHANGED
@@ -28,6 +28,18 @@ from wordcloud import WordCloud
28
 
29
  from concurrent.futures import ThreadPoolExecutor
30
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
  import torch
33
  device = 0 if torch.cuda.is_available() else -1
@@ -615,7 +627,7 @@ def perform_clustering(
615
 
616
  if not threshold_values:
617
  threshold_values = np.round(np.linspace(min_eps, max_eps, n), 2)
618
- print(threshold_values)
619
  # threshold_values = np.linspace(min_eps, max_eps, n)
620
 
621
  cluster_assignments = {}
@@ -625,7 +637,7 @@ def perform_clustering(
625
  most_similar_comments = {}
626
 
627
  for distance_threshold in threshold_values:
628
- print(distance_threshold)
629
  clustering = AgglomerativeClustering(
630
  n_clusters=None,
631
  distance_threshold=distance_threshold,
@@ -695,7 +707,7 @@ def build_sankey_data(
695
  comments = []
696
 
697
  threshold_values = sorted(cluster_assignments.keys())
698
- print("build_sankey_data", threshold_values)
699
  valid_clusters = {}
700
 
701
  for threshold in threshold_values:
 
28
 
29
  from concurrent.futures import ThreadPoolExecutor
30
 
31
+ import logging
32
+
33
+ logging.basicConfig(
34
+ level=logging.INFO,
35
+ format='%(asctime)s - %(levelname)s - %(message)s',
36
+ datefmt='%Y-%m-%d %H:%M:%S'
37
+ )
38
+
39
+ def log_message(message):
40
+ """"""
41
+ logging.info(message)
42
+
43
  try:
44
  import torch
45
  device = 0 if torch.cuda.is_available() else -1
 
627
 
628
  if not threshold_values:
629
  threshold_values = np.round(np.linspace(min_eps, max_eps, n), 2)
630
+ log_message(threshold_values)
631
  # threshold_values = np.linspace(min_eps, max_eps, n)
632
 
633
  cluster_assignments = {}
 
637
  most_similar_comments = {}
638
 
639
  for distance_threshold in threshold_values:
640
+ log_message(distance_threshold)
641
  clustering = AgglomerativeClustering(
642
  n_clusters=None,
643
  distance_threshold=distance_threshold,
 
707
  comments = []
708
 
709
  threshold_values = sorted(cluster_assignments.keys())
710
+ log_message(f"build_sankey_data {threshold_values}")
711
  valid_clusters = {}
712
 
713
  for threshold in threshold_values: