Spaces:
Runtime error
Runtime error
Update clustering.py
Browse files- clustering.py +15 -3
clustering.py
CHANGED
@@ -28,6 +28,18 @@ from wordcloud import WordCloud
|
|
28 |
|
29 |
from concurrent.futures import ThreadPoolExecutor
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
import torch
|
33 |
device = 0 if torch.cuda.is_available() else -1
|
@@ -615,7 +627,7 @@ def perform_clustering(
|
|
615 |
|
616 |
if not threshold_values:
|
617 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 2)
|
618 |
-
|
619 |
# threshold_values = np.linspace(min_eps, max_eps, n)
|
620 |
|
621 |
cluster_assignments = {}
|
@@ -625,7 +637,7 @@ def perform_clustering(
|
|
625 |
most_similar_comments = {}
|
626 |
|
627 |
for distance_threshold in threshold_values:
|
628 |
-
|
629 |
clustering = AgglomerativeClustering(
|
630 |
n_clusters=None,
|
631 |
distance_threshold=distance_threshold,
|
@@ -695,7 +707,7 @@ def build_sankey_data(
|
|
695 |
comments = []
|
696 |
|
697 |
threshold_values = sorted(cluster_assignments.keys())
|
698 |
-
|
699 |
valid_clusters = {}
|
700 |
|
701 |
for threshold in threshold_values:
|
|
|
28 |
|
29 |
from concurrent.futures import ThreadPoolExecutor
|
30 |
|
31 |
+
import logging
|
32 |
+
|
33 |
+
logging.basicConfig(
|
34 |
+
level=logging.INFO,
|
35 |
+
format='%(asctime)s - %(levelname)s - %(message)s',
|
36 |
+
datefmt='%Y-%m-%d %H:%M:%S'
|
37 |
+
)
|
38 |
+
|
39 |
+
def log_message(message):
|
40 |
+
""""""
|
41 |
+
logging.info(message)
|
42 |
+
|
43 |
try:
|
44 |
import torch
|
45 |
device = 0 if torch.cuda.is_available() else -1
|
|
|
627 |
|
628 |
if not threshold_values:
|
629 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 2)
|
630 |
+
log_message(threshold_values)
|
631 |
# threshold_values = np.linspace(min_eps, max_eps, n)
|
632 |
|
633 |
cluster_assignments = {}
|
|
|
637 |
most_similar_comments = {}
|
638 |
|
639 |
for distance_threshold in threshold_values:
|
640 |
+
log_message(distance_threshold)
|
641 |
clustering = AgglomerativeClustering(
|
642 |
n_clusters=None,
|
643 |
distance_threshold=distance_threshold,
|
|
|
707 |
comments = []
|
708 |
|
709 |
threshold_values = sorted(cluster_assignments.keys())
|
710 |
+
log_message(f"build_sankey_data {threshold_values}")
|
711 |
valid_clusters = {}
|
712 |
|
713 |
for threshold in threshold_values:
|