Spaces:
Sleeping
Sleeping
Update clustering.py
Browse files- clustering.py +0 -16
clustering.py
CHANGED
@@ -28,18 +28,6 @@ from wordcloud import WordCloud
|
|
28 |
|
29 |
from concurrent.futures import ThreadPoolExecutor
|
30 |
|
31 |
-
import logging
|
32 |
-
|
33 |
-
logging.basicConfig(
|
34 |
-
level=logging.INFO,
|
35 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
36 |
-
datefmt='%Y-%m-%d %H:%M:%S'
|
37 |
-
)
|
38 |
-
|
39 |
-
def log_message(message):
|
40 |
-
""""""
|
41 |
-
logging.info(message)
|
42 |
-
|
43 |
try:
|
44 |
import torch
|
45 |
device = 0 if torch.cuda.is_available() else -1
|
@@ -627,7 +615,6 @@ def perform_clustering(
|
|
627 |
|
628 |
if threshold_values is None:
|
629 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
|
630 |
-
log_message(f"perform_clustering {threshold_values}")
|
631 |
|
632 |
cluster_assignments = {}
|
633 |
cluster_counts = {}
|
@@ -636,7 +623,6 @@ def perform_clustering(
|
|
636 |
most_similar_comments = {}
|
637 |
|
638 |
for distance_threshold in threshold_values:
|
639 |
-
log_message(distance_threshold)
|
640 |
rounded_distance_threshold = round(distance_threshold, 6)
|
641 |
clustering = AgglomerativeClustering(
|
642 |
n_clusters=None,
|
@@ -704,11 +690,9 @@ def build_sankey_data(
|
|
704 |
comments = []
|
705 |
|
706 |
sorted_threshold_values = sorted(cluster_assignments.keys())
|
707 |
-
log_message(f"build_sankey_data {sorted_threshold_values}")
|
708 |
valid_clusters = {}
|
709 |
|
710 |
for threshold in sorted_threshold_values:
|
711 |
-
print(threshold)
|
712 |
valid_clusters[threshold] = [
|
713 |
j
|
714 |
for j in np.unique(cluster_assignments[threshold])
|
|
|
28 |
|
29 |
from concurrent.futures import ThreadPoolExecutor
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
import torch
|
33 |
device = 0 if torch.cuda.is_available() else -1
|
|
|
615 |
|
616 |
if threshold_values is None:
|
617 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
|
|
|
618 |
|
619 |
cluster_assignments = {}
|
620 |
cluster_counts = {}
|
|
|
623 |
most_similar_comments = {}
|
624 |
|
625 |
for distance_threshold in threshold_values:
|
|
|
626 |
rounded_distance_threshold = round(distance_threshold, 6)
|
627 |
clustering = AgglomerativeClustering(
|
628 |
n_clusters=None,
|
|
|
690 |
comments = []
|
691 |
|
692 |
sorted_threshold_values = sorted(cluster_assignments.keys())
|
|
|
693 |
valid_clusters = {}
|
694 |
|
695 |
for threshold in sorted_threshold_values:
|
|
|
696 |
valid_clusters[threshold] = [
|
697 |
j
|
698 |
for j in np.unique(cluster_assignments[threshold])
|