Spaces:
Runtime error
Runtime error
Update clustering.py
Browse files
- clustering.py +0 -16
clustering.py
CHANGED
|
@@ -28,18 +28,6 @@ from wordcloud import WordCloud
|
|
| 28 |
|
| 29 |
from concurrent.futures import ThreadPoolExecutor
|
| 30 |
|
| 31 |
-
import logging
|
| 32 |
-
|
| 33 |
-
logging.basicConfig(
|
| 34 |
-
level=logging.INFO,
|
| 35 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 36 |
-
datefmt='%Y-%m-%d %H:%M:%S'
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
-
def log_message(message):
|
| 40 |
-
""""""
|
| 41 |
-
logging.info(message)
|
| 42 |
-
|
| 43 |
try:
|
| 44 |
import torch
|
| 45 |
device = 0 if torch.cuda.is_available() else -1
|
|
@@ -627,7 +615,6 @@ def perform_clustering(
|
|
| 627 |
|
| 628 |
if threshold_values is None:
|
| 629 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
|
| 630 |
-
log_message(f"perform_clustering {threshold_values}")
|
| 631 |
|
| 632 |
cluster_assignments = {}
|
| 633 |
cluster_counts = {}
|
|
@@ -636,7 +623,6 @@ def perform_clustering(
|
|
| 636 |
most_similar_comments = {}
|
| 637 |
|
| 638 |
for distance_threshold in threshold_values:
|
| 639 |
-
log_message(distance_threshold)
|
| 640 |
rounded_distance_threshold = round(distance_threshold, 6)
|
| 641 |
clustering = AgglomerativeClustering(
|
| 642 |
n_clusters=None,
|
|
@@ -704,11 +690,9 @@ def build_sankey_data(
|
|
| 704 |
comments = []
|
| 705 |
|
| 706 |
sorted_threshold_values = sorted(cluster_assignments.keys())
|
| 707 |
-
log_message(f"build_sankey_data {sorted_threshold_values}")
|
| 708 |
valid_clusters = {}
|
| 709 |
|
| 710 |
for threshold in sorted_threshold_values:
|
| 711 |
-
print(threshold)
|
| 712 |
valid_clusters[threshold] = [
|
| 713 |
j
|
| 714 |
for j in np.unique(cluster_assignments[threshold])
|
|
|
|
| 28 |
|
| 29 |
from concurrent.futures import ThreadPoolExecutor
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
import torch
|
| 33 |
device = 0 if torch.cuda.is_available() else -1
|
|
|
|
| 615 |
|
| 616 |
if threshold_values is None:
|
| 617 |
threshold_values = np.round(np.linspace(min_eps, max_eps, n), 6).astype(float)
|
|
|
|
| 618 |
|
| 619 |
cluster_assignments = {}
|
| 620 |
cluster_counts = {}
|
|
|
|
| 623 |
most_similar_comments = {}
|
| 624 |
|
| 625 |
for distance_threshold in threshold_values:
|
|
|
|
| 626 |
rounded_distance_threshold = round(distance_threshold, 6)
|
| 627 |
clustering = AgglomerativeClustering(
|
| 628 |
n_clusters=None,
|
|
|
|
| 690 |
comments = []
|
| 691 |
|
| 692 |
sorted_threshold_values = sorted(cluster_assignments.keys())
|
|
|
|
| 693 |
valid_clusters = {}
|
| 694 |
|
| 695 |
for threshold in sorted_threshold_values:
|
|
|
|
| 696 |
valid_clusters[threshold] = [
|
| 697 |
j
|
| 698 |
for j in np.unique(cluster_assignments[threshold])
|