File size: 4,530 Bytes
63f9eaa
4179233
63f9eaa
 
ca7b7fb
63f9eaa
4179233
 
63f9eaa
edb7d72
a8ccaf1
edb7d72
 
 
 
 
 
 
 
63f9eaa
 
 
 
 
4179233
 
 
 
63f9eaa
 
 
 
 
 
 
4179233
 
63f9eaa
4179233
3d817f4
4179233
63f9eaa
 
3d817f4
63f9eaa
7341cb3
edb7d72
7341cb3
4179233
3d817f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e76213
3d817f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4179233
 
 
 
 
 
 
 
 
3d817f4
4179233
 
 
 
 
63f9eaa
45aca14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os

import pandas as pd
import plotly.io as pio
import clustering
from dotenv import load_dotenv
from flask import Flask, render_template, request
import logging

# Configure root logging once at import time: timestamped INFO-level messages
# shared by every call to log_message() below.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

def log_message(message):
    """Log *message* at INFO level via the module-wide root-logger config.

    Args:
        message: Text to record; forwarded unchanged to ``logging.info``.
    """
    logging.info(message)

# Load a local .env file only when HUGGINGFACE_HUB_CACHE is unset.
# NOTE(review): presumably that variable's presence marks a managed host
# (e.g. a Hugging Face Space) where secrets are injected directly — confirm.
if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
    load_dotenv()

# YouTube Data API key; None when the environment variable is missing.
api_key = os.getenv("youtube_api_key")

app = Flask(__name__)
app.logger.setLevel(logging.ERROR)  # silence Flask's own logger; module uses the root logger instead
app.config["PROPAGATE_EXCEPTIONS"] = False

# Fixed seed constant — assumed to be consumed by the clustering/embedding
# steps for reproducibility; not referenced in this file. TODO confirm usage.
RANDOM_STATE = 333


def convert_graph_to_html(graph, full_html=False):
    """Render a Plotly figure as an HTML snippet.

    Args:
        graph: Plotly figure to render; any falsy value short-circuits.
        full_html: Passed through to ``plotly.io.to_html`` — when True the
            output is a complete standalone page instead of a fragment.

    Returns:
        The HTML string, or None when *graph* is falsy.
    """
    if not graph:
        return None
    return pio.to_html(graph, full_html=full_html)


@app.route("/", methods=["GET", "POST"])
def index():
    """Single-page view for the YouTube comment analysis pipeline.

    On POST: fetch video details and comments for the submitted URL, embed
    and sentiment-classify the comments, cluster them, and render the
    resulting graphs. On GET (or on any intermediate error) render the
    template with whatever is available plus an optional error message.
    """
    video_details = None
    sankey_graph = None
    scores_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None
    error_message = None

    log_message("Iniciando procesamiento...")

    if request.method == "POST":
        url = request.form.get("url")  # .get avoids a KeyError when the field is absent
        if not url:
            error_message = "La URL es requerida."
            return render_template("index.html", error_message=error_message)

        log_message("Obteniendo datos de Youtube")
        video_details = clustering.get_youtube_video_details(url, api_key)
        if "error" in video_details:  # the helper reports failures via an "error" key
            error_message = video_details["error"]
            return render_template("index.html", error_message=error_message)

        comments_df = clustering.get_youtube_comments(api_key, url)
        if comments_df is None:  # no comments could be retrieved
            error_message = "No se pudieron obtener comentarios."
            return render_template("index.html", error_message=error_message)

        log_message("Generando embeddings")
        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")

        log_message("Procesamiento de los datos")
        # Keep only the calendar date for the daily sentiment plot.
        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date

        # BUGFIX: this and the three log strings below were mojibake (UTF-8
        # Spanish decoded with the wrong codec, e.g. "Clasificaci贸n");
        # restored the intended text.
        log_message("Clasificación de los sentimientos")
        comments_df = clustering.classify_sentiment_df(comments_df)
        # Ensure the target directory exists so to_pickle cannot fail on a fresh checkout.
        os.makedirs("./data/Comentarios-Youtube", exist_ok=True)
        comments_df.to_pickle("./data/Comentarios-Youtube/comments_df.pkl")

        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
        sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")

        log_message("Generación de Wordcloud")
        image_path = os.path.join("static", "wordcloud.png")
        clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

        total = comments_df.shape[0]
        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

        log_message("Modelado y generación de métricas")
        (cluster_assignments, cluster_counts, calinski_harabasz_scores, silhouette_scores, most_similar_comments, umap_df) = clustering.perform_clustering(
            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
        )

        log_message("Creación de gráfico de Sankey")
        labels, source, target, values, comments = clustering.build_sankey_data(
            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
        )

        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
        sankey_graph = convert_graph_to_html(sankey_graph)

        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
        scores_graph = convert_graph_to_html(scores_graph)

    return render_template(
        "index.html",
        video_details=video_details,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
        error_message=error_message,  # include the error message, if any
    )

# Production entry point (WSGI): gunicorn -b 0.0.0.0:5000 app_clustering.app:app
# then reachable at e.g. http://172.20.0.2:5000/ or http://0.0.0.0:5000/
# Development entry point: Flask dev server on port 7860
# (presumably 7860 because it is the default Hugging Face Spaces port — confirm).
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)