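"""Flask front end for the YouTube comment analysis pipeline.

Given a video URL, the app fetches comments, generates embeddings, classifies
sentiment, clusters the comments, and renders the resulting graphs
(daily sentiment, Sankey diagram, clustering metrics, wordcloud).
"""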
import logging
import os

import pandas as pd
import plotly.io as pio
from dotenv import load_dotenv
from flask import Flask, render_template, request

import clustering

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

def log_message(message):
    """Log an informational message via the configured root logger."""
    logging.info(message)

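# Load a local .env file only when HUGGINGFACE_HUB_CACHE is unset
# (presumably when running outside the hosted Hugging Face environment).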
if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
    load_dotenv()

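# YouTube Data API key read from the environment.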
api_key = os.getenv("youtube_api_key")

app = Flask(__name__)
app.logger.setLevel(logging.ERROR)
app.config["PROPAGATE_EXCEPTIONS"] = False

RANDOM_STATE = 333


def convert_graph_to_html(graph, full_html=False):
    """Render a Plotly figure as an HTML fragment, or return None when there is no figure."""
    return pio.to_html(graph, full_html=full_html) if graph else None


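# Single route: accepts a YouTube URL via POST, runs the analysis pipeline,
# and renders all graphs on the index page.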
@app.route("/", methods=["GET", "POST"])
def index():
    video_details = None
    k_distance_graph = None
    scores_graph = None
    sankey_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None

    current_directory = os.getcwd()
    log_message("Starting processing...")

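    # On POST, run the full pipeline for the submitted video URL.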
    if request.method == "POST":
        url = request.form["url"]
        if url:
            log_message("Fetching data from YouTube")
            video_details = clustering.get_youtube_video_details(url, api_key)
            comments_df = clustering.get_youtube_comments(api_key, url)
            log_message("Generating embeddings")
            comments_df = clustering.add_normalized_embeddings_to_dataframe(
                comments_df, "comment"
            )
            log_message("Processing the data")
            comments_df["published_at"] = pd.to_datetime(
                comments_df["published_at"]
            ).dt.date
            log_message("Classifying sentiment")
            comments_df = clustering.classify_sentiment_df(comments_df)
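            # Persist the processed comments to disk and read them back before continuing.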
            comments_df.to_pickle(
                "./data/Comentarios-Youtube/comments_df.pkl"
            )
            comments_df = pd.read_pickle(
                "./data/Comentarios-Youtube/comments_df.pkl"
            )
            sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
            sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)

            sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

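            # Project the embeddings into a low-dimensional UMAP space and get
            # the eps range explored by the clustering step.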
            umap_df, min_eps, max_eps = clustering.transform_embeddings(
                comments_df, embeddings_col="embeddings"
            )
            log_message("Generating wordcloud")
            image_path = os.path.join("static", "wordcloud.png")
            clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

            total = comments_df.shape[0]
            
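            # Minimum number of items required per cluster, derived from the total comment count.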
            min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
            log_message("Modeling and computing metrics")
            (
                cluster_assignments,
                cluster_counts,
                calinski_harabasz_scores,
                silhouette_scores,
                most_similar_comments,
                umap_df,
            ) = clustering.perform_clustering(
                umap_df, min_eps, max_eps, n=10, 
                embeddings_col="embeddings"
            )
            log_message("Building Sankey diagram")
            labels, source, target, values, comments = clustering.build_sankey_data(
                cluster_assignments,
                cluster_counts,
                most_similar_comments,
                min_items_by_cluster=min_items_by_cluster,
            )

            sankey_graph = clustering.plot_sankey(
                labels, source, target, values, comments, height=1000, width=1200
            )
            sankey_graph = convert_graph_to_html(sankey_graph)

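            # Silhouette and Calinski-Harabasz scores for the clustering sweep, embedded as HTML.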
            scores_graph, _ = clustering.plot_clustering_metric(
                silhouette_scores, calinski_harabasz_scores
            )
            scores_graph = convert_graph_to_html(scores_graph)

    return render_template(
        "index.html",
        video_details=video_details,
        k_distance_graph=k_distance_graph,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
    )


#  gunicorn -b 0.0.0.0:5000 app_clustering.app:app
# http://172.20.0.2:5000/
# http://0.0.0.0:5000/
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)