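"""Flask web app for analyzing the comments of a YouTube video.

Given a video URL submitted through the index page, the app fetches the
video details and comments, computes comment embeddings, classifies
sentiment, clusters the comments, and renders the results (daily sentiment,
word cloud, Sankey diagram, and clustering metrics) back on the page.
"""
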
import logging
import os

import pandas as pd
import plotly.io as pio
from dotenv import load_dotenv
from flask import Flask, render_template, request

import clustering
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
def log_message(message):
    """Log an informational message using the configured logger."""
    logging.info(message)
# Load variables from a local .env file only when HUGGINGFACE_HUB_CACHE is not
# set (i.e., when running outside the hosted environment).
if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
    load_dotenv()

api_key = os.getenv("youtube_api_key")
app = Flask(__name__)
app.logger.setLevel(logging.ERROR)
app.config["PROPAGATE_EXCEPTIONS"] = False
RANDOM_STATE = 333
def convert_graph_to_html(graph, full_html=False):
    """Render a Plotly figure as an HTML fragment, or return None if no figure is given."""
    return pio.to_html(graph, full_html=full_html) if graph else None
@app.route("/", methods=["GET", "POST"])
def index():
    video_details = None
    sankey_graph = None
    scores_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None
    error_message = None
    current_directory = os.getcwd()

    log_message("Starting processing...")

    if request.method == "POST":
        url = request.form.get("url")  # Use .get() to avoid a KeyError
        if not url:
            error_message = "The URL is required."
            return render_template("index.html", error_message=error_message)
        log_message("Fetching YouTube data")
        video_details = clustering.get_youtube_video_details(url, api_key)
        if "error" in video_details:  # Handle errors when fetching video details
            error_message = video_details["error"]
            return render_template("index.html", error_message=error_message)

        comments_df = clustering.get_youtube_comments(api_key, url)
        if comments_df is None:  # No comments could be retrieved
            error_message = "Comments could not be retrieved."
            return render_template("index.html", error_message=error_message)
        log_message("Generating embeddings")
        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")

        log_message("Processing the data")
        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date

        log_message("Classifying sentiment")
        comments_df = clustering.classify_sentiment_df(comments_df)
        os.makedirs("./data/Comentarios-Youtube", exist_ok=True)  # Ensure the output directory exists
        comments_df.to_pickle("./data/Comentarios-Youtube/comments_df.pkl")

        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
        sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)
        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")

        log_message("Generating word cloud")
        image_path = os.path.join("static", "wordcloud.png")
        clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

        total = comments_df.shape[0]
        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

        log_message("Modeling and computing metrics")
        (
            cluster_assignments,
            cluster_counts,
            calinski_harabasz_scores,
            silhouette_scores,
            most_similar_comments,
            umap_df,
        ) = clustering.perform_clustering(
            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
        )

        log_message("Building the Sankey diagram")
        labels, source, target, values, comments = clustering.build_sankey_data(
            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
        )
        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
        sankey_graph = convert_graph_to_html(sankey_graph)

        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
        scores_graph = convert_graph_to_html(scores_graph)
    return render_template(
        "index.html",
        video_details=video_details,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
        error_message=error_message,  # Include the error message if present
    )
# gunicorn -b 0.0.0.0:5000 app_clustering.app:app
# http://172.20.0.2:5000/
# http://0.0.0.0:5000/
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)
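
# Minimal usage sketch (an assumption for local testing, not part of the app):
# with the server above running on port 7860 and the `requests` package
# installed, a form submission can be simulated as follows; the video URL is
# a placeholder.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/",
#       data={"url": "https://www.youtube.com/watch?v=<VIDEO_ID>"},
#   )
#   print(resp.status_code)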