Spaces:
Running
on
T4
Running
on
T4
File size: 4,530 Bytes
63f9eaa 4179233 63f9eaa ca7b7fb 63f9eaa 4179233 63f9eaa edb7d72 a8ccaf1 edb7d72 63f9eaa 4179233 63f9eaa 4179233 63f9eaa 4179233 3d817f4 4179233 63f9eaa 3d817f4 63f9eaa 7341cb3 edb7d72 7341cb3 4179233 3d817f4 7e76213 3d817f4 4179233 3d817f4 4179233 63f9eaa 45aca14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import os
import pandas as pd
import plotly.io as pio
import clustering
from dotenv import load_dotenv
from flask import Flask, render_template, request
import logging
# Root logger setup: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>" with a second-resolution timestamp.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
def log_message(message):
    """Emit *message* through the root logger at INFO level."""
    logging.log(logging.INFO, message)
# Read a local .env file only when HUGGINGFACE_HUB_CACHE is absent from the
# environment.
if "HUGGINGFACE_HUB_CACHE" not in os.environ:
    load_dotenv()

# YouTube API key taken from the environment; None when the variable is unset.
api_key = os.environ.get("youtube_api_key")

# Flask application: its own logger only reports ERROR and above, and
# exception propagation is switched off.
app = Flask(__name__)
app.logger.setLevel(logging.ERROR)
app.config.update(PROPAGATE_EXCEPTIONS=False)

# Seed constant; not referenced in the visible part of this file.
RANDOM_STATE = 333
def convert_graph_to_html(graph, full_html=False):
    """Serialize a figure to HTML, or return ``None`` for a falsy figure.

    Args:
        graph: Object handed to ``plotly.io.to_html``. Any falsy value
            short-circuits and yields ``None``.
        full_html: Forwarded unchanged as the ``full_html`` argument of
            ``plotly.io.to_html``.

    Returns:
        The value returned by ``plotly.io.to_html``, or ``None``.
    """
    if not graph:
        return None
    return pio.to_html(graph, full_html=full_html)
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the landing page and, on POST, the analysis of a YouTube video.

    A GET request renders ``index.html`` with every result set to ``None``.
    A POST request reads the ``url`` form field, fetches the video details
    and comments through the ``clustering`` module, and then builds the
    sentiment counts, the daily sentiment plot, the word cloud image, the
    Sankey diagram and the clustering-metrics plot passed to the template.

    Returns:
        The rendered ``index.html`` template. When the URL is missing or the
        video details / comments cannot be obtained, only ``error_message``
        is passed to the template.
    """
    video_details = None
    sankey_graph = None
    scores_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None
    error_message = None

    log_message("Iniciando procesamiento...")

    if request.method == "POST":
        # .get() avoids a KeyError when the field is absent; stripping makes
        # a whitespace-only value count as missing.
        url = (request.form.get("url") or "").strip()
        if not url:
            error_message = "La URL es requerida."
            return render_template("index.html", error_message=error_message)

        log_message("Obteniendo datos de Youtube")
        video_details = clustering.get_youtube_video_details(url, api_key)
        # Guard against a None result so the membership test below cannot
        # raise TypeError.
        if video_details is None:
            error_message = "No se pudieron obtener los detalles del video."
            return render_template("index.html", error_message=error_message)
        # The helper signals a failure through an "error" key.
        if "error" in video_details:
            error_message = video_details["error"]
            return render_template("index.html", error_message=error_message)

        comments_df = clustering.get_youtube_comments(api_key, url)
        # No comments (None or an empty frame): nothing to embed or cluster.
        if comments_df is None or comments_df.empty:
            error_message = "No se pudieron obtener comentarios."
            return render_template("index.html", error_message=error_message)

        log_message("Generando embeddings")
        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")

        log_message("Procesamiento de los datos")
        # Keep only the calendar date of each comment.
        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date

        log_message("Clasificación de los sentimientos")
        comments_df = clustering.classify_sentiment_df(comments_df)

        # NOTE: fixed path, overwritten on every request. The directory is
        # created on demand so to_pickle does not fail on a fresh checkout.
        pickle_path = "./data/Comentarios-Youtube/comments_df.pkl"
        os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
        comments_df.to_pickle(pickle_path)

        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
        sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")

        log_message("Generación de Wordcloud")
        # NOTE: fixed file name, overwritten on every request.
        image_path = os.path.join("static", "wordcloud.png")
        clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

        total = comments_df.shape[0]
        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

        log_message("Modelado y generación de métricas")
        (
            cluster_assignments,
            cluster_counts,
            calinski_harabasz_scores,
            silhouette_scores,
            most_similar_comments,
            umap_df,
        ) = clustering.perform_clustering(
            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
        )

        log_message("Creación de gráfico de Sankey")
        labels, source, target, values, comments = clustering.build_sankey_data(
            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
        )
        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
        sankey_graph = convert_graph_to_html(sankey_graph)

        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
        scores_graph = convert_graph_to_html(scores_graph)

    return render_template(
        "index.html",
        video_details=video_details,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
        error_message=error_message,  # still None here: every error path returns early
    )
# gunicorn -b 0.0.0.0:5000 app_clustering.app:app
# http://172.20.0.2:5000/
# http://0.0.0.0:5000/
# Start Flask's built-in server only when this file is executed directly,
# listening on all interfaces at port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)