Spaces:

Kuautli
/

ProyectoDS-AnalizaTube

Sleeping

App Files Files Community

ProyectoDS-AnalizaTube / app.py

Kuautli

Update app.py

7e76213 verified about 18 hours ago

raw

history blame contribute delete

4.53 kB

	import os

	import pandas as pd
	import plotly.io as pio
	import clustering
	from dotenv import load_dotenv
	from flask import Flask, render_template, request
	import logging

	# Configure the root logger once at import time: INFO threshold and
	# "timestamp - LEVEL - message" records with second-resolution timestamps.
	logging.basicConfig(
	    datefmt="%Y-%m-%d %H:%M:%S",
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)

	def log_message(message):
	    """Emit ``message`` through the root logger at INFO level.

	    Thin convenience wrapper used by this module for progress messages.

	    Args:
	        message: Text (or any object accepted by ``logging``) to record.
	    """
	    logging.log(logging.INFO, message)

	# Not referenced anywhere else in the visible part of this file —
	# presumably a random seed; confirm before removing.
	RANDOM_STATE = 333

	# Load variables from a local .env file only when HUGGINGFACE_HUB_CACHE is
	# absent from the environment.
	if "HUGGINGFACE_HUB_CACHE" not in os.environ:
	    load_dotenv()

	# API key handed to the clustering helpers; None when the variable is unset.
	api_key = os.environ.get("youtube_api_key")

	app = Flask(__name__)
	# Only ERROR and above are emitted through Flask's own logger.
	app.logger.setLevel(logging.ERROR)
	app.config["PROPAGATE_EXCEPTIONS"] = False


	def convert_graph_to_html(graph, full_html=False):
	    """Render ``graph`` to an HTML string via ``pio.to_html``.

	    Args:
	        graph: Figure to render; any falsy value (e.g. ``None``) is skipped.
	        full_html: Forwarded unchanged as the ``full_html`` keyword of
	            ``pio.to_html``.

	    Returns:
	        Whatever ``pio.to_html`` returns, or ``None`` when ``graph`` is falsy.
	    """
	    if not graph:
	        return None
	    return pio.to_html(graph, full_html=full_html)


	@app.route("/", methods=["GET", "POST"])
	def index():
	    """Serve the landing page and, on POST, analyse a YouTube video's comments.

	    GET renders ``index.html`` with every result set to ``None``.

	    POST reads the ``url`` form field and runs the pipeline implemented by the
	    ``clustering`` module: fetch video details and comments, add embeddings,
	    classify sentiment, build the daily-sentiment, Sankey and clustering-score
	    graphs, and write a word-cloud image to ``static/wordcloud.png``. The
	    classified comments are also pickled under ``./data/Comentarios-Youtube/``.

	    Returns:
	        The rendered ``index.html`` template. When the URL is missing, the
	        video details contain an ``"error"`` key, or no comments are
	        available, the template is rendered with only ``error_message`` set.
	    """
	    video_details = None
	    sankey_graph = None
	    scores_graph = None
	    image_path = None
	    sentiment_daily_graph = None
	    sentiment_count = None
	    error_message = None

	    log_message("Iniciando procesamiento...")

	    if request.method == "POST":
	        # .get() avoids a KeyError on a missing field; stripping makes a
	        # whitespace-only value count as missing too.
	        url = (request.form.get("url") or "").strip()
	        if not url:
	            error_message = "La URL es requerida."
	            return render_template("index.html", error_message=error_message)

	        log_message("Obteniendo datos de Youtube")
	        video_details = clustering.get_youtube_video_details(url, api_key)
	        # Assumes the helper returns a dict-like object that carries an
	        # "error" key on failure — TODO confirm against clustering.
	        if "error" in video_details:
	            error_message = video_details["error"]
	            return render_template("index.html", error_message=error_message)

	        comments_df = clustering.get_youtube_comments(api_key, url)
	        # An empty frame is as unusable as None for the steps below, so both
	        # take the same error path.
	        if comments_df is None or comments_df.empty:
	            error_message = "No se pudieron obtener comentarios."
	            return render_template("index.html", error_message=error_message)

	        log_message("Generando embeddings")
	        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")

	        log_message("Procesamiento de los datos")
	        # Keep only the calendar date of each comment.
	        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date

	        log_message("Clasificación de los sentimientos")
	        comments_df = clustering.classify_sentiment_df(comments_df)

	        # Make sure the target directory exists; to_pickle does not create it.
	        pickle_path = "./data/Comentarios-Youtube/comments_df.pkl"
	        os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
	        comments_df.to_pickle(pickle_path)

	        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
	        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
	        sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

	        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")

	        log_message("Generación de Wordcloud")
	        image_path = os.path.join("static", "wordcloud.png")
	        clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

	        total = comments_df.shape[0]
	        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

	        log_message("Modelado y generación de métricas")
	        (
	            cluster_assignments,
	            cluster_counts,
	            calinski_harabasz_scores,
	            silhouette_scores,
	            most_similar_comments,
	            umap_df,
	        ) = clustering.perform_clustering(
	            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
	        )

	        log_message("Creación de gráfico de Sankey")
	        labels, source, target, values, comments = clustering.build_sankey_data(
	            cluster_assignments,
	            cluster_counts,
	            most_similar_comments,
	            min_items_by_cluster=min_items_by_cluster,
	        )

	        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
	        sankey_graph = convert_graph_to_html(sankey_graph)

	        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
	        scores_graph = convert_graph_to_html(scores_graph)

	    # Reached on GET (all values still None) and after a successful POST.
	    return render_template(
	        "index.html",
	        video_details=video_details,
	        sankey_graph=sankey_graph,
	        scores_graph=scores_graph,
	        wordcloud_path=image_path,
	        sentiment_daily_graph=sentiment_daily_graph,
	        sentiment_count=sentiment_count,
	        error_message=error_message,  # always None on this path
	    )

	# Deployment notes kept from the original author:
	#   gunicorn -b 0.0.0.0:5000 app_clustering.app:app
	#   http://172.20.0.2:5000/
	#   http://0.0.0.0:5000/
	# The direct launch below listens on port 7860, not the 5000 shown above.
	if __name__ == "__main__":
	    app.run(port=7860, host="0.0.0.0")