Spaces:

Kuautli
/

ProyectoDS-AnalizaTube

Runtime error

App Files Files Community

ProyectoDS-AnalizaTube / app.py

Kuautli

Update app.py

f9d2ce2 verified 27 days ago

raw

history blame

4.48 kB

	import os

	import pandas as pd
	import plotly.io as pio
	import clustering
	from dotenv import load_dotenv
	from flask import Flask, render_template, request
	import logging

	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(levelname)s - %(message)s',
	datefmt='%Y-%m-%d %H:%M:%S'
	)

	def log_message(message):
	""""""
	logging.info(message)

	if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
	load_dotenv()

	api_key = os.getenv("youtube_api_key")

	app = Flask(__name__)
	app.logger.setLevel(logging.ERROR)
	app.config["PROPAGATE_EXCEPTIONS"] = False

	RANDOM_STATE = 333


	def convert_graph_to_html(graph, full_html=False):
	return pio.to_html(graph, full_html=full_html) if graph else None


	@app.route("/", methods=["GET", "POST"])
	def index():
	video_details = None
	k_distance_graph = None
	scores_graph = None
	sankey_graph = None
	image_path = None
	sentiment_daily_graph = None
	sentiment_count = None

	current_directory = os.getcwd()
	log_message("Iniciando procesamiento...")

	if request.method == "POST":
	url = request.form["url"]
	if url:
	log_message("Obteniendo datos de Youtube")
	video_details = clustering.get_youtube_video_details(url, api_key)
	comments_df = clustering.get_youtube_comments(api_key, url)
	log_message("Generando embeddings")
	comments_df = clustering.add_normalized_embeddings_to_dataframe(
	comments_df, "comment"
	)
	log_message("Procesamiento de los datos")
	comments_df["published_at"] = pd.to_datetime(
	comments_df["published_at"]
	).dt.date
	log_message("Clasificación de los sentimientos")
	comments_df = clustering.classify_sentiment_df(comments_df)
	comments_df.to_pickle(
	"./data/Comentarios-Youtube/comments_df.pkl"
	)
	comments_df = pd.read_pickle(
	"./data/Comentarios-Youtube/comments_df.pkl"
	)
	sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
	sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)

	sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

	umap_df, min_eps, max_eps = clustering.transform_embeddings(
	comments_df, embeddings_col="embeddings"
	)
	log_message("Generación de wordcloud")
	image_path = os.path.join("static", "wordcloud.png")
	clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

	total = comments_df.shape[0]

	min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
	log_message("Modelado y generación de métricas")
	(
	cluster_assignments,
	cluster_counts,
	calinski_harabasz_scores,
	silhouette_scores,
	most_similar_comments,
	umap_df,
	) = clustering.perform_clustering(
	umap_df, min_eps, max_eps, n=10,
	embeddings_col="embeddings"
	)
	log_message("Creación de gráfico de Sankey")
	labels, source, target, values, comments = clustering.build_sankey_data(
	cluster_assignments,
	cluster_counts,
	most_similar_comments,
	min_items_by_cluster=min_items_by_cluster,
	)

	sankey_graph = clustering.plot_sankey(
	labels, source, target, values, comments, height=1000, width=1200
	)
	sankey_graph = convert_graph_to_html(sankey_graph)

	scores_graph, _ = clustering.plot_clustering_metric(
	silhouette_scores, calinski_harabasz_scores
	)
	scores_graph = convert_graph_to_html(scores_graph)

	return render_template(
	"index.html",
	video_details=video_details,
	k_distance_graph=k_distance_graph,
	sankey_graph=sankey_graph,
	scores_graph=scores_graph,
	wordcloud_path=image_path,
	sentiment_daily_graph=sentiment_daily_graph,
	sentiment_count=sentiment_count,
	)


	# gunicorn -b 0.0.0.0:5000 app_clustering.app:app
	# http://172.20.0.2:5000/
	# http://0.0.0.0:5000/
	if __name__ == "__main__":
	app.run(host='0.0.0.0', port=7860)