Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import os | |
| import pandas as pd | |
| import plotly.io as pio | |
| import clustering | |
| from dotenv import load_dotenv | |
| from flask import Flask, render_template, request | |
| import logging | |
# Configure the root logger once at import time: INFO and above, with a
# "timestamp - level - message" layout and second-resolution timestamps.
_LOG_SETTINGS = {
    "level": logging.INFO,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "datefmt": "%Y-%m-%d %H:%M:%S",
}
logging.basicConfig(**_LOG_SETTINGS)
def log_message(message):
    """Emit *message* on the root logger at INFO level.

    Args:
        message: Text (or any object) to log.
    """
    logging.log(logging.INFO, message)
# Only read a local .env file when HUGGINGFACE_HUB_CACHE is not defined
# (presumably that variable is provided by the hosted environment, where
# secrets already live in the process environment -- TODO confirm).
if "HUGGINGFACE_HUB_CACHE" not in os.environ:
    load_dotenv()

# YouTube Data API key; None when the variable is not set.
api_key = os.environ.get("youtube_api_key")

# Flask application: only log errors and do not propagate exceptions.
app = Flask(__name__)
app.logger.setLevel(logging.ERROR)
app.config["PROPAGATE_EXCEPTIONS"] = False

# Seed constant; not referenced in the visible part of this module.
RANDOM_STATE = 333
def convert_graph_to_html(graph, full_html=False):
    """Serialize a Plotly figure to an HTML string.

    Args:
        graph: Figure to render. Any falsy value (e.g. ``None``) means
            "no figure".
        full_html: Forwarded to ``plotly.io.to_html``; ``False`` (the
            default) requests a fragment rather than a full document.

    Returns:
        The HTML produced by ``plotly.io.to_html``, or ``None`` when
        *graph* is falsy.
    """
    if not graph:
        return None
    return pio.to_html(graph, full_html=full_html)
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the main page and, on POST, run the YouTube comment analysis.

    GET renders ``index.html`` with every result set to ``None``.

    POST reads the ``url`` form field and then, in order: fetches the video
    details and comments, adds embeddings, classifies sentiment, persists the
    comments DataFrame as a pickle, builds the daily-sentiment graph, the
    word cloud, the clustering, the Sankey diagram and the clustering-metric
    graph, and finally renders ``index.html`` with all of them.

    Any recoverable problem (missing URL, error reported by the video-details
    lookup, no comments) short-circuits into ``index.html`` rendered with only
    ``error_message`` set.

    Returns:
        The rendered ``index.html`` template.
    """
    # Template variables; they keep these defaults on a plain GET.
    video_details = None
    sankey_graph = None
    scores_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None
    error_message = None

    log_message("Iniciando procesamiento...")

    if request.method == "POST":
        # .get() avoids a KeyError when the form field is absent.
        url = request.form.get("url")
        if not url:
            error_message = "La URL es requerida."
            return render_template("index.html", error_message=error_message)

        log_message("Obteniendo datos de Youtube")
        video_details = clustering.get_youtube_video_details(url, api_key)
        # The lookup signals failure through an "error" key.
        if "error" in video_details:
            error_message = video_details["error"]
            return render_template("index.html", error_message=error_message)

        comments_df = clustering.get_youtube_comments(api_key, url)
        # Treat "no DataFrame" and "DataFrame without rows" alike: there is
        # nothing to embed or cluster in either case.
        if comments_df is None or comments_df.empty:
            error_message = "No se pudieron obtener comentarios."
            return render_template("index.html", error_message=error_message)

        log_message("Generando embeddings")
        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")

        log_message("Procesamiento de los datos")
        # Keep only the calendar date of each comment.
        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date

        log_message("Clasificación de los sentimientos")
        comments_df = clustering.classify_sentiment_df(comments_df)

        # Make sure the target directory exists before persisting; to_pickle
        # does not create missing parent directories.
        pickle_path = "./data/Comentarios-Youtube/comments_df.pkl"
        os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
        comments_df.to_pickle(pickle_path)

        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
        sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)

        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")

        log_message("Generación de wordcloud")
        image_path = os.path.join("static", "wordcloud.png")
        clustering.plot_wordcloud(comments_df, text_column="comment", output_filename=image_path)

        total = comments_df.shape[0]
        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

        log_message("Modelado y generación de métricas")
        (
            cluster_assignments,
            cluster_counts,
            calinski_harabasz_scores,
            silhouette_scores,
            most_similar_comments,
            umap_df,
        ) = clustering.perform_clustering(
            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
        )
        log_message(f"Clusters assignments {cluster_assignments}")

        log_message("Creación de gráfico de Sankey")
        labels, source, target, values, comments = clustering.build_sankey_data(
            cluster_assignments,
            cluster_counts,
            most_similar_comments,
            min_items_by_cluster=min_items_by_cluster,
        )
        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
        sankey_graph = convert_graph_to_html(sankey_graph)

        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
        scores_graph = convert_graph_to_html(scores_graph)

    return render_template(
        "index.html",
        video_details=video_details,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
        error_message=error_message,  # still None when nothing went wrong
    )
| # gunicorn -b 0.0.0.0:5000 app_clustering.app:app | |
| # http://172.20.0.2:5000/ | |
| # http://0.0.0.0:5000/ | |
if __name__ == "__main__":
    # Direct execution: start Flask's built-in server on every interface,
    # port 7860.
    app.run(port=7860, host="0.0.0.0")