Spaces:
Runtime error
Runtime error
File size: 4,560 Bytes
63f9eaa 4179233 63f9eaa ca7b7fb 63f9eaa 4179233 63f9eaa edb7d72 a8ccaf1 edb7d72 63f9eaa 4179233 63f9eaa 4179233 63f9eaa 4179233 63f9eaa 7341cb3 edb7d72 7341cb3 4179233 edb7d72 4179233 edb7d72 4179233 edb7d72 4179233 edb7d72 4179233 7341cb3 4179233 7341cb3 4179233 edb7d72 c1385ec 4179233 edb7d72 4179233 edb7d72 4179233 3d454b3 4179233 edb7d72 4179233 63f9eaa 4179233 63f9eaa 45aca14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
import pandas as pd
import plotly.io as pio
import clustering
from dotenv import load_dotenv
from flask import Flask, render_template, request
import logging
# Root-logger configuration: INFO and above, each record prefixed with a
# timestamp and its level name.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
)
def log_message(message):
    """Emit *message* through the root logger at INFO level.

    Args:
        message: Text (or any object accepted by ``logging``) to record.
    """
    logging.log(logging.INFO, message)
# Load variables from a local .env file only when HUGGINGFACE_HUB_CACHE is
# absent from the environment (presumably it is set on Hugging Face Spaces,
# where secrets are injected directly -- TODO confirm).
if "HUGGINGFACE_HUB_CACHE" not in os.environ:
    load_dotenv()

# YouTube API key handed to the clustering helpers; None when the variable
# is not defined.
api_key = os.environ.get("youtube_api_key")

app = Flask(__name__)
# Only ERROR and above from Flask's own logger.
app.logger.setLevel(logging.ERROR)
# Let Flask's error handlers deal with exceptions instead of re-raising them.
app.config["PROPAGATE_EXCEPTIONS"] = False

# Seed constant; not referenced by any code visible in this file.
RANDOM_STATE = 333
def convert_graph_to_html(graph, full_html=False):
    """Serialise a Plotly figure to HTML, passing falsy input through as None.

    Args:
        graph: Figure to render; any falsy value (e.g. ``None``) is skipped.
        full_html: Forwarded to ``plotly.io.to_html``. The default ``False``
            asks for an embeddable fragment rather than a whole document.

    Returns:
        The HTML string produced by ``pio.to_html``, or ``None`` when
        ``graph`` is falsy.
    """
    if not graph:
        return None
    return pio.to_html(graph, full_html=full_html)
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the dashboard and, on POST, analyse the comments of a YouTube video.

    A GET request, or a POST without a usable ``url`` form field, renders
    ``index.html`` with every result set to ``None``. A POST carrying a URL
    runs the pipeline provided by the ``clustering`` module: fetch the video
    details and comments, add embeddings, classify sentiment, draw a word
    cloud, cluster the comments and build the Sankey and metric charts.

    Side effects:
        Writes ``./data/Comentarios-Youtube/comments_df.pkl`` and
        ``static/wordcloud.png`` (paths relative to the working directory).

    Returns:
        The rendered ``index.html`` template.
    """
    video_details = None
    # Never computed in this view; still passed on so the template always
    # receives the variable.
    k_distance_graph = None
    scores_graph = None
    sankey_graph = None
    image_path = None
    sentiment_daily_graph = None
    sentiment_count = None

    log_message("Iniciando procesamiento...")

    # .get() avoids the automatic HTTP 400 that request.form["url"] triggers
    # when the field is missing; stripping makes whitespace-only input count
    # as "no URL".
    url = ""
    if request.method == "POST":
        url = request.form.get("url", "").strip()

    if url:
        log_message("Obteniendo datos de Youtube")
        video_details = clustering.get_youtube_video_details(url, api_key)
        comments_df = clustering.get_youtube_comments(api_key, url)

        log_message("Generando embeddings")
        comments_df = clustering.add_normalized_embeddings_to_dataframe(
            comments_df, "comment"
        )

        log_message("Procesamiento de los datos")
        # Keep only the calendar date of each comment.
        comments_df["published_at"] = pd.to_datetime(
            comments_df["published_at"]
        ).dt.date

        log_message("Clasificación de los sentimientos")
        comments_df = clustering.classify_sentiment_df(comments_df)

        # Keep a snapshot of the classified comments on disk. The directory
        # is created on demand so a missing folder does not abort the request.
        snapshot_path = "./data/Comentarios-Youtube/comments_df.pkl"
        os.makedirs(os.path.dirname(snapshot_path), exist_ok=True)
        comments_df.to_pickle(snapshot_path)
        # The frame is deliberately NOT re-read from the pickle: the in-memory
        # copy holds the same data, and reading a shared fixed path back could
        # pick up another concurrent request's comments.

        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
        sentiment_daily_graph = convert_graph_to_html(
            clustering.plot_sentiment_daily(comments_df)
        )

        umap_df, min_eps, max_eps = clustering.transform_embeddings(
            comments_df, embeddings_col="embeddings"
        )

        log_message("Generación de wordcloud")
        image_path = os.path.join("static", "wordcloud.png")
        clustering.plot_wordcloud(
            comments_df, text_column="comment", output_filename=image_path
        )

        total = comments_df.shape[0]
        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

        log_message("Modelado y generación de métricas")
        (
            cluster_assignments,
            cluster_counts,
            calinski_harabasz_scores,
            silhouette_scores,
            most_similar_comments,
            umap_df,
        ) = clustering.perform_clustering(
            umap_df,
            min_eps,
            max_eps,
            n=10,
            threshold_values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
            embeddings_col="embeddings",
        )

        log_message("Creación de gráfico de Sankey")
        labels, source, target, values, comments = clustering.build_sankey_data(
            cluster_assignments,
            cluster_counts,
            most_similar_comments,
            min_items_by_cluster=min_items_by_cluster,
        )
        sankey_graph = convert_graph_to_html(
            clustering.plot_sankey(
                labels, source, target, values, comments, height=1000, width=1200
            )
        )

        # Only the first element returned by plot_clustering_metric is used.
        scores_figure, _ = clustering.plot_clustering_metric(
            silhouette_scores, calinski_harabasz_scores
        )
        scores_graph = convert_graph_to_html(scores_figure)

    return render_template(
        "index.html",
        video_details=video_details,
        k_distance_graph=k_distance_graph,
        sankey_graph=sankey_graph,
        scores_graph=scores_graph,
        wordcloud_path=image_path,
        sentiment_daily_graph=sentiment_daily_graph,
        sentiment_count=sentiment_count,
    )
# gunicorn -b 0.0.0.0:5000 app_clustering.app:app
# http://172.20.0.2:5000/
# http://0.0.0.0:5000/
if __name__ == "__main__":
    # Direct-run entry point: start Flask's built-in server on all
    # interfaces, port 7860.
    app.run(host="0.0.0.0", port=7860)