Kuautli committed
Commit 4179233 · verified · 1 Parent(s): 4f10cd2

Update app.py

Files changed (1)
  1. app.py +93 -56
app.py CHANGED
@@ -1,15 +1,21 @@
 import os
+
 import pandas as pd
 import plotly.io as pio
-import gradio as gr
-import clustering
+from app_clustering import clustering
 from dotenv import load_dotenv
+from flask import Flask, render_template, request
+import logging
 
 if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
     load_dotenv()
 
 api_key = os.getenv("youtube_api_key")
 
+app = Flask(__name__)
+app.logger.setLevel(logging.ERROR)
+app.config["PROPAGATE_EXCEPTIONS"] = False
+
 RANDOM_STATE = 333
 
 
@@ -17,63 +23,94 @@ def convert_graph_to_html(graph, full_html=False):
     return pio.to_html(graph, full_html=full_html) if graph else None
 
 
-def process_video(url):
+@app.route("/", methods=["GET", "POST"])
+def index():
     video_details = None
+    k_distance_graph = None
+    scores_graph = None
+    sankey_graph = None
+    image_path = None
     sentiment_daily_graph = None
     sentiment_count = None
-    sankey_graph = None
-    scores_graph = None
 
-    if url:
-        video_details = clustering.get_youtube_video_details(url, api_key)
-        comments_df = clustering.get_youtube_comments(api_key, url)
-        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")
-        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date
-        comments_df = clustering.classify_sentiment_df(comments_df)
-
-        # Sentiment count
-        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
-
-        # Plot daily sentiment
-        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
-        sentiment_daily_graph_html = convert_graph_to_html(sentiment_daily_graph)
-
-        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")
-        total = comments_df.shape[0]
-        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
-
-        cluster_assignments, cluster_counts, calinski_harabasz_scores, silhouette_scores, most_similar_comments, umap_df = clustering.perform_clustering(
-            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
-        )
-
-        # Build Sankey data and plot
-        labels, source, target, values, comments = clustering.build_sankey_data(
-            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
-        )
-        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
-        sankey_graph_html = convert_graph_to_html(sankey_graph)
-
-        # Plot clustering metrics
-        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
-        scores_graph_html = convert_graph_to_html(scores_graph)
-
-        return video_details, sentiment_daily_graph_html, sentiment_count, sankey_graph_html, scores_graph_html
-
-
-# Gradio Interface
-iface = gr.Interface(
-    fn=process_video,
-    inputs=gr.Textbox(label="YouTube Video URL", placeholder="Ingresa la URL del video..."),
-    outputs=[
-        gr.JSON(label="Video Details"),
-        gr.HTML(label="Sentiment Daily Graph"),
-        gr.JSON(label="Sentiment Count"),
-        gr.HTML(label="Sankey Graph"),
-        gr.HTML(label="Clustering Scores Graph")
-    ],
-    title="YouTube Video Sentiment Analysis",
-    description="Ingresa la URL de un video de YouTube para analizar los comentarios y visualizar los resultados."
-)
+    if request.method == "POST":
+        url = request.form["url"]
+        if url:
+            video_details = clustering.get_youtube_video_details(url, api_key)
+            comments_df = clustering.get_youtube_comments(api_key, url)
+            comments_df = clustering.add_normalized_embeddings_to_dataframe(
+                comments_df, "comment"
+            )
+
+            comments_df["published_at"] = pd.to_datetime(
+                comments_df["published_at"]
+            ).dt.date
+
+            comments_df = clustering.classify_sentiment_df(comments_df)
+            comments_df.to_pickle(
+                "/workspace/app_clustering/data/Comentarios-Youtube/comments_df.pkl"
+            )
+            comments_df = pd.read_pickle(
+                "/workspace/app_clustering/data/Comentarios-Youtube/comments_df.pkl"
+            )
+            sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
+            sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
+
+            sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)
+
+            umap_df, min_eps, max_eps = clustering.transform_embeddings(
+                comments_df, embeddings_col="embeddings"
+            )
+
+            # image_path = os.path.join(os.getcwd(), "static/wordcloud.png")
+            # print("path", image_path)
+
+            total = comments_df.shape[0]
+
+            min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
+
+            (
+                cluster_assignments,
+                cluster_counts,
+                calinski_harabasz_scores,
+                silhouette_scores,
+                most_similar_comments,
+                umap_df,
+            ) = clustering.perform_clustering(
+                umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
+            )
+
+            labels, source, target, values, comments = clustering.build_sankey_data(
+                cluster_assignments,
+                cluster_counts,
+                most_similar_comments,
+                min_items_by_cluster=min_items_by_cluster,
+            )
+
+            sankey_graph = clustering.plot_sankey(
+                labels, source, target, values, comments, height=1000, width=1200
+            )
+            sankey_graph = convert_graph_to_html(sankey_graph)
+
+            scores_graph, _ = clustering.plot_clustering_metric(
+                silhouette_scores, calinski_harabasz_scores
+            )
+            scores_graph = convert_graph_to_html(scores_graph)
+
+    return render_template(
+        "index.html",
+        video_details=video_details,
+        k_distance_graph=k_distance_graph,
+        sankey_graph=sankey_graph,
+        scores_graph=scores_graph,
+        wordcloud_path=image_path,
+        sentiment_daily_graph=sentiment_daily_graph,
+        sentiment_count=sentiment_count,
+    )
+
 
+# gunicorn -b 0.0.0.0:5000 app_clustering.app:app
+# http://172.20.0.2:5000/
+# http://0.0.0.0:5000/
 if __name__ == "__main__":
-    iface.launch()
+    app.run()
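
Note: the new Flask route reads the video URL from a form field named "url" and renders index.html with the computed graphs. A minimal client sketch for exercising the endpoint, assuming the app is already running locally on port 5000 (as in the gunicorn comment above); the video URL below is a placeholder, not from this commit:

# Minimal sketch: POST a YouTube URL to the new "/" route.
# Assumes the server is reachable at http://0.0.0.0:5000/ per the comment above.
import requests

response = requests.post(
    "http://0.0.0.0:5000/",
    data={"url": "https://www.youtube.com/watch?v=VIDEO_ID"},  # placeholder URL
)
print(response.status_code)  # 200 if index.html rendered
print(len(response.text))    # size of the rendered HTML page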
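
The unchanged convert_graph_to_html helper is what lets the template receive either an embeddable HTML fragment or None for each graph (e.g. on a plain GET before any URL is submitted). A standalone sketch of that behavior; the bar figure here is illustrative only:

# Sketch of the helper kept by this commit: wraps plotly.io.to_html,
# returning None when no figure was produced.
import plotly.graph_objects as go
import plotly.io as pio


def convert_graph_to_html(graph, full_html=False):
    return pio.to_html(graph, full_html=full_html) if graph else None


fig = go.Figure(go.Bar(x=["positive", "negative"], y=[10, 3]))  # illustrative data
print(convert_graph_to_html(fig)[:40])  # "<div>..." fragment for embedding in index.html
print(convert_graph_to_html(None))      # None -> nothing to render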