Spaces:

Kuautli
/

ProyectoDS-AnalizaTube

Runtime error

App Files Files Community

Kuautli committed on Jan 27

Commit

4179233

verified ·

1 Parent(s): 4f10cd2

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -56

app.py CHANGED Viewed

@@ -1,15 +1,21 @@
 import os
 import pandas as pd
 import plotly.io as pio
-import gradio as gr
-import clustering
 from dotenv import load_dotenv
 if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
     load_dotenv()
 api_key = os.getenv("youtube_api_key")
 RANDOM_STATE = 333
@@ -17,63 +23,94 @@ def convert_graph_to_html(graph, full_html=False):
     return pio.to_html(graph, full_html=full_html) if graph else None
-def process_video(url):
     video_details = None
     sentiment_daily_graph = None
     sentiment_count = None
-    sankey_graph = None
-    scores_graph = None
-    if url:
-        video_details = clustering.get_youtube_video_details(url, api_key)
-        comments_df = clustering.get_youtube_comments(api_key, url)
-        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")
-        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date
-        comments_df = clustering.classify_sentiment_df(comments_df)
-        # Sentiment count
-        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
-        # Plot daily sentiment
-        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
-        sentiment_daily_graph_html = convert_graph_to_html(sentiment_daily_graph)
-        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")
-        total = comments_df.shape[0]
-        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
-        cluster_assignments, cluster_counts, calinski_harabasz_scores, silhouette_scores, most_similar_comments, umap_df = clustering.perform_clustering(
-            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
-        )
-        # Build Sankey data and plot
-        labels, source, target, values, comments = clustering.build_sankey_data(
-            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
-        )
-        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
-        sankey_graph_html = convert_graph_to_html(sankey_graph)
-        # Plot clustering metrics
-        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
-        scores_graph_html = convert_graph_to_html(scores_graph)
-    return video_details, sentiment_daily_graph_html, sentiment_count, sankey_graph_html, scores_graph_html
-# Gradio Interface
-iface = gr.Interface(
-    fn=process_video,
-    inputs=gr.Textbox(label="YouTube Video URL", placeholder="Ingresa la URL del video..."),
-    outputs=[
-        gr.JSON(label="Video Details"),
-        gr.HTML(label="Sentiment Daily Graph"),
-        gr.JSON(label="Sentiment Count"),
-        gr.HTML(label="Sankey Graph"),
-        gr.HTML(label="Clustering Scores Graph")
-    ],
-    title="YouTube Video Sentiment Analysis",
-    description="Ingresa la URL de un video de YouTube para analizar los comentarios y visualizar los resultados."
-)
 if __name__ == "__main__":
-    iface.launch()

 import os
 import pandas as pd
 import plotly.io as pio
+from app_clustering import clustering
 from dotenv import load_dotenv
+from flask import Flask, render_template, request
+import logging
 if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
     load_dotenv()
 api_key = os.getenv("youtube_api_key")
+app = Flask(__name__)
+app.logger.setLevel(logging.ERROR)
+app.config["PROPAGATE_EXCEPTIONS"] = False
 RANDOM_STATE = 333
     return pio.to_html(graph, full_html=full_html) if graph else None
+@app.route("/", methods=["GET", "POST"])
+def index():
     video_details = None
+    k_distance_graph = None
+    scores_graph = None
+    sankey_graph = None
+    image_path = None
     sentiment_daily_graph = None
     sentiment_count = None
+    if request.method == "POST":
+        url = request.form["url"]
+        if url:
+            video_details = clustering.get_youtube_video_details(url, api_key)
+            comments_df = clustering.get_youtube_comments(api_key, url)
+            comments_df = clustering.add_normalized_embeddings_to_dataframe(
+                comments_df, "comment"
+            )
+            comments_df["published_at"] = pd.to_datetime(
+                comments_df["published_at"]
+            ).dt.date
+            comments_df = clustering.classify_sentiment_df(comments_df)
+            comments_df.to_pickle(
+                "/workspace/app_clustering/data/Comentarios-Youtube/comments_df.pkl"
+            )
+            comments_df = pd.read_pickle(
+                "/workspace/app_clustering/data/Comentarios-Youtube/comments_df.pkl"
+            )
+            sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
+            sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
+            sentiment_daily_graph = convert_graph_to_html(sentiment_daily_graph)
+            umap_df, min_eps, max_eps = clustering.transform_embeddings(
+                comments_df, embeddings_col="embeddings"
+            )
+            # image_path = os.path.join(os.getcwd(), "static/wordcloud.png")
+            # print("path", image_path)
+            total = comments_df.shape[0]
+            min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
+            (
+                cluster_assignments,
+                cluster_counts,
+                calinski_harabasz_scores,
+                silhouette_scores,
+                most_similar_comments,
+                umap_df,
+            ) = clustering.perform_clustering(
+                umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
+            )
+            labels, source, target, values, comments = clustering.build_sankey_data(
+                cluster_assignments,
+                cluster_counts,
+                most_similar_comments,
+                min_items_by_cluster=min_items_by_cluster,
+            )
+            sankey_graph = clustering.plot_sankey(
+                labels, source, target, values, comments, height=1000, width=1200
+            )
+            sankey_graph = convert_graph_to_html(sankey_graph)
+            scores_graph, _ = clustering.plot_clustering_metric(
+                silhouette_scores, calinski_harabasz_scores
+            )
+            scores_graph = convert_graph_to_html(scores_graph)
+    return render_template(
+        "index.html",
+        video_details=video_details,
+        k_distance_graph=k_distance_graph,
+        sankey_graph=sankey_graph,
+        scores_graph=scores_graph,
+        wordcloud_path=image_path,
+        sentiment_daily_graph=sentiment_daily_graph,
+        sentiment_count=sentiment_count,
+    )
+#  gunicorn -b 0.0.0.0:5000 app_clustering.app:app
+# http://172.20.0.2:5000/
+# http://0.0.0.0:5000/
 if __name__ == "__main__":
+    app.run()