Update app.py
app.py
CHANGED
@@ -1,50 +1,87 @@
import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.cluster import KMeans
from transformers import pipeline
+from prophet import Prophet
+import matplotlib.pyplot as plt
import gradio as gr

+# 1️⃣ Model initialization
+embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+sentiment_model = pipeline(
+    "text-classification",
+    model="uer/roberta-base-finetuned-dianping-chinese",
+    tokenizer="uer/roberta-base-finetuned-dianping-chinese"
+)

+# 2️⃣ Main processing pipeline
+def full_pipeline(file, num_clusters):
    df = pd.read_csv(file)

    if "text" not in df.columns:
+        raise gr.Error("❌ Error: the CSV file must contain a text column")
+    if "timestamp" not in df.columns:
+        raise gr.Error("❌ Error: the CSV file must contain a timestamp column (e.g. the article's publication time)")
+
+    # Vectorize and cluster the texts
+    texts = df["text"].astype(str).tolist()
+    embeddings = embedder.encode(texts, show_progress_bar=True)
+    kmeans = KMeans(n_clusters=int(num_clusters), random_state=42)  # gr.Number passes a float
+    df["topic"] = kmeans.fit_predict(embeddings)
+
+    # Sentiment analysis
+    sentiments = []
+    for text in texts:
+        try:
+            result = sentiment_model(text)[0]
+            label = result["label"]
+            if label == "LABEL_0":
+                sentiment = "negative"
+            elif label == "LABEL_1":
+                sentiment = "neutral"
+            elif label == "LABEL_2":
+                sentiment = "positive"
+            else:
+                sentiment = "unknown"
+        except Exception:
+            sentiment = "error"
+        sentiments.append(sentiment)
+    df["sentiment"] = sentiments
+
+    # Popularity forecast (topic=0 as an example)
+    df["timestamp"] = pd.to_datetime(df["timestamp"])
+    topic0 = df[df["topic"] == 0]
+    daily_counts = topic0.groupby(topic0["timestamp"].dt.date).size().reset_index(name="count")
+    daily_counts.columns = ["ds", "y"]
+
+    if len(daily_counts) < 2:
+        raise gr.Error("❌ Cannot forecast: not enough data for topic=0")
+
+    m = Prophet()
+    m.fit(daily_counts)
+    future = m.make_future_dataframe(periods=7)
+    forecast = m.predict(future)
+    fig = m.plot(forecast)
+
+    # Save the results
+    output_csv = "/tmp/final_output.csv"
+    output_img = "/tmp/forecast.png"
+    df.to_csv(output_csv, index=False)
+    fig.savefig(output_img)
+
+    return output_csv, output_img
+
+# 3️⃣ Gradio interface
gr.Interface(
+    fn=full_pipeline,
+    inputs=[
+        gr.File(label="Upload CSV (must contain text and timestamp columns)"),
+        gr.Number(label="Number of clusters", value=5)
+    ],
+    outputs=[
+        gr.File(label="Result CSV (with topic and sentiment columns)"),
+        gr.Image(label="topic=0 popularity forecast plot (Prophet)")
+    ],
+    title="Chinese News Topic Clustering + Sentiment Analysis + Popularity Forecasting",
+    description="Automatically clusters articles into topics, analyzes sentiment, and forecasts the popularity trend (topic=0 as an example)"
).launch()
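
For reference, a minimal sketch of the input the updated pipeline expects: a CSV with a text column (one news item per row) and a timestamp column that pd.to_datetime can parse. The file name and the sample rows below are illustrative only and not part of the Space.

import pandas as pd

# Toy rows in the expected shape. The row count should be at least the chosen
# cluster count, and topic=0 needs at least two distinct dates for the Prophet step.
sample = pd.DataFrame({
    "text": [
        "央行宣布維持利率不變",
        "科技股大漲,市場情緒樂觀",
        "颱風來襲,多地宣布停班停課",
    ],
    "timestamp": ["2024-01-01", "2024-01-02", "2024-01-03"],
})
sample.to_csv("sample_news.csv", index=False)  # upload this file through the Gradio UI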
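
The new imports also imply extra dependencies for the Space. A sketch of a matching requirements.txt, assuming the standard PyPI package names and no version pins:

pandas
transformers
torch
sentence-transformers
scikit-learn
prophet
matplotlib
gradio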