william1324 committed on
Commit
4ab5677
·
verified ·
1 Parent(s): 06c367a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -40
app.py CHANGED
@@ -1,50 +1,87 @@
1
  import pandas as pd
 
 
2
  from transformers import pipeline
 
 
3
  import gradio as gr
4
 
5
# Load the Hugging Face Chinese sentiment model (described by the original
# author as a three-class classifier).
classifier = pipeline(
    task="text-classification",
    model="uer/roberta-base-finetuned-dianping-chinese",
    tokenizer="uer/roberta-base-finetuned-dianping-chinese",
)
 
 
 
 
 
7
 
8
# Analysis function (receives the uploaded CSV)
def analyze_csv(file):
    """Run sentiment analysis on every row of an uploaded CSV.

    Args:
        file: The uploaded CSV as delivered by the Gradio file input; it is
            passed straight to ``pd.read_csv``. Must contain a ``text`` column.

    Returns:
        Path of a new CSV holding the original columns plus ``情緒判斷``
        (sentiment tag) and ``信心分數`` (classifier score rounded to four
        decimals).

    Raises:
        gr.Error: If no file was uploaded or the ``text`` column is missing.
            The message is raised instead of returned because a plain string
            returned from here would be treated as the path of the result
            file by the file output component.
    """
    import os
    import tempfile

    if file is None:
        raise gr.Error("錯誤:請先上傳 CSV 檔案。")

    df = pd.read_csv(file)

    # The whole analysis is driven by the "text" column.
    if "text" not in df.columns:
        raise gr.Error("錯誤:CSV 檔案中必須包含 'text' 欄位。")

    # Generic LABEL_n names come from the original mapping. The prefix table
    # covers checkpoints that publish descriptive label names instead.
    # NOTE(review): the dianping checkpoint appears to emit "negative ..." /
    # "positive ..." labels -- confirm against its model card.
    sentiment_by_label = {
        "LABEL_0": "負向",
        "LABEL_1": "中立",
        "LABEL_2": "正向",
    }
    sentiment_by_prefix = (
        ("negative", "負向"),
        ("neutral", "中立"),
        ("positive", "正向"),
    )

    labels = []
    scores = []
    for text in df["text"]:
        # truncation=True keeps over-long texts within the model's input limit
        # instead of failing the whole batch.
        result = classifier(str(text), truncation=True)[0]
        raw_label = result["label"]
        sentiment = sentiment_by_label.get(raw_label)
        if sentiment is None:
            lowered = str(raw_label).lower()
            sentiment = next(
                (tag for prefix, tag in sentiment_by_prefix
                 if lowered.startswith(prefix)),
                "未知",
            )
        labels.append(sentiment)
        scores.append(round(result["score"], 4))

    # Attach the results to the original dataframe.
    df["情緒判斷"] = labels
    df["信心分數"] = scores

    # Write to a per-request directory so concurrent users cannot overwrite
    # each other's result file.
    output_dir = tempfile.mkdtemp(prefix="analyze_csv_")
    output_file = os.path.join(output_dir, "output.csv")
    df.to_csv(output_file, index=False)
    return output_file
42
-
43
# Gradio interface (takes a file in, hands a file back)
csv_upload = gr.File(
    label="上傳包含 'text' 欄位的 CSV 檔案",
    file_types=[".csv"],
)
csv_download = gr.File(label="下載標註後的 CSV 檔案")

gr.Interface(
    fn=analyze_csv,
    inputs=csv_upload,
    outputs=csv_download,
    title="中文情緒分析系統(批次處理)",
    description="上傳一份 CSV,系統會針對 'text' 欄做情緒分析,並下載結果。",
).launch()
 
1
  import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.cluster import KMeans
4
  from transformers import pipeline
5
+ from prophet import Prophet
6
+ import matplotlib.pyplot as plt
7
  import gradio as gr
8
 
9
# 1️⃣ Model initialisation
# Sentence encoder used to embed texts before clustering.
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Text classifier used for the per-row sentiment tag.
sentiment_model = pipeline(
    task="text-classification",
    model="uer/roberta-base-finetuned-dianping-chinese",
    tokenizer="uer/roberta-base-finetuned-dianping-chinese",
)
16
 
17
# 2️⃣ Main processing pipeline

# Raw classifier label -> sentiment tag written to the CSV. The generic
# LABEL_n names are the original mapping; the prefix table covers checkpoints
# that publish descriptive label names instead.
# NOTE(review): the dianping checkpoint appears to emit "negative ..." /
# "positive ..." labels -- confirm against its model card.
_SENTIMENT_BY_LABEL = {
    "LABEL_0": "負向",
    "LABEL_1": "中立",
    "LABEL_2": "正向",
}
_SENTIMENT_BY_PREFIX = (
    ("negative", "負向"),
    ("neutral", "中立"),
    ("positive", "正向"),
)


def _map_sentiment_label(label):
    """Translate a raw classifier label into 負向 / 中立 / 正向, else 未知."""
    if label in _SENTIMENT_BY_LABEL:
        return _SENTIMENT_BY_LABEL[label]
    lowered = str(label).lower()
    for prefix, sentiment in _SENTIMENT_BY_PREFIX:
        if lowered.startswith(prefix):
            return sentiment
    return "未知"


def full_pipeline(file, num_clusters):
    """Cluster the uploaded texts, tag their sentiment and forecast topic 0.

    Steps: embed the ``text`` column, group the embeddings with KMeans, label
    each text with the sentiment model, then fit Prophet on the number of
    topic-0 rows per day and plot a 7-period forecast.

    Args:
        file: The uploaded CSV as delivered by the Gradio file input; it is
            passed straight to ``pd.read_csv``. Must contain ``text`` and
            ``timestamp`` columns.
        num_clusters: Requested number of clusters. Coerced to ``int`` because
            a numeric input widget may deliver a float.

    Returns:
        ``(output_csv, output_img)``: path of the CSV with added ``topic`` and
        ``sentiment`` columns, and path of the forecast plot (PNG).

    Raises:
        gr.Error: On a missing upload, missing columns, an empty file, an
            invalid cluster count, unparseable timestamps, or fewer than two
            distinct days in topic 0. Messages are raised rather than returned
            because a lone string does not match the two declared outputs.
    """
    import os
    import tempfile

    if file is None:
        raise gr.Error("❌ 錯誤:請先上傳 CSV 檔案")

    df = pd.read_csv(file)

    if "text" not in df.columns:
        raise gr.Error("錯誤:CSV 檔案需包含 text 欄位")
    if "timestamp" not in df.columns:
        raise gr.Error("❌ 錯誤:CSV 檔案需包含 timestamp 欄位(例如新聞時間)")
    if df.empty:
        raise gr.Error("❌ 錯誤:CSV 檔案沒有任何資料列")

    texts = df["text"].astype(str).tolist()

    # KMeans needs an integer cluster count no larger than the sample count.
    try:
        n_clusters = int(num_clusters)
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("❌ 錯誤:聚類數必須是整數") from None
    if not 1 <= n_clusters <= len(texts):
        raise gr.Error(
            f"❌ 錯誤:聚類數需介於 1 與資料筆數({len(texts)})之間"
        )

    # Embed and cluster. n_init is pinned so results do not depend on the
    # installed scikit-learn version's default.
    embeddings = embedder.encode(texts, show_progress_bar=True)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    df["topic"] = kmeans.fit_predict(embeddings)

    # Sentiment analysis, best effort per row: one failing text is tagged
    # "錯誤" instead of aborting the run.
    sentiments = []
    for text in texts:
        try:
            # truncation=True keeps over-long texts within the model's limit.
            label = sentiment_model(text, truncation=True)[0]["label"]
        except Exception:
            sentiment = "錯誤"
        else:
            sentiment = _map_sentiment_label(label)
        sentiments.append(sentiment)
    df["sentiment"] = sentiments

    # Popularity forecast (topic 0 is used as the example).
    try:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
    except (ValueError, TypeError) as err:
        raise gr.Error("❌ 錯誤:timestamp 欄位無法解析為日期時間") from err

    topic0 = df[df["topic"] == 0]
    # Group on the filtered frame's own dates; Prophet expects columns ds / y.
    daily_counts = (
        topic0.groupby(topic0["timestamp"].dt.date)
        .size()
        .rename_axis("ds")
        .reset_index(name="y")
    )

    if len(daily_counts) < 2:
        raise gr.Error("❌ 無法預測:topic=0 數據太少")

    m = Prophet()
    m.fit(daily_counts)
    future = m.make_future_dataframe(periods=7)
    forecast = m.predict(future)
    fig = m.plot(forecast)

    # Save results in a per-request directory so concurrent users cannot
    # overwrite each other's files.
    output_dir = tempfile.mkdtemp(prefix="full_pipeline_")
    output_csv = os.path.join(output_dir, "final_output.csv")
    output_img = os.path.join(output_dir, "forecast.png")
    df.to_csv(output_csv, index=False)
    try:
        fig.savefig(output_img)
    finally:
        # Release the figure; otherwise every request leaves one open.
        plt.close(fig)

    return output_csv, output_img
73
+
74
# 3️⃣ Gradio interface
# Inputs: the CSV upload and the number of clusters.
# Outputs: the annotated CSV and the topic-0 forecast image.
gr.Interface(
    fn=full_pipeline,
    inputs=[
        # Restrict the picker to CSV files, as the previous version did.
        gr.File(
            label="上傳 CSV(需含 text 與 timestamp 欄)",
            file_types=[".csv"],
        ),
        # precision=0 makes the widget deliver an integer cluster count.
        gr.Number(label="分幾群?(聚類數)", value=5, precision=0),
    ],
    outputs=[
        gr.File(label="結果 CSV(含 topic, sentiment)"),
        gr.Image(label="topic=0 熱度預測圖(Prophet)"),
    ],
    title="中文新聞話題類聚 + 情緒分析 + 熱度預測",
    description="自動分群、分析情緒,並預測熱度走勢(topic=0 為例)",
).launch()