Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from prophet import Prophet
|
|
6 |
import matplotlib.pyplot as plt
|
7 |
import gradio as gr
|
8 |
|
|
|
9 |
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
10 |
sentiment_model = pipeline(
|
11 |
"text-classification",
|
@@ -13,6 +14,7 @@ sentiment_model = pipeline(
|
|
13 |
tokenizer="uer/roberta-base-finetuned-dianping-chinese"
|
14 |
)
|
15 |
|
|
|
16 |
def full_pipeline(file, num_clusters):
|
17 |
df = pd.read_csv(file)
|
18 |
|
@@ -21,12 +23,13 @@ def full_pipeline(file, num_clusters):
|
|
21 |
if "timestamp" not in df.columns:
|
22 |
return "❌ 錯誤:CSV 檔案需包含 timestamp 欄位(例如新聞時間)"
|
23 |
|
|
|
24 |
texts = df["text"].astype(str).tolist()
|
25 |
embeddings = embedder.encode(texts, show_progress_bar=True)
|
26 |
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
|
27 |
df["topic"] = kmeans.fit_predict(embeddings)
|
28 |
|
29 |
-
|
30 |
sentiments = []
|
31 |
for text in texts:
|
32 |
try:
|
@@ -45,7 +48,7 @@ def full_pipeline(file, num_clusters):
|
|
45 |
sentiments.append(sentiment)
|
46 |
df["sentiment"] = sentiments
|
47 |
|
48 |
-
|
49 |
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
50 |
topic0 = df[df["topic"] == 0]
|
51 |
daily_counts = topic0.groupby(df["timestamp"].dt.date).size().reset_index(name="count")
|
@@ -60,7 +63,7 @@ def full_pipeline(file, num_clusters):
|
|
60 |
forecast = m.predict(future)
|
61 |
fig = m.plot(forecast)
|
62 |
|
63 |
-
#
|
64 |
output_csv = "/tmp/final_output.csv"
|
65 |
output_img = "/tmp/forecast.png"
|
66 |
df.to_csv(output_csv, index=False)
|
@@ -68,7 +71,7 @@ def full_pipeline(file, num_clusters):
|
|
68 |
|
69 |
return output_csv, output_img
|
70 |
|
71 |
-
#
|
72 |
gr.Interface(
|
73 |
fn=full_pipeline,
|
74 |
inputs=[
|
@@ -79,6 +82,6 @@ gr.Interface(
|
|
79 |
gr.File(label="結果 CSV(含 topic, sentiment)"),
|
80 |
gr.Image(label="topic=0 熱度預測圖(Prophet)")
|
81 |
],
|
82 |
-
title
|
83 |
description="自動分群、分析情緒,並預測熱度走勢(topic=0 為例)"
|
84 |
).launch()
|
|
|
6 |
import matplotlib.pyplot as plt
|
7 |
import gradio as gr
|
8 |
|
9 |
+
# Models: multilingual sentence embedder and text-classification pipeline (used for sentiment)
|
10 |
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
11 |
sentiment_model = pipeline(
|
12 |
"text-classification",
|
|
|
14 |
tokenizer="uer/roberta-base-finetuned-dianping-chinese"
|
15 |
)
|
16 |
|
17 |
+
# Main pipeline (Gradio callback): topic clustering, sentiment analysis, topic-0 forecast
|
18 |
def full_pipeline(file, num_clusters):
|
19 |
df = pd.read_csv(file)
|
20 |
|
|
|
23 |
if "timestamp" not in df.columns:
|
24 |
return "❌ 錯誤:CSV 檔案需包含 timestamp 欄位(例如新聞時間)"
|
25 |
|
26 |
+
# Embed texts and cluster them into topics with KMeans (no dimensionality reduction happens here)
|
27 |
texts = df["text"].astype(str).tolist()
|
28 |
embeddings = embedder.encode(texts, show_progress_bar=True)
|
29 |
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
|
30 |
df["topic"] = kmeans.fit_predict(embeddings)
|
31 |
|
32 |
+
# 情緒分析
|
33 |
sentiments = []
|
34 |
for text in texts:
|
35 |
try:
|
|
|
48 |
sentiments.append(sentiment)
|
49 |
df["sentiment"] = sentiments
|
50 |
|
51 |
+
# 熱度預測
|
52 |
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
53 |
topic0 = df[df["topic"] == 0]
|
54 |
daily_counts = topic0.groupby(df["timestamp"].dt.date).size().reset_index(name="count")
|
|
|
63 |
forecast = m.predict(future)
|
64 |
fig = m.plot(forecast)
|
65 |
|
66 |
+
#output
|
67 |
output_csv = "/tmp/final_output.csv"
|
68 |
output_img = "/tmp/forecast.png"
|
69 |
df.to_csv(output_csv, index=False)
|
|
|
71 |
|
72 |
return output_csv, output_img
|
73 |
|
74 |
+
#gradio
|
75 |
gr.Interface(
|
76 |
fn=full_pipeline,
|
77 |
inputs=[
|
|
|
82 |
gr.File(label="結果 CSV(含 topic, sentiment)"),
|
83 |
gr.Image(label="topic=0 熱度預測圖(Prophet)")
|
84 |
],
|
85 |
+
title="話題雷達",
|
86 |
description="自動分群、分析情緒,並預測熱度走勢(topic=0 為例)"
|
87 |
).launch()
|