GCLing committed on
Commit
c7ec63e
·
verified ·
1 Parent(s): 41762c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -124
app.py CHANGED
@@ -1,124 +1,124 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import os
5
- import joblib
6
- import numpy as np
7
- import librosa
8
- import gradio as gr
9
- from huggingface_hub import hf_hub_download
10
- from deepface import DeepFace
11
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
12
-
13
# --- 1. Download and load the SVM model ---
# repo_id is the Hub model repository, e.g. "GCLing/emotion-svm-model";
# filename is the name of the file uploaded to that repository,
# e.g. "svm_emotion_model.joblib".
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a repository you control.
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Load the text sentiment model ---
# uer/roberta-base-finetuned-chinanews-chinese is used as an example; it can
# be replaced with any other suitable Chinese sentiment classification model.
# NOTE(review): the checkpoint name suggests it was fine-tuned on a news
# dataset; confirm that its output labels really are sentiment classes.
print("Loading text sentiment model...")
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
print("Text sentiment model loaded.")
29
-
30
- # --- 3. 聲音特徵擷取函式 ---
31
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as a fixed-length MFCC feature vector.

    Thirteen MFCC coefficients are computed over time; the per-coefficient
    mean and variance are then joined into one vector (13 means followed by
    13 variances, 26 values in total).

    Args:
        signal: Audio samples as a float numpy array.
        sr: Sampling rate of ``signal``.

    Returns:
        A 1-D array holding the means followed by the variances.
    """
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # axis=1 reduces over time, leaving one statistic per coefficient.
    means = np.mean(coefficients, axis=1)
    variances = np.var(coefficients, axis=1)
    return np.concatenate((means, variances))
40
-
41
- # --- 4. 三種預測函式 ---
42
-
43
def predict_face(img: np.ndarray):
    """Estimate facial-emotion probabilities for a single image.

    The image is analysed with DeepFace using the OpenCV detector backend
    (chosen to avoid retinaface/TensorFlow version conflicts).

    Args:
        img: Image as a numpy array (H x W x 3), or ``None`` when the webcam
            has not delivered a frame yet.

    Returns:
        A dict mapping emotion label to a probability in ``[0, 1]`` (values
        sum to 1), e.g. ``{"happy": 0.80, "sad": 0.05, ...}``. An empty dict
        is returned when there is no frame or no face could be analysed.
    """
    # A streaming webcam input can fire before any frame is available.
    if img is None:
        return {}

    try:
        # DeepFace.analyze can be slow; suitable hardware is recommended.
        result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
    except ValueError:
        # DeepFace raises ValueError when no face is detected; for a live
        # feed that is an ordinary frame, not an application error.
        return {}

    # Newer DeepFace releases return a list with one entry per detected
    # face, older ones a single dict. Use the first face in either case.
    if isinstance(result, (list, tuple)):
        if not result:
            return {}
        result = result[0]

    scores = result["emotion"]
    total = float(sum(scores.values()))
    if total <= 0:
        return {}
    # Normalising by the sum yields 0-1 confidences whatever scale the
    # backend used, and float() turns numpy scalars into plain floats.
    return {label: float(value) / total for label, value in scores.items()}
53
-
54
def predict_voice(audio):
    """Estimate speech-emotion probabilities for a recorded audio file.

    The file is read with ``librosa.load`` at its native sampling rate,
    converted to MFCC statistics by ``extract_feature`` and scored with the
    SVM model's ``predict_proba``.

    Args:
        audio: Path of the temporary audio file supplied by the Gradio Audio
            component, or ``None``/empty when nothing has been recorded.

    Returns:
        A dict mapping emotion label to probability, e.g.
        ``{"angry": 0.1, "happy": 0.7, ...}``; an empty dict when there is
        no audio to analyse.
    """
    # Gradio passes None when the user submits without recording anything.
    if not audio:
        return {}

    signal, sr = librosa.load(audio, sr=None)
    # A zero-length recording has no frames to compute MFCCs from.
    if signal.size == 0:
        return {}

    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    # str()/float() make keys and values plain Python types for the Label.
    return {str(label): float(prob) for label, prob in zip(svm_model.classes_, probs)}
66
-
67
def predict_text(text: str):
    """Classify the emotion of a piece of Chinese text.

    Args:
        text: Input string; ``None``, empty or whitespace-only input is
            treated as "nothing to classify".

    Returns:
        A single-entry dict ``{label: score}`` built from the top prediction
        of the transformers pipeline, or an empty dict for blank input.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return {}
    # The pipeline returns a list of {"label": ..., "score": ...} dicts;
    # the original (unstripped) text is what gets classified.
    top = text_emotion(text)[0]
    label, score = top["label"], top["score"]
    return {label: float(score)}
77
-
78
- # --- 5. 建立 Gradio 介面 ---
79
def build_interface():
    """Assemble the tabbed Gradio app.

    Three ``gr.Interface`` objects are created (face, voice and text
    emotion) and combined into one ``gr.TabbedInterface``.

    Returns:
        The ``gr.TabbedInterface`` instance, ready to be launched.
    """
    tabs = []

    # Face emotion: numpy frames from the webcam.
    # NOTE(review): only the "webcam" source is enabled, so image upload is
    # not available in this tab.
    webcam_input = gr.Image(sources="webcam", streaming=True, type="numpy")
    face_tab = gr.Interface(
        fn=predict_face,
        inputs=webcam_input,
        outputs=gr.Label(num_top_classes=1),
        title="臉部情緒 (即時 Webcam)",
        description="允許攝影機拍照後自動分析當前表情的情緒分佈。",
    )
    tabs.append(("臉部情緒", face_tab))

    # Voice emotion: the recording is handed over as a file path.
    # NOTE(review): the description mentions uploading a file, but only the
    # "microphone" source is enabled.
    microphone_input = gr.Audio(sources="microphone", type="filepath")
    voice_tab = gr.Interface(
        fn=predict_voice,
        inputs=microphone_input,
        outputs=gr.Label(num_top_classes=1),
        title="語音情緒",
        description="錄製語音或上傳音訊檔,模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。",
    )
    tabs.append(("語音情緒", voice_tab))

    # Text emotion: free-form Chinese text.
    text_input = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
    text_tab = gr.Interface(
        fn=predict_text,
        inputs=text_input,
        outputs=gr.Label(num_top_classes=1),
        title="文字情緒",
        description="輸入中文文字,即時判斷文字情緒並回傳標籤與信心分數。",
    )
    tabs.append(("文字情緒", text_tab))

    # Combine the three interfaces into a single tabbed app.
    names = [name for name, _ in tabs]
    interfaces = [interface for _, interface in tabs]
    return gr.TabbedInterface(interface_list=interfaces, tab_names=names)
119
-
120
if __name__ == "__main__":
    demo = build_interface()
    # The port defaults to 7861 but can be overridden with the
    # GRADIO_SERVER_PORT environment variable, so a hosting platform or a
    # second local service can pick a different port without a code change.
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7861"))
    # share=True creates a temporary public link; when deploying to Spaces
    # it can be removed or set to False.
    demo.launch(server_name="0.0.0.0", server_port=port, share=True)
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import os
5
+ import joblib
6
+ import numpy as np
7
+ import librosa
8
+ import gradio as gr
9
+ from huggingface_hub import hf_hub_download
10
+ from deepface import DeepFace
11
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
12
+
13
# --- 1. Download and load the SVM model ---
# repo_id is the Hub model repository, e.g. "GCLing/emotion-svm-model";
# filename is the name of the file uploaded to that repository,
# e.g. "svm_emotion_model.joblib".
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a repository you control.
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Load the text sentiment model ---
# uer/roberta-base-finetuned-chinanews-chinese is used as an example; it can
# be replaced with any other suitable Chinese sentiment classification model.
# NOTE(review): the checkpoint name suggests it was fine-tuned on a news
# dataset; confirm that its output labels really are sentiment classes.
print("Loading text sentiment model...")
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
print("Text sentiment model loaded.")
29
+
30
+ # --- 3. 聲音特徵擷取函式 ---
31
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as a fixed-length MFCC feature vector.

    Thirteen MFCC coefficients are computed over time; the per-coefficient
    mean and variance are then joined into one vector (13 means followed by
    13 variances, 26 values in total).

    Args:
        signal: Audio samples as a float numpy array.
        sr: Sampling rate of ``signal``.

    Returns:
        A 1-D array holding the means followed by the variances.
    """
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # axis=1 reduces over time, leaving one statistic per coefficient.
    means = np.mean(coefficients, axis=1)
    variances = np.var(coefficients, axis=1)
    return np.concatenate((means, variances))
40
+
41
+ # --- 4. 三種預測函式 ---
42
+
43
def predict_face(img: np.ndarray):
    """Estimate facial-emotion probabilities for a single image.

    The image is analysed with DeepFace using the OpenCV detector backend
    (chosen to avoid retinaface/TensorFlow version conflicts).

    Args:
        img: Image as a numpy array (H x W x 3), or ``None`` when the webcam
            has not delivered a frame yet.

    Returns:
        A dict mapping emotion label to a probability in ``[0, 1]`` (values
        sum to 1), e.g. ``{"happy": 0.80, "sad": 0.05, ...}``. An empty dict
        is returned when there is no frame or no face could be analysed.
    """
    # A streaming webcam input can fire before any frame is available.
    if img is None:
        return {}

    try:
        # DeepFace.analyze can be slow; suitable hardware is recommended.
        result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
    except ValueError:
        # DeepFace raises ValueError when no face is detected; for a live
        # feed that is an ordinary frame, not an application error.
        return {}

    # Newer DeepFace releases return a list with one entry per detected
    # face, older ones a single dict. Use the first face in either case.
    if isinstance(result, (list, tuple)):
        if not result:
            return {}
        result = result[0]

    scores = result["emotion"]
    total = float(sum(scores.values()))
    if total <= 0:
        return {}
    # Normalising by the sum yields 0-1 confidences whatever scale the
    # backend used, and float() turns numpy scalars into plain floats.
    return {label: float(value) / total for label, value in scores.items()}
53
+
54
def predict_voice(audio):
    """Estimate speech-emotion probabilities for a recorded audio file.

    The file is read with ``librosa.load`` at its native sampling rate,
    converted to MFCC statistics by ``extract_feature`` and scored with the
    SVM model's ``predict_proba``.

    Args:
        audio: Path of the temporary audio file supplied by the Gradio Audio
            component, or ``None``/empty when nothing has been recorded.

    Returns:
        A dict mapping emotion label to probability, e.g.
        ``{"angry": 0.1, "happy": 0.7, ...}``; an empty dict when there is
        no audio to analyse.
    """
    # Gradio passes None when the user submits without recording anything.
    if not audio:
        return {}

    signal, sr = librosa.load(audio, sr=None)
    # A zero-length recording has no frames to compute MFCCs from.
    if signal.size == 0:
        return {}

    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    # str()/float() make keys and values plain Python types for the Label.
    return {str(label): float(prob) for label, prob in zip(svm_model.classes_, probs)}
66
+
67
def predict_text(text: str):
    """Classify the emotion of a piece of Chinese text.

    Args:
        text: Input string; ``None``, empty or whitespace-only input is
            treated as "nothing to classify".

    Returns:
        A single-entry dict ``{label: score}`` built from the top prediction
        of the transformers pipeline, or an empty dict for blank input.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return {}
    # The pipeline returns a list of {"label": ..., "score": ...} dicts;
    # the original (unstripped) text is what gets classified.
    top = text_emotion(text)[0]
    label, score = top["label"], top["score"]
    return {label: float(score)}
77
+
78
+ # --- 5. 建立 Gradio 介面 ---
79
def build_interface():
    """Assemble the tabbed Gradio app.

    Three ``gr.Interface`` objects are created (face, voice and text
    emotion) and combined into one ``gr.TabbedInterface``.

    Returns:
        The ``gr.TabbedInterface`` instance, ready to be launched.
    """
    tabs = []

    # Face emotion: numpy frames from the webcam.
    # NOTE(review): only the "webcam" source is enabled, so image upload is
    # not available in this tab.
    webcam_input = gr.Image(sources="webcam", streaming=True, type="numpy")
    face_tab = gr.Interface(
        fn=predict_face,
        inputs=webcam_input,
        outputs=gr.Label(num_top_classes=1),
        title="臉部情緒 (即時 Webcam)",
        description="允許攝影機拍照後自動分析當前表情的情緒分佈。",
    )
    tabs.append(("臉部情緒", face_tab))

    # Voice emotion: the recording is handed over as a file path.
    # NOTE(review): the description mentions uploading a file, but only the
    # "microphone" source is enabled.
    microphone_input = gr.Audio(sources="microphone", type="filepath")
    voice_tab = gr.Interface(
        fn=predict_voice,
        inputs=microphone_input,
        outputs=gr.Label(num_top_classes=1),
        title="語音情緒",
        description="錄製語音或上傳音訊檔,模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。",
    )
    tabs.append(("語音情緒", voice_tab))

    # Text emotion: free-form Chinese text.
    text_input = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
    text_tab = gr.Interface(
        fn=predict_text,
        inputs=text_input,
        outputs=gr.Label(num_top_classes=1),
        title="文字情緒",
        description="輸入中文文字,即時判斷文字情緒並回傳標籤與信心分數。",
    )
    tabs.append(("文字情緒", text_tab))

    # Combine the three interfaces into a single tabbed app.
    names = [name for name, _ in tabs]
    interfaces = [interface for _, interface in tabs]
    return gr.TabbedInterface(interface_list=interfaces, tab_names=names)
119
+
120
if __name__ == "__main__":
    demo = build_interface()
    # The port defaults to 7861 but can be overridden with the
    # GRADIO_SERVER_PORT environment variable, so a hosting platform or a
    # second local service can pick a different port without a code change.
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7861"))
    # share=True creates a temporary public link; when deploying to Spaces
    # it can be removed or set to False.
    demo.launch(server_name="0.0.0.0", server_port=port, share=True)