GCLing committed (verified)
Commit 7b9bebe · 1 Parent(s): b8ff14e

Update app.py

Files changed (1):
  1. app.py +84 -93
app.py CHANGED
@@ -1,28 +1,22 @@
 import gradio as gr
 print("Gradio version:", gr.__version__)
-import os
-import joblib
 import numpy as np
 import librosa
-import gradio as gr
-import time
-import re
-from transformers import pipeline
 from huggingface_hub import hf_hub_download
 from deepface import DeepFace
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-

-# --- 1. Download and load the SVM model ---
-# repo_id is your model repo path, e.g. "GCLing/emotion-svm-model"
-# filename is the file name uploaded to that repo, e.g. "svm_emotion_model.joblib"
 print("Downloading SVM model from Hugging Face Hub...")
 model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
 print(f"SVM model downloaded to: {model_path}")
 svm_model = joblib.load(model_path)
 print("SVM model loaded.")

-# --- 2. Load the text emotion analysis model ---
 zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
 label_map_en2cn = {
@@ -36,82 +30,38 @@ emo_keywords = {
     "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
     "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
 }
-# simple negation word list
 negations = ["不","沒","沒有","別","勿","非"]

-# --- 3. Audio feature extraction ---
-def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
-    """
-    Compute 13-dimensional MFCCs from an audio signal (numpy array) at sample rate sr,
-    and return the per-dimension means and variances as one feature vector (26 dims total).
-    """
-    # librosa loads the signal as a float numpy array
-    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
-    # axis=1: mean and variance over time for each MFCC dimension
-    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
-
-# --- 4. The three prediction functions ---
-
-def predict_face(img):
-    print("predict_face called, img is None?", img is None)
-    if img is None:
-        return {}
-    try:
-        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
-        # simplified list/dict handling:
-        # just take the first detected face
-        if isinstance(res, list):
-            first = res[0] if res else {}
-            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
-        else:
-            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
-        print("predict_face result:", emo)
-        return emo
-    except Exception as e:
-        print("DeepFace.analyze error:", e)
-        return {}

-def predict_voice(audio_path: str):
-    # if there is no recording file path, return an empty dict right away
-    if not audio_path:
-        # log to help debugging
-        print("predict_voice: got None or empty audio_path, skipping analysis")
-        return {}
-    try:
-        signal, sr = librosa.load(audio_path, sr=None)
-        # extract features
-        feat = extract_feature(signal, sr)  # your feature-extraction function
-        probs = svm_model.predict_proba([feat])[0]
-        labels = svm_model.classes_
-        return {labels[i]: float(probs[i]) for i in range(len(labels))}
-    except Exception as e:
-        print("predict_voice error:", e)
-        return {}
-
 def predict_text_mixed(text: str):
-    """
-    Try the keyword_emotion rules first; if none match, fall back to zero-shot
-    classification. Returns a {Chinese label: float_score} dict for gr.Label display.
-    """
     if not text or text.strip() == "":
         return {}
-    # rules first
     res = keyword_emotion(text)
     if res:
-        # return only the top emotion and its share; the full distribution would also work
         top_emo = max(res, key=res.get)
-        # optionally map English keys to Chinese,
-        # e.g. happy -> 高兴, angry -> 愤怒, etc.
-        mapping = {
-            "happy": "高兴",
-            "angry": "愤怒",
-            "sad": "悲伤",
-            "surprise": "惊讶",
-            "fear": "恐惧"
-        }
         cn = mapping.get(top_emo, top_emo)
         return {cn: res[top_emo]}
-    # no rule hit: zero-shot fallback
     try:
         out = zero_shot(text, candidate_labels=candidate_labels,
                         hypothesis_template="这句话表达了{}情绪")
@@ -124,29 +74,70 @@ def predict_text_mixed(text: str):
         print("zero-shot error:", e)
         return {"中性": 1.0}

-# --- 5. Build the Gradio interface ---
-with gr.Blocks() as demo:
-    with gr.TabItem("臉部情緒"):
-        with gr.Row():
-            webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
-            face_out = gr.Label(label="情緒分布")
-        webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

-    # the remaining tabs can keep their original form, or use the Blocks style
-    with gr.TabItem("語音情緒"):
-        audio = gr.Audio(sources="microphone", streaming=False, type="filepath", label="錄音")
-        audio_output = gr.Label(label="語音情緒結果")
-        # trigger via change/submit: call predict_voice once recording finishes
-        audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)

     with gr.TabItem("文字情緒"):
         gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
         with gr.Row():
             text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
             text_out = gr.Label(label="文字情緒結果")
         text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
 if __name__ == "__main__":
-
     demo.launch()
-    # do not pass server_name or server_port
-
 
 import gradio as gr
 print("Gradio version:", gr.__version__)
+import os, time, re
 import numpy as np
+import joblib
 import librosa
 from huggingface_hub import hf_hub_download
 from deepface import DeepFace
+from transformers import pipeline
+# no need to import AutoTokenizer / AutoModelForSequenceClassification unless they are used directly

+# --- 1. Load the SVM voice model ---
 print("Downloading SVM model from Hugging Face Hub...")
 model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
 print(f"SVM model downloaded to: {model_path}")
 svm_model = joblib.load(model_path)
 print("SVM model loaded.")

+# --- 2. Text emotion analysis: rules + zero-shot ---
 zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
 label_map_en2cn = {

     "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
     "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
 }
 negations = ["不","沒","沒有","別","勿","非"]

+def keyword_emotion(text: str):
+    counts = {emo: 0 for emo in emo_keywords}
+    for emo, kws in emo_keywords.items():
+        for w in kws:
+            idx = text.find(w)
+            if idx != -1:
+                # simple negation check
+                neg = False
+                for neg_word in negations:
+                    plen = len(neg_word)
+                    if idx - plen >= 0 and text[idx-plen:idx] == neg_word:
+                        neg = True
+                        break
+                if not neg:
+                    counts[emo] += 1
+    total = sum(counts.values())
+    if total > 0:
+        return {emo: counts[emo]/total for emo in counts}
+    else:
+        return None
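
A quick sanity check of the negation rule above (a sketch, not part of the commit; it uses only the keyword lists visible in this diff):

keyword_emotion("我好怕")  # "怕" hits the fear list with no negation prefix -> {"fear": 1.0, all other categories 0.0}
keyword_emotion("我不怕")  # "怕" is immediately preceded by the negation "不" -> no hits, returns None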

 def predict_text_mixed(text: str):
     if not text or text.strip() == "":
         return {}
     res = keyword_emotion(text)
     if res:
         top_emo = max(res, key=res.get)
+        mapping = {"happy":"高兴","angry":"愤怒","sad":"悲伤","surprise":"惊讶","fear":"恐惧"}
         cn = mapping.get(top_emo, top_emo)
         return {cn: res[top_emo]}
     try:
         out = zero_shot(text, candidate_labels=candidate_labels,
                         hypothesis_template="这句话表达了{}情绪")

         print("zero-shot error:", e)
         return {"中性": 1.0}
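
A worked example of the rule-first path in predict_text_mixed (a sketch; both calls are deterministic because they return before the zero-shot model is consulted):

predict_text_mixed("我好怕")  # keyword hit on the fear list -> {"恐惧": 1.0}
predict_text_mixed("   ")     # blank input -> {}

Text with no keyword hit falls through to the zero-shot pipeline, which scores the six candidate_labels against the Chinese hypothesis template above.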

+# --- 3. Voice emotion prediction ---
+def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
+    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
+    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
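
extract_feature collapses a clip of any length into a fixed 26-dimensional vector (13 MFCC means + 13 variances over time), which is what lets a plain SVM consume variable-length recordings. A minimal shape check (a sketch, assuming one second of silence at 16 kHz):

feat = extract_feature(np.zeros(16000, dtype=np.float32), 16000)
assert feat.shape == (26,)  # 13 means + 13 variances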

+def predict_voice(audio_path: str):
+    if not audio_path:
+        print("predict_voice: empty audio_path, skipping")
+        return {}
+    try:
+        signal, sr = librosa.load(audio_path, sr=None)
+        feat = extract_feature(signal, sr)
+        probs = svm_model.predict_proba([feat])[0]
+        labels = svm_model.classes_
+        return {labels[i]: float(probs[i]) for i in range(len(labels))}
+    except Exception as e:
+        print("predict_voice error:", e)
+        return {}
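
Note that predict_voice depends on svm_model.predict_proba, which a scikit-learn SVC only exposes when it was trained with probability=True; that is an assumption about the uploaded svm_emotion_model.joblib (otherwise decision_function scores would be the fallback).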

+# --- 4. Face emotion prediction ---
+def predict_face(img: np.ndarray):
+    print("predict_face called, img is None?", img is None)
+    if img is None:
+        return {}
+    try:
+        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
+        if isinstance(res, list):
+            first = res[0] if res else {}
+            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
+        else:
+            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
+        # cast to float so the scores are JSON-serializable
+        emo_fixed = {k: float(v) for k, v in emo.items()}
+        print("predict_face result:", emo_fixed)
+        return emo_fixed
+    except Exception as e:
+        print("DeepFace.analyze error:", e)
+        return {}
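
One behavioural note: DeepFace.analyze raises when it finds no face unless enforce_detection=False is passed, so with the defaults used here a faceless frame is absorbed by the except branch and returns {}. Workable for streaming, if noisy in the logs.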
+
+# --- 5. Gradio interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("## 多模態情緒分析示例")
+    with gr.Tabs():
+        # face emotion tab
+        with gr.TabItem("臉部情緒"):
+            gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
+            with gr.Row():
+                webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
+                face_out = gr.Label(label="情緒分布")
+            webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
+        # voice emotion tab
+        with gr.TabItem("語音情緒"):
+            gr.Markdown("### 語音情緒 分析")
+            with gr.Row():
+                audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")
+                voice_out = gr.Label(label="語音情緒結果")
+            audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)
+        # text emotion tab
         with gr.TabItem("文字情緒"):
             gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
             with gr.Row():
                 text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                 text_out = gr.Label(label="文字情緒結果")
             text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
+
 if __name__ == "__main__":
     demo.launch()
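
A caveat to read alongside the gr.__version__ print at the top of the file: the webcam/microphone keyword changed across Gradio major versions, so these constructors only run if the Space pins a matching release. A hedged sketch of the two spellings (the version split is stated from memory of the Gradio API, not from this commit):

# Gradio 3.x style, as used in this commit:
#   gr.Image(source="webcam", streaming=True, type="numpy")
#   gr.Audio(source="microphone", type="filepath")
# Gradio 4.x and later expect a list via `sources`:
#   gr.Image(sources=["webcam"], streaming=True, type="numpy")
#   gr.Audio(sources=["microphone"], type="filepath")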