#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import joblib
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
from huggingface_hub import hf_hub_download
from deepface import DeepFace
# --- 1. Download and load the SVM model ---
# repo_id is your model repository path, e.g. "GCLing/emotion-svm-model";
# filename is the file uploaded to that repository, e.g. "svm_emotion_model.joblib".
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")
# --- 2. Load the text emotion models ---
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
label_map_en2cn = {
"joy": "高興", "sadness": "悲傷", "anger": "憤怒",
"fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
emo_keywords = {
"happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
"angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
"sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
"surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
"fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
# Simple list of negation words
negations = ["不","沒","沒有","別","勿","非"]
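# keyword_emotion() is referenced by predict_text_mixed() below but is missing
# from this file; the following is a minimal sketch assuming plain substring
# counting over emo_keywords with naive negation handling (a hit is discarded
# when a word from `negations` immediately precedes it). The original
# implementation may differ.
def keyword_emotion(text: str):
    """Count keyword hits per emotion and return a normalized
    {emotion: ratio} dict, or None when no keyword matches."""
    counts = {emo: 0 for emo in emo_keywords}
    for emo, kws in emo_keywords.items():
        for kw in kws:
            start = 0
            while True:
                idx = text.find(kw, start)
                if idx == -1:
                    break
                # Discard the hit if a negation word directly precedes it
                if not any(text[max(0, idx - len(neg)):idx] == neg for neg in negations):
                    counts[emo] += 1
                start = idx + len(kw)
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: c / total for emo, c in counts.items() if c > 0}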
# --- 3. Audio feature extraction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """
    Compute 13-dimensional MFCC features from an audio signal (numpy array)
    at sample rate sr, and return a 26-dimensional feature vector built from
    the per-coefficient means and variances.
    """
    # librosa.load returns the signal as a float numpy array
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # axis=1: mean and variance over time for each MFCC coefficient
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
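# Quick sanity check (illustrative): a 1-second 440 Hz sine at sr=22050
# yields a 26-dim vector (13 MFCC means + 13 MFCC variances):
#   sig = 0.5 * np.sin(2 * np.pi * 440 * np.arange(22050) / 22050)
#   assert extract_feature(sig, 22050).shape == (26,)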
# --- 4. The three prediction functions ---
def predict_face(img):
    print("predict_face called, img is None?", img is None)
    if img is None:
        return {}
    try:
        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # DeepFace may return a list (one entry per face) or a single dict;
        # take the first detected face either way.
        if isinstance(res, list):
            first = res[0] if res else {}
            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
        else:
            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
        print("predict_face result:", emo)
        return emo
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}
def predict_voice(audio_path: str):
    # Without a recording file path there is nothing to analyze
    if not audio_path:
        # Log to make debugging easier
        print("predict_voice: got None or empty audio_path, skipping analysis")
        return {}
    try:
        signal, sr = librosa.load(audio_path, sr=None)
        # Extract the 26-dim MFCC feature vector defined above
        feat = extract_feature(signal, sr)
        probs = svm_model.predict_proba([feat])[0]
        labels = svm_model.classes_
        return {labels[i]: float(probs[i]) for i in range(len(labels))}
    except Exception as e:
        print("predict_voice error:", e)
        return {}
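# Illustrative usage: predict_voice("sample.wav") returns a probability dict
# keyed by svm_model.classes_, e.g. {"angry": 0.1, "happy": 0.7, ...}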
def predict_text_mixed(text: str):
    """
    Try the keyword_emotion rules first; if nothing matches, fall back to
    zero-shot classification. Returns a {Chinese label: float_score} dict
    for display in gr.Label.
    """
    if not text or text.strip() == "":
        return {}
    # Rules take priority
    res = keyword_emotion(text)
    if res:
        # Return only the top emotion and its ratio (the full
        # distribution would also work)
        top_emo = max(res, key=res.get)
        # Map the English keys to Traditional Chinese for display,
        # consistent with label_map_en2cn above
        mapping = {
            "happy": "高興",
            "angry": "憤怒",
            "sad": "悲傷",
            "surprise": "驚訝",
            "fear": "恐懼"
        }
        cn = mapping.get(top_emo, top_emo)
        return {cn: res[top_emo]}
    # No rule matched: zero-shot fallback
    try:
        out = zero_shot(text, candidate_labels=candidate_labels,
                        hypothesis_template="这句话表达了{}情绪")
        result = {}
        for lab, sc in zip(out["labels"], out["scores"]):
            cn = label_map_en2cn.get(lab.lower(), lab)
            result[cn] = float(sc)
        return result
    except Exception as e:
        print("zero-shot error:", e)
        return {"中性": 1.0}
# --- 5. Build the Gradio interface ---
with gr.Blocks() as demo:
    with gr.TabItem("臉部情緒"):
        with gr.Row():
            # Gradio 4.x API: `sources` takes a list (older versions used source="webcam")
            webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy", label="攝像頭畫面")
            face_out = gr.Label(label="情緒分布")
        webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
    # The remaining tabs follow the same Blocks pattern
    with gr.TabItem("語音情緒"):
        audio = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="錄音")
        audio_output = gr.Label(label="語音情緒結果")
        # Trigger on change: predict_voice runs once the recording is finished
        audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)
    with gr.TabItem("文字情緒"):
        gr.Markdown("### 文字情緒分析 (規則 + zero-shot)")
        with gr.Row():
            text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
            text_out = gr.Label(label="文字情緒結果")
        text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
if __name__ == "__main__":
    # Do not pass server_name or server_port here
    demo.launch()