import os

import gradio as gr
import joblib
import librosa
import numpy as np
import requests
from huggingface_hub import hf_hub_download

# --- Optional DeepFace import ---
# deepface may be missing in a local environment; record its availability so
# the facial-emotion path can be skipped instead of crashing at import time.
try:
    from deepface import DeepFace
    has_deepface = True
except ImportError:
    print("本地未安装 deepface,将在本地跳过臉部情緒;Space 上会安装 deepface。")
    has_deepface = False

# --- 1. Load the speech-emotion SVM ---
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(
    repo_id="GCLing/emotion-svm-model",
    filename="svm_emotion_model.joblib",
)
# NOTE(security): joblib.load unpickles the downloaded file, which can execute
# arbitrary code. Only point repo_id at a repository you trust.
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Text sentiment analysis via the Hugging Face Inference API ---
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN is None:
    print("警告:未检测到 HF_API_TOKEN,Inference API 可能失败。")

# A publicly available Chinese sentiment-classification model.
HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# (accepted label prefixes, Chinese display label). Prefix matching is used
# because some models decorate their labels, e.g. "positive (stars 4 and 5)"
# -- presumably the case for HF_TEXT_MODEL; confirm against its model card.
_LABEL_PREFIXES = (
    (("positive", "pos", "正面"), "正面"),
    (("negative", "neg", "负面", "負面"), "負面"),
    (("neutral", "中性"), "中性"),
)


def _to_chinese_label(label: str) -> str:
    """Map a model label to its Chinese display label; unknown labels pass through."""
    lowered = label.strip().lower()
    for prefixes, chinese in _LABEL_PREFIXES:
        if lowered.startswith(prefixes):
            return chinese
    return label


def predict_text_via_api(text: str):
    """Classify *text* with the hosted Inference API.

    Returns a ``{label: score}`` dict built from at most the first three
    entries of the response. Empty or whitespace-only input yields ``{}``.
    Any failure (non-200 status, unexpected payload shape, network or
    decoding error) is logged and degrades to ``{"中性": 1.0}``.
    """
    if not text or not text.strip():
        return {}
    payload = {"inputs": text}
    # Broad catch is deliberate: this is a UI callback boundary and every
    # failure should fall back to a neutral result rather than raise.
    try:
        resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
        if resp.status_code != 200:
            print(f"Inference API 返回状态码 {resp.status_code}: {resp.text}")
            return {"中性": 1.0}
        data = resp.json()
        # Text-classification responses are commonly nested one level deep
        # ([[{"label": ..., "score": ...}, ...]]); accept both the nested and
        # the flat shape.
        items = data
        if isinstance(items, list) and items and isinstance(items[0], list):
            items = items[0]
        if isinstance(items, list) and items and isinstance(items[0], dict):
            result = {}
            for item in items[:3]:  # show the top 3 entries
                chinese = _to_chinese_label(str(item.get("label", "")))
                result[chinese] = float(item.get("score", 0.0))
            return result
        print("Inference API 返回格式异常:", data)
        return {"中性": 1.0}
    except Exception as e:
        print("调用 Inference API 出错:", e)
        return {"中性": 1.0}


# Keyword rules take priority: if a rule matches, its result is returned;
# otherwise the Inference API is called.
emo_keywords = {
    "happy": ["開心", "快樂", "愉快", "喜悦", "喜悅", "歡喜", "興奮", "高興"],
    "angry": ["生氣", "憤怒", "不爽", "發火", "火大", "氣憤"],
    "sad": ["傷心", "難過", "哭", "難受", "心酸", "憂", "悲", "哀", "痛苦", "慘", "愁"],
    "surprise": ["驚訝", "意外", "嚇", "驚詫", "詫異", "訝異", "好奇"],
    "fear": ["怕", "恐懼", "緊張", "懼", "膽怯", "畏"],
    "disgust": ["噁心", "厭惡", "反感"],
}
negations = ["不", "沒", "沒有", "別", "勿", "非"]


def _has_affirmed_occurrence(text: str, keyword: str) -> bool:
    """Return True if *keyword* occurs in *text* at least once without a negation word directly before it."""
    negation_prefixes = tuple(negations)
    start = text.find(keyword)
    while start != -1:
        # endswith(..., 0, start) tests the characters immediately before
        # the keyword without slicing the string.
        if not text.endswith(negation_prefixes, 0, start):
            return True
        start = text.find(keyword, start + 1)
    return False


def keyword_emotion(text: str):
    """Guess an emotion from keyword hits in *text*.

    Each keyword contributes at most one hit to its emotion, and only if some
    occurrence is not directly preceded by a negation word. Returns
    ``{emotion: share_of_hits}`` for the emotion with the most hits (ties go
    to the emotion listed first in ``emo_keywords``), or ``None`` when no
    keyword matches.
    """
    text_proc = text.strip()
    counts = {
        emo: sum(_has_affirmed_occurrence(text_proc, w) for w in kws)
        for emo, kws in emo_keywords.items()
    }
    total = sum(counts.values())
    if total > 0:
        top = max(counts, key=counts.get)
        return {top: counts[top] / total}
    return None


def predict_text_mixed(text: str):
    """Classify *text* with keyword rules first, falling back to the Inference API.

    Returns ``{}`` for empty input, a single ``{chinese_label: score}`` entry
    when a keyword rule matches, and otherwise whatever
    ``predict_text_via_api`` returns.
    """
    print("predict_text_mixed:", text)
    if not text or not text.strip():
        return {}
    res = keyword_emotion(text)
    if res:
        # Translate the internal emotion key to its Chinese display label.
        mapping = {
            "happy": "高興", "angry": "憤怒", "sad": "悲傷",
            "surprise": "驚訝", "fear": "恐懼", "disgust": "厭惡",
        }
        emo, prob = next(iter(res.items()))
        return {mapping.get(emo, emo): float(prob)}
    # No rule matched: call the Inference API.
    return predict_text_via_api(text)

# --- 3. Speech emotion prediction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as the per-coefficient mean and variance of 13 MFCCs.

    The two statistics are concatenated (means first, then variances) into a
    single 1-D feature vector.
    """
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])


def predict_voice(audio_path: str):
    """Predict emotion probabilities for the audio file at *audio_path*.

    Returns ``{class_label: probability}`` from the SVM, or ``{}`` when no
    path is given or when loading / feature extraction / prediction fails
    (the error is printed).
    """
    if not audio_path:
        return {}
    # Broad catch is deliberate: this is a UI callback and a bad recording
    # should produce an empty result rather than an exception.
    try:
        signal, sr = librosa.load(audio_path, sr=None)  # keep native sample rate
        feat = extract_feature(signal, sr)
        probs = svm_model.predict_proba([feat])[0]
        # str() guarantees plain-string keys for the Label component even if
        # the classifier's classes_ are not Python strings.
        return {
            str(label): float(prob)
            for label, prob in zip(svm_model.classes_, probs)
        }
    except Exception as e:
        print("predict_voice error:", e)
        return {}


# --- 4. Facial emotion prediction ---
def predict_face(img: np.ndarray):
    """Return an emotion distribution for a webcam frame.

    Placeholder: returns ``{}`` when *img* is ``None`` and otherwise a fixed
    example distribution.
    TODO: replace the fixed values with the real DeepFace analysis.
    """
    if img is None:
        return {}
    return {"happy": 0.5, "sad": 0.5}  # example values only


def build_interface():
    """Build and return the Gradio Blocks app with face, voice and text tabs."""
    with gr.Blocks() as demo:
        gr.Markdown("## 多模態情緒分析示例")
        with gr.Tabs():
            # Facial-emotion tab
            with gr.TabItem("臉部情緒"):
                gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
                with gr.Row():
                    webcam = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="攝像頭畫面",
                    )
                    face_out = gr.Label(label="情緒分佈")
                # Every streamed frame is sent to predict_face.
                webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

            # Speech-emotion tab
            with gr.TabItem("語音情緒"):
                gr.Markdown("### 語音情緒 分析")
                with gr.Row():
                    # Use the same `sources=[...]` keyword as gr.Image above;
                    # the older `source=` keyword is rejected by Gradio
                    # versions that accept `sources`.
                    audio = gr.Audio(
                        sources=["microphone"],
                        streaming=False,
                        type="filepath",
                        label="錄音",
                    )
                    voice_out = gr.Label(label="語音情緒結果")
                audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

            # Text-emotion tab
            with gr.TabItem("文字情緒"):
                gr.Markdown("### 文字情緒 分析 (規則+Inference API)")
                with gr.Row():
                    text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                    text_out = gr.Label(label="文字情緒結果")
                # Analysis is triggered by the textbox submit event.
                text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
    return demo


if __name__ == "__main__":
    demo = build_interface()
    # share=True creates a temporary public link when testing locally.
    demo.launch(share=True)