#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import joblib
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
from huggingface_hub import hf_hub_download
from deepface import DeepFace

# --- 1. Download and load the SVM model ---
# repo_id is your model repo on the Hub, e.g. "GCLing/emotion-svm-model";
# filename is the file uploaded to that repo, e.g. "svm_emotion_model.joblib".
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Load the text emotion analysis model ---
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
label_map_en2cn = {
    "joy": "高興", "sadness": "悲傷", "anger": "憤怒",
    "fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
emo_keywords = {
    "happy": ["開心", "快樂", "愉快", "喜悦", "喜悅", "歡喜", "興奮", "高興"],
    "angry": ["生氣", "憤怒", "不爽", "發火", "火大", "氣憤"],
    "sad": ["傷心", "難過", "哭", "難受", "心酸", "憂", "悲", "哀", "痛苦", "慘", "愁"],
    "surprise": ["驚訝", "意外", "嚇", "驚詫", "詫異", "訝異", "好奇"],
    "fear": ["怕", "恐懼", "緊張", "懼", "膽怯", "畏"],
}
# Simple list of negation words
negations = ["不", "沒", "沒有", "別", "勿", "非"]

# --- 3. Audio feature extraction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """
    Compute 13 MFCCs from an audio signal (numpy array) at sample rate sr,
    then return the per-coefficient mean and variance as a 26-dim feature vector.
    """
    # librosa loads audio as a float numpy array
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # axis=1: mean and variance over time for each MFCC coefficient
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])

# --- 4. The three prediction functions ---
def predict_face(img):
    print("predict_face called, img is None?", img is None)
    if img is None:
        return {}
    try:
        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # DeepFace may return a list (one entry per detected face) or a single dict;
        # take the first face either way.
        if isinstance(res, list):
            first = res[0] if res else {}
            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
        else:
            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
        print("predict_face result:", emo)
        return emo
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}

def predict_voice(audio_path: str):
    # No recording yet: return an empty dict instead of raising.
    if not audio_path:
        print("predict_voice: got None or empty audio_path, skipping analysis")
        return {}
    try:
        signal, sr = librosa.load(audio_path, sr=None)
        # Extract the MFCC feature vector and classify it with the SVM
        feat = extract_feature(signal, sr)
        probs = svm_model.predict_proba([feat])[0]
        labels = svm_model.classes_
        return {labels[i]: float(probs[i]) for i in range(len(labels))}
    except Exception as e:
        print("predict_voice error:", e)
        return {}

def predict_text_mixed(text: str):
    """
    Try the keyword rules first; if nothing matches, fall back to zero-shot
    classification. Returns a {Chinese label: float score} dict for gr.Label.
    """
    if not text or text.strip() == "":
        return {}
    # Rules first
    res = keyword_emotion(text)
    if res:
        # Return only the top emotion and its share (the full distribution could be returned instead).
        top_emo = max(res, key=res.get)
        # Map the English key to a Chinese display label.
        mapping = {"happy": "高兴", "angry": "愤怒", "sad": "悲伤", "surprise": "惊讶", "fear": "恐惧"}
        cn = mapping.get(top_emo, top_emo)
        return {cn: res[top_emo]}
    # No rule matched: zero-shot fallback
    try:
        out = zero_shot(text, candidate_labels=candidate_labels,
                        hypothesis_template="这句话表达了{}情绪")
        result = {}
        for lab, sc in zip(out["labels"], out["scores"]):
            cn = label_map_en2cn.get(lab.lower(), lab)
            result[cn] = float(sc)
        return result
    except Exception as e:
        print("zero-shot error:", e)
        return {"中性": 1.0}
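# `keyword_emotion` is called by predict_text_mixed but was never defined in the
# original script. The sketch below is one plausible implementation based on the
# apparent intent of `emo_keywords` and `negations` (an assumption, not the original
# author's code): count keyword hits per emotion, skip hits immediately preceded by
# a negation word, and return the normalized distribution, or None when nothing matches.
def keyword_emotion(text: str):
    counts = {emo: 0 for emo in emo_keywords}
    for emo, words in emo_keywords.items():
        for w in words:
            idx = text.find(w)
            while idx != -1:
                # Check whether the characters right before the keyword form a negation word.
                negated = any(text[max(0, idx - len(neg)):idx] == neg for neg in negations)
                if not negated:
                    counts[emo] += 1
                idx = text.find(w, idx + 1)
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: c / total for emo, c in counts.items() if c > 0}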
# --- 5. Build the Gradio UI ---
# Note: `sources` is the Gradio 4.x argument name (the 3.x API used `source`);
# this assumes the app runs on Gradio 4.x.
with gr.Blocks() as demo:
    with gr.TabItem("臉部情緒"):
        with gr.Row():
            webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy", label="攝像頭畫面")
            face_out = gr.Label(label="情緒分布")
        webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

    # The remaining tabs can be written the original way, or stay in this Blocks style.
    with gr.TabItem("語音情緒"):
        audio = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="錄音")
        audio_output = gr.Label(label="語音情緒結果")
        # Trigger on change: predict_voice runs once the recording is finished.
        audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)

    with gr.TabItem("文字情緒"):
        gr.Markdown("### 文字情緒分析 (规则 + zero-shot)")
        with gr.Row():
            text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
            text_out = gr.Label(label="文字情緒結果")
        text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)

if __name__ == "__main__":
    demo.launch()  # do not pass server_name or server_port