File size: 6,097 Bytes
c7ec63e
 
 
 
 
 
 
 
36c201f
 
 
c7ec63e
 
 
 
36c201f
c7ec63e
 
 
 
 
 
 
 
 
 
36c201f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
 
 
 
 
 
 
 
 
 
 
 
 
 
4826bc7
dfd5bb6
4826bc7
 
babd923
4826bc7
dfd5bb6
 
4826bc7
dfd5bb6
4826bc7
 
dfd5bb6
4826bc7
dd2bb14
babd923
dd2bb14
dfd5bb6
 
c7ec63e
dd2bb14
4826bc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
4eafa91
dd2bb14
36c201f
 
 
 
 
4eafa91
 
36c201f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd2bb14
36c201f
 
 
 
 
 
dd2bb14
 
36c201f
 
 
 
 
dd2bb14
c7ec63e
 
babd923
dfd5bb6
 
 
 
 
 
c7ec63e
dd2bb14
babd923
 
 
 
 
 
c7ec63e
babd923
36c201f
 
 
 
 
dd2bb14
c7ec63e
505ff46
c7ec63e
 
92eb8b6
 
0422b31
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import joblib
import numpy as np
import librosa
import gradio as gr
import time
import re
from transformers import pipeline
from huggingface_hub import hf_hub_download
from deepface import DeepFace
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline


# --- 1. Download and load the SVM voice-emotion model ---
# repo_id is the Hub model repository (e.g. "GCLing/emotion-svm-model");
# filename is the artifact uploaded to that repo (e.g. "svm_emotion_model.joblib").
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
# Deserialize the scikit-learn estimator; used by predict_voice() below.
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Load the text emotion-analysis model and rule tables ---
# Multilingual zero-shot classifier used as a fallback when no keyword matches.
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
# English labels fed to the zero-shot classifier.
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
# English -> Traditional-Chinese display names for the zero-shot labels.
label_map_en2cn = {
    "joy": "高興", "sadness": "悲傷", "anger": "憤怒",
    "fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
# Keyword lists for the rule-based scorer (emotion -> trigger words).
emo_keywords = {
    "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
    "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
    "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
    "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
    "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
# Simple negation-word list (a keyword preceded by one of these is negated).
negations = ["不","沒","沒有","別","勿","非"]

# --- 3. Audio feature extraction ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build a 26-dim feature vector from an audio signal.

    Computes 13 MFCC coefficients over the signal, then concatenates the
    per-coefficient mean and variance across time (13 + 13 = 26 values),
    matching the feature layout the SVM model was trained on.
    """
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # Collapse the time axis: one mean and one variance per coefficient.
    mean_part = np.mean(mfcc, axis=1)
    var_part = np.var(mfcc, axis=1)
    return np.concatenate([mean_part, var_part])

# --- 4. Prediction functions ---

def predict_face(img):
    """Run DeepFace emotion analysis on one webcam frame.

    Returns a dict mapping emotion name -> score, or {} when no frame is
    available or the analysis fails (best-effort: errors are logged, not raised).
    """
    print("predict_face called, img is None?", img is None)
    if img is None:
        return {}
    try:
        analysis = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # DeepFace returns either a list (one entry per detected face) or a
        # single dict; take the first face in both cases.
        face = analysis[0] if isinstance(analysis, list) and analysis else analysis
        scores = face.get("emotion", {}) if isinstance(face, dict) else {}
        print("predict_face result:", scores)
        return scores
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}




def predict_voice(audio_path: str):
    """Classify the emotion of a recorded audio file with the SVM model.

    Returns a dict mapping class label -> probability, or {} when there is
    no recording or loading / feature extraction / prediction fails.
    """
    if not audio_path:
        # Gradio can fire the callback before a file exists; skip quietly.
        print("predict_voice: 收到 None 或空 audio_path,跳過分析")
        return {}
    try:
        signal, sr = librosa.load(audio_path, sr=None)
        features = extract_feature(signal, sr)
        probabilities = svm_model.predict_proba([features])[0]
        return dict(zip(svm_model.classes_, (float(p) for p in probabilities)))
    except Exception as e:
        print("predict_voice error:", e)
        return {}



    
def keyword_emotion(text: str):
    """Rule-based emotion scorer over the module-level keyword tables.

    Counts occurrences of each emotion's trigger words in *text*; a keyword
    immediately preceded by a negation word (e.g. "不開心") is not counted.
    Returns {emotion: normalized_score} for matched emotions, or None when
    nothing matched.

    NOTE(review): this function was referenced by predict_text_mixed but never
    defined anywhere in the file, so every text prediction raised NameError;
    this implementation restores the intended rule-first behavior.
    """
    counts = {emo: 0 for emo in emo_keywords}
    for emo, words in emo_keywords.items():
        for word in words:
            for match in re.finditer(re.escape(word), text):
                start = match.start()
                # Skip the hit when a negation word sits right before it.
                negated = any(
                    text[max(0, start - len(neg)):start] == neg for neg in negations
                )
                if not negated:
                    counts[emo] += 1
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: cnt / total for emo, cnt in counts.items() if cnt > 0}


def predict_text_mixed(text: str):
    """Analyze the emotion of *text*: keyword rules first, zero-shot fallback.

    Returns {Chinese label: float score} for display in gr.Label; empty dict
    for blank input, and {"中性": 1.0} if the zero-shot model errors out.
    """
    if not text or text.strip() == "":
        return {}
    # Rules take priority over the (slower) zero-shot model.
    res = keyword_emotion(text)
    if res:
        # Return only the top-scoring emotion, translated to Chinese.
        top_emo = max(res, key=res.get)
        mapping = {
            "happy": "高兴",
            "angry": "愤怒",
            "sad": "悲伤",
            "surprise": "惊讶",
            "fear": "恐惧"
        }
        cn = mapping.get(top_emo, top_emo)
        return {cn: res[top_emo]}
    # No keyword hit: fall back to zero-shot classification.
    try:
        out = zero_shot(text, candidate_labels=candidate_labels,
                        hypothesis_template="这句话表达了{}情绪")
        result = {}
        for lab, sc in zip(out["labels"], out["scores"]):
            cn = label_map_en2cn.get(lab.lower(), lab)
            result[cn] = float(sc)
        return result
    except Exception as e:
        print("zero-shot error:", e)
        return {"中性": 1.0}





# --- 5. Build the Gradio interface ---
# NOTE(review): the voice and text TabItems were previously indented inside
# the face TabItem, nesting two tabs within the first; they are siblings now.
with gr.Blocks() as demo:
    with gr.TabItem("臉部情緒"):
        with gr.Row():
            # `sources=["webcam"]` matches the Gradio 4 API already used by
            # gr.Audio below (the old `source="webcam"` keyword was removed).
            webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy", label="攝像頭畫面")
            face_out = gr.Label(label="情緒分布")
        # Stream each webcam frame through DeepFace emotion analysis.
        webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

    with gr.TabItem("語音情緒"):
        audio = gr.Audio(sources="microphone", streaming=False, type="filepath", label="錄音")
        audio_output = gr.Label(label="語音情緒結果")
        # Fires when the recording finishes and a file path is available.
        audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)

    with gr.TabItem("文字情緒"):
        gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
        with gr.Row():
            text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
            text_out = gr.Label(label="文字情緒結果")
        text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)


if __name__ == "__main__":
    # Use default host/port; do not pass server_name or server_port here.
    demo.launch()