Spaces:

GCLing
/

emotion

Runtime error

File size: 7,267 Bytes

b8ff14e
283d228
c7ec63e
7b9bebe
c7ec63e
283d228
c7ec63e
287ab51
283d228
 
 
 
 
 
 
287ab51
283d228
c7ec63e
 
 
 
 
283d228
 
 
 
 
 
 
 
2a89f5d
 
283d228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36c201f
 
 
 
 
2a89f5d
283d228
36c201f
 
7b9bebe
283d228
 
7b9bebe
 
2a89f5d
283d228
 
7b9bebe
 
283d228
 
7b9bebe
283d228
7b9bebe
283d228
 
 
 
 
dfd5bb6
36c201f
283d228
 
4eafa91
36c201f
 
283d228
2a89f5d
283d228
 
2a89f5d
283d228
 
2a89f5d
283d228
 
36c201f
283d228
7b9bebe
 
 
dfd5bb6
7b9bebe
 
 
 
 
 
 
 
 
 
 
 
c7ec63e
283d228
c0131be
 
7b9bebe
c0131be
 
7b9bebe
c0131be
 
7b9bebe
287ab51
 
 
 
c0131be
 
 
 
 
 
 
 
 
 
 
287ab51
 
 
c0131be
287ab51
 
283d228
82dd2b7
c0131be
287ab51
c0131be
b9af6a3
 
 
c0131be
287ab51
c0131be
287ab51
7b9bebe
c0131be
 
c7ec63e
b9af6a3
283d228
b1ced09

import gradio as gr
import os
import numpy as np
import joblib
import librosa
import requests
from huggingface_hub import hf_hub_download

# --- Optional DeepFace import ---
# deepface may be missing in a local environment. Record whether it could be
# imported in `has_deepface` instead of failing at import time.
try:
    from deepface import DeepFace
    has_deepface = True
except ImportError:
    print("本地未安装 deepface，将在本地跳过臉部情緒；Space 上会安装 deepface。")
    has_deepface = False

# --- 1. Speech SVM: fetch the trained classifier from the Hub and load it ---
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(
    repo_id="GCLing/emotion-svm-model",
    filename="svm_emotion_model.joblib",
)
svm_model = joblib.load(model_path)
print("SVM model loaded.")

# --- 2. Text sentiment: served through the hosted Inference API ---
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if HF_API_TOKEN is None:
    print("警告：未检测到 HF_API_TOKEN，Inference API 可能失败。")

# A publicly available Chinese sentiment-classification model.
HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = "https://api-inference.huggingface.co/models/" + HF_TEXT_MODEL

# Send the bearer token only when one is configured; otherwise call anonymously.
headers = {}
if HF_API_TOKEN:
    headers["Authorization"] = f"Bearer {HF_API_TOKEN}"


def _normalize_sentiment_label(label: str) -> str:
    """Map a model sentiment label onto the Chinese label shown in the UI.

    Matching is done on the label prefix because some models append extra
    text to the label (e.g. "positive (stars 4 and 5)"). Unknown labels are
    returned unchanged.
    """
    key = label.strip().lower()
    if key == "pos" or key.startswith(("positive", "正面")):
        return "正面"
    if key == "neg" or key.startswith(("negative", "负面", "負面")):
        return "負面"
    if key.startswith(("neutral", "中性")):
        return "中性"
    return label


def predict_text_via_api(text: str):
    """Classify the sentiment of ``text`` through the hosted Inference API.

    Args:
        text: The user's input text.

    Returns:
        ``{}`` for empty/blank input; otherwise a dict mapping a (Chinese)
        label to its score for up to the first three returned labels. Any
        failure (non-200 status, unexpected payload, network or parsing
        error) degrades to ``{"中性": 1.0}`` so the UI callback never raises.
    """
    if not text or not text.strip():
        return {}
    payload = {"inputs": text}
    # Deliberately broad try/except: this is a UI callback boundary and a
    # failed remote call should fall back to "neutral", not crash the app.
    try:
        resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
        if resp.status_code != 200:
            print(f"Inference API 返回状态码 {resp.status_code}: {resp.text}")
            return {"中性": 1.0}
        data = resp.json()

        # For a single input string the text-classification endpoint wraps
        # the per-label results in an outer list: [[{"label", "score"}, ...]].
        # A flat [{"label", "score"}, ...] is accepted as well.
        candidates = data
        if isinstance(candidates, list) and candidates and isinstance(candidates[0], list):
            candidates = candidates[0]

        if not (isinstance(candidates, list) and candidates
                and isinstance(candidates[0], dict)):
            print("Inference API 返回格式异常:", data)
            return {"中性": 1.0}

        # Keep only the first three labels for display.
        result = {}
        for item in candidates[:3]:
            label = str(item.get("label", ""))
            result[_normalize_sentiment_label(label)] = float(item.get("score", 0.0))
        return result
    except Exception as e:
        print("调用 Inference API 出错:", e)
        return {"中性": 1.0}

# Keyword rules take priority for text input; the Inference API is only
# consulted when no rule fires.
emo_keywords = {
    "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
    "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
    "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
    "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
    "fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
    "disgust": ["噁心","厭惡","反感"]
}
# A keyword immediately preceded by one of these words is treated as negated.
negations = ["不","沒","沒有","別","勿","非"]


def _has_affirmed_occurrence(text: str, keyword: str) -> bool:
    """Return True if ``keyword`` occurs in ``text`` at least once without a
    negation word directly in front of it."""
    neg_prefixes = tuple(negations)
    pos = text.find(keyword)
    while pos != -1:
        # text[:pos] ending with a negation word means this mention is negated.
        if not text.endswith(neg_prefixes, 0, pos):
            return True
        pos = text.find(keyword, pos + 1)
    return False


def keyword_emotion(text: str):
    """Guess the dominant emotion of ``text`` from keyword hits.

    Each keyword contributes at most one hit to its emotion, and only when
    some occurrence of it is not directly preceded by a negation word. Every
    occurrence is examined, so a negated first mention does not hide a later
    affirmative one.

    Args:
        text: Input text; ``None`` or empty text yields no match.

    Returns:
        ``{emotion: share}`` for the emotion with the most hits, where
        ``share`` is its fraction of all hits (ties go to the emotion listed
        first in ``emo_keywords``), or ``None`` when nothing matched.
    """
    if not text:
        return None
    text_proc = text.strip()
    counts = {
        emo: sum(_has_affirmed_occurrence(text_proc, w) for w in kws)
        for emo, kws in emo_keywords.items()
    }
    total = sum(counts.values())
    if total == 0:
        return None
    # Normalise against all hits and report only the strongest emotion.
    top = max(counts, key=counts.get)
    return {top: counts[top] / total}

def predict_text_mixed(text: str):
    """Classify text emotion: keyword rules first, Inference API as fallback.

    Args:
        text: The user's input text.

    Returns:
        ``{}`` for empty/blank input; a single-entry ``{label: score}`` dict
        with a Chinese label when a keyword rule matched; otherwise whatever
        ``predict_text_via_api`` returns.
    """
    print("predict_text_mixed:", text)
    if not (text and text.strip()):
        return {}

    rule_hit = keyword_emotion(text)
    if not rule_hit:
        # No keyword rule fired: defer to the hosted model.
        return predict_text_via_api(text)

    # Translate the internal emotion key into the label shown to the user.
    label_zh = {
        "happy": "高興",
        "angry": "憤怒",
        "sad": "悲傷",
        "surprise": "驚訝",
        "fear": "恐懼",
        "disgust": "厭惡",
    }
    emotion, score = next(iter(rule_hit.items()))
    return {label_zh.get(emotion, emotion): float(score)}

# --- 3. 语音情绪预测 ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as MFCC statistics.

    Computes 13 MFCCs with librosa, then concatenates their mean and
    variance taken along axis 1 (presumably the frame axis — confirm against
    the librosa docs).

    Args:
        signal: Audio samples.
        sr: Sampling rate of ``signal``.

    Returns:
        A 1-D array: the per-coefficient means followed by the variances.
    """
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    coeff_mean = coeffs.mean(axis=1)
    coeff_var = coeffs.var(axis=1)
    return np.concatenate((coeff_mean, coeff_var))

def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file.

    Args:
        audio_path: Path of the audio file; falsy values mean "no recording".

    Returns:
        A dict mapping each class of the SVM model to its predicted
        probability, or ``{}`` when there is no input or prediction fails
        (the error is printed).
    """
    if not audio_path:
        return {}
    try:
        # sr=None keeps the file's native sampling rate.
        waveform, rate = librosa.load(audio_path, sr=None)
        features = extract_feature(waveform, rate)
        class_probs = svm_model.predict_proba([features])[0]
        return {
            label: float(prob)
            for label, prob in zip(svm_model.classes_, class_probs)
        }
    except Exception as e:
        print("predict_voice error:", e)
        return {}

# --- 4. 人脸情绪预测 ---
import gradio as gr

def predict_face(img: np.ndarray):
    """Return an emotion distribution for one webcam frame.

    Placeholder: the DeepFace analysis is not wired in yet, so every
    non-empty frame yields the same example distribution.

    Args:
        img: The frame as an array, or ``None`` when no frame is available.

    Returns:
        ``{}`` when ``img`` is ``None``, otherwise the fixed example dict.
    """
    # TODO: replace the example values with the real DeepFace analysis.
    return {} if img is None else {"happy": 0.5, "sad": 0.5}

def build_interface():
    """Assemble the three-tab Gradio demo (face / voice / text emotion).

    Each tab wires one input component to its predictor and shows the result
    in a ``gr.Label``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 多模態情緒分析示例")
        with gr.Tabs():
            # Face emotion tab
            with gr.TabItem("臉部情緒"):
                gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
                with gr.Row():
                    # `sources` is passed as a list, matching its documented type.
                    webcam = gr.Image(sources=["webcam"], streaming=True, type="numpy", label="攝像頭畫面")
                    face_out = gr.Label(label="情緒分佈")
                # Every streamed frame is sent to predict_face.
                webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

            # Voice emotion tab
            with gr.TabItem("語音情緒"):
                gr.Markdown("### 語音情緒 分析")
                with gr.Row():
                    # Use the list-valued `sources` keyword here too, consistent
                    # with gr.Image above; the older singular `source=` keyword
                    # is rejected by Gradio versions that accept `sources`.
                    audio = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="錄音")
                    voice_out = gr.Label(label="語音情緒結果")
                audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

            # Text emotion tab
            with gr.TabItem("文字情緒"):
                gr.Markdown("### 文字情緒 分析 (規則+Inference API)")
                with gr.Row():
                    text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                    text_out = gr.Label(label="文字情緒結果")
                # Prediction is triggered by the textbox's submit event.
                text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)

    return demo



if __name__ == "__main__":
    demo = build_interface()
    # share=True produces a temporary public link when testing locally.
    demo.launch(share=True)