File size: 2,568 Bytes
357796f
f93ed06
 
357796f
10f35a5
f93ed06
 
357796f
f93ed06
 
 
 
 
 
 
 
0b5ee74
f93ed06
c0e97f6
f93ed06
 
 
a2a4ab6
f93ed06
 
 
a2a4ab6
f93ed06
8bda002
f93ed06
8bda002
7caecfb
 
 
 
 
8bda002
f93ed06
 
8bda002
f93ed06
a2a4ab6
f93ed06
 
 
8bda002
f93ed06
 
 
 
 
 
 
 
8bda002
f93ed06
 
 
 
 
 
 
 
 
357796f
7caecfb
f93ed06
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import numpy as np, cv2, io, base64
import librosa, joblib
from deepface import DeepFace

# Preload the voice-emotion classifier once at import time so each
# request only pays for inference, not deserialization.
# NOTE(review): "voice_model.joblib" is resolved relative to the CWD —
# presumably the Space's root directory; verify on deployment.
audio_model = joblib.load("voice_model.joblib")

# Face emotion analysis
def analyze_face(frame):
    """Return the dominant facial emotion detected in a camera frame.

    Parameters
    ----------
    frame : numpy.ndarray or None
        H×W×3 RGB image from the Gradio camera component; None when no
        frame has been captured yet.

    Returns
    -------
    str
        Dominant emotion label, "no frame" / "no face" for empty input,
        or an "Error: ..." message if analysis fails.
    """
    if frame is None:
        return "no frame"
    try:
        res = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
        # deepface >= 0.0.76 returns a list with one dict per detected
        # face; older versions return a single dict. The original code
        # indexed the list with a string key, which always raised and
        # surfaced as "Error: ...". Normalize to one dict first.
        if isinstance(res, list):
            if not res:
                return "no face"
            res = res[0]
        return res['dominant_emotion']
    except Exception as e:
        # Broad catch is deliberate: the UI should show a message, not crash.
        return f"Error: {e}"

# Speech emotion analysis
def analyze_audio(wav_file):
    """Classify the emotion of an uploaded audio clip.

    Parameters
    ----------
    wav_file : str, file-like, or None
        Gradio delivers either a file path (``type="filepath"``) or a
        file-like object with ``.read()`` (``type="file"``); both are
        accepted. None means nothing was uploaded.

    Returns
    -------
    str
        The label predicted by the preloaded ``audio_model``, or
        "no audio" when no file was provided.
    """
    if wav_file is None:
        return "no audio"
    # Normalize to something librosa.load can open: a path string is
    # passed through, anything else is assumed file-like and buffered.
    if isinstance(wav_file, str):
        source = wav_file
    else:
        source = io.BytesIO(wav_file.read())
    y, sr = librosa.load(source, sr=None)  # sr=None keeps the native sample rate
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Mean over the time axis -> fixed-length 13-dim feature vector,
    # matching what the classifier was trained on.
    feat = np.mean(mfccs.T, axis=0)
    return audio_model.predict([feat])[0]

# Text emotion analysis
def analyze_text(txt):
    """Map Chinese text to a coarse emotion label by keyword lookup.

    Scans the keyword table in order and returns the label of the first
    emotion with a keyword contained in *txt*; "neutral" when nothing
    matches, "no text" for empty/None input.
    """
    if not txt:
        return "no text"
    keyword_table = (
        ("😊 happy",   ("開心", "快樂", "愉快", "喜悅", "歡喜", "興奮", "高興", "歡")),
        ("😠 angry",   ("生氣", "憤怒", "不爽", "發火", "火大", "氣憤")),
        ("😢 sad",     ("傷心", "難過", "哭", "憂", "悲", "心酸", "哀", "痛苦", "慘", "愁")),
        ("😲 surprise", ("驚訝", "意外", "嚇", "好奇", "驚詫", "詫異", "訝異")),
        ("😨 fear",    ("怕", "恐懼", "緊張", "懼", "膽怯", "畏")),
    )
    matched = next(
        (label for label, keywords in keyword_table
         if any(word in txt for word in keywords)),
        None,
    )
    return matched if matched is not None else "neutral"

# Gradio UI: three tabs (webcam face, audio upload, text input), each
# wired to its corresponding analysis function above.
with gr.Blocks() as demo:
    gr.Markdown("# 多模態即時情緒分析")
    with gr.Tabs():
        with gr.TabItem("📷 即時人臉"):
            # BUG FIX: gr.Camera does not exist in Gradio — importing
            # this file raised AttributeError. The webcam widget is an
            # Image with source="webcam" (Gradio 3.x API, consistent
            # with the gr.Audio(source=...) usage below).
            camera = gr.Image(source="webcam", label="請對準鏡頭")
            face_out = gr.Textbox(label="偵測到的情緒")
            # .change fires whenever a snapshot is taken.
            camera.change(analyze_face, inputs=camera, outputs=face_out)
        with gr.TabItem("🎤 上傳語音"):
            audio = gr.Audio(source="upload", type="file", label="上傳 WAV")
            audio_out = gr.Textbox(label="語音情緒")
            audio.change(analyze_audio, inputs=audio, outputs=audio_out)
        with gr.TabItem("⌨️ 輸入文字"):
            txt_input = gr.Textbox(label="輸入文字")
            txt_btn = gr.Button("分析文字")
            txt_out = gr.Textbox(label="文字情緒")
            txt_btn.click(analyze_text, inputs=txt_input, outputs=txt_out)

    gr.Markdown(
        "⚠️ Hugging Face Spaces 無法直接呼叫本機攝影機;"
        "請在手機/電腦瀏覽器使用,或拉到最下方打開 Camera 頁籤測試。"
    )

# Launch only when executed as a script (Spaces imports the module and
# serves `demo` itself).
if __name__ == "__main__":
    demo.launch()