#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import joblib
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
from huggingface_hub import hf_hub_download
from deepface import DeepFace
# --- 1. Download and load the SVM model ---
# repo_id is your model repository path, e.g. "GCLing/emotion-svm-model";
# filename is the file uploaded to that repository, e.g. "svm_emotion_model.joblib".
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
print(f"SVM model downloaded to: {model_path}")
svm_model = joblib.load(model_path)
print("SVM model loaded.")
# --- 2. Load the text emotion analysis model ---
zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
label_map_en2cn = {
"joy": "高興", "sadness": "悲傷", "anger": "憤怒",
"fear": "恐懼", "surprise": "驚訝", "disgust": "厭惡"
}
emo_keywords = {
"happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
"angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
"sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
"surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
"fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
}
# Simple list of negation words
negations = ["不","沒","沒有","別","勿","非"]
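
# predict_text_mixed below calls keyword_emotion(), which this file never
# defines. The following is a minimal sketch of what it presumably does, given
# emo_keywords and negations above: count keyword hits per emotion, skip a hit
# when a negation word immediately precedes it, and return the normalized
# distribution (or None when nothing matches, so the zero-shot fallback runs).
def keyword_emotion(text: str):
    counts = {emo: 0 for emo in emo_keywords}
    for emo, kws in emo_keywords.items():
        for kw in kws:
            idx = text.find(kw)
            while idx != -1:
                # Skip the match if a negation word directly precedes it,
                # e.g. "不開心" should not count toward "happy".
                negated = any(text[max(0, idx - len(neg)):idx] == neg for neg in negations)
                if not negated:
                    counts[emo] += 1
                idx = text.find(kw, idx + 1)
    total = sum(counts.values())
    if total == 0:
        return None
    return {emo: c / total for emo, c in counts.items() if c > 0}
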
# --- 3. Audio feature extraction function ---
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
"""
從一段音訊 signal (numpy array) 和取樣率 sr 計算 MFCC 特徵 (13 維),
並回傳平均與變異組成的特徵向量 (共 26 維)。
"""
# librosa 載入後 signal 為 float numpy array
mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
# axis=1: 每個 MFCC 維度對時間做平均與變異數
return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
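
# Quick sanity check (illustrative only): one second of silence at 22.05 kHz
# should yield a 26-dimensional feature vector.
# assert extract_feature(np.zeros(22050), 22050).shape == (26,)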
# --- 4. Three prediction functions ---
def predict_face(img):
print("predict_face called, img is None?", img is None)
if img is None:
return {}
try:
res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # DeepFace may return a list (one entry per detected face) or a dict;
        # take the first face either way.
if isinstance(res, list):
first = res[0] if res else {}
emo = first.get("emotion", {}) if isinstance(first, dict) else {}
else:
emo = res.get("emotion", {}) if isinstance(res, dict) else {}
print("predict_face result:", emo)
return emo
except Exception as e:
print("DeepFace.analyze error:", e)
return {}
def predict_voice(audio_path: str):
    # If there is no recording file path, skip analysis and return an empty dict
    if not audio_path:
        print("predict_voice: received None or empty audio_path, skipping analysis")
        return {}
try:
signal, sr = librosa.load(audio_path, sr=None)
        # Extract the 26-dimensional MFCC feature vector
        feat = extract_feature(signal, sr)
probs = svm_model.predict_proba([feat])[0]
labels = svm_model.classes_
return {labels[i]: float(probs[i]) for i in range(len(labels))}
except Exception as e:
print("predict_voice error:", e)
return {}
def predict_text_mixed(text: str):
"""
先用 keyword_emotion 规则;若未命中再用 zero-shot 分类,
返回 {中文标签: float_score} 的 dict,供 gr.Label 显示。
"""
if not text or text.strip() == "":
return {}
# 规则优先
res = keyword_emotion(text)
if res:
# 只返回最高那一项及其比例,也可返回完整分布
top_emo = max(res, key=res.get)
# 可将英文 key 转成中文,若需要
# mapping: happy->高兴, angry->愤怒, etc.
mapping = {
"happy": "高兴",
"angry": "愤怒",
"sad": "悲伤",
"surprise": "惊讶",
"fear": "恐惧"
}
cn = mapping.get(top_emo, top_emo)
return {cn: res[top_emo]}
# 规则未命中,zero-shot fallback
try:
out = zero_shot(text, candidate_labels=candidate_labels,
hypothesis_template="这句话表达了{}情绪")
result = {}
for lab, sc in zip(out["labels"], out["scores"]):
cn = label_map_en2cn.get(lab.lower(), lab)
result[cn] = float(sc)
return result
except Exception as e:
print("zero-shot error:", e)
return {"中性": 1.0}
# --- 5. Build the Gradio interface ---
with gr.Blocks() as demo:
with gr.TabItem("臉部情緒"):
with gr.Row():
webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
face_out = gr.Label(label="情緒分布")
webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
# 其餘 Tab 可按原先寫法,或用 Blocks 方式
with gr.TabItem("語音情緒"):
        audio = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="錄音")
        audio_output = gr.Label(label="語音情緒結果")
        # Triggered on change: predict_voice runs once the recording is finished
        audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)
with gr.TabItem("文字情緒"):
gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
with gr.Row():
text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
text_out = gr.Label(label="文字情緒結果")
text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
if __name__ == "__main__":
demo.launch()
    # Do not pass server_name or server_port here