|
import gradio as gr |
|
import os |
|
import numpy as np |
|
import joblib |
|
import librosa |
|
import requests |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# deepface is an optional dependency: record in `has_deepface` whether it
# could be imported so the rest of the module can check before using it.
try:
    from deepface import DeepFace
except ImportError:
    has_deepface = False
    print("本地未安装 deepface,将在本地跳过臉部情緒;Space 上会安装 deepface。")
else:
    has_deepface = True
|
|
|
|
|
# Import-time setup: fetch the serialized SVM emotion model from the
# Hugging Face Hub and load it into the module-level `svm_model`, which
# predict_voice() uses for inference.
print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
# NOTE(review): joblib.load unpickles the downloaded file, which can execute
# arbitrary code. Only keep this if the repo above is trusted; consider
# pinning a specific `revision` in hf_hub_download.
svm_model = joblib.load(model_path)
print("SVM model loaded.")
|
|
|
|
|
# Hugging Face Inference API configuration for the text-sentiment model.
# The token comes from the environment. A missing OR empty token is treated
# the same way: warn once and send requests without an Authorization header.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if not HF_API_TOKEN:
    print("警告:未检测到 HF_API_TOKEN,Inference API 可能失败。")

HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
# Same truthiness test as the warning above, so the two can never disagree.
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
|
|
|
|
|
def predict_text_via_api(text: str):
    """Classify the sentiment of `text` through the HF Inference API.

    Args:
        text: Input text. Empty, whitespace-only, or None input is not sent.

    Returns:
        {} for empty input. Otherwise a dict mapping a sentiment label to
        its score, built from at most the first three entries of the
        response. Labels recognised as positive/negative/neutral are
        normalised to "正面"/"負面"/"中性"; other labels pass through
        unchanged. Any HTTP, network, or parsing problem is logged and
        yields the neutral fallback {"中性": 1.0}.
    """
    if not text or not text.strip():
        return {}

    # Canonical label -> accepted prefixes. Prefix matching lets descriptive
    # labels (e.g. "positive (stars 4 and 5)") map correctly as well.
    label_prefixes = (
        ("正面", ("positive", "pos", "正面")),
        ("負面", ("negative", "neg", "负面", "負面")),
        ("中性", ("neutral", "中性")),
    )

    try:
        resp = requests.post(
            HF_API_URL, headers=headers, json={"inputs": text}, timeout=30
        )
        if resp.status_code != 200:
            print(f"Inference API 返回状态码 {resp.status_code}: {resp.text}")
            return {"中性": 1.0}

        data = resp.json()

        # For a single input string the API may wrap the per-label results in
        # an outer list ([[{...}, {...}]]); unwrap it before validating.
        if isinstance(data, list) and data and isinstance(data[0], list):
            data = data[0]

        if not (isinstance(data, list) and data and isinstance(data[0], dict)):
            print("Inference API 返回格式异常:", data)
            return {"中性": 1.0}

        result = {}
        for item in data[:3]:
            if not isinstance(item, dict):
                continue
            label = str(item.get("label") or "")
            key = label.strip().lower()
            cn = next(
                (name for name, prefixes in label_prefixes
                 if key.startswith(prefixes)),
                label,
            )
            result[cn] = float(item.get("score", 0.0))
        return result
    except Exception as e:
        # Best-effort boundary: the UI should still get a result when the
        # remote call or response parsing fails.
        print("调用 Inference API 出错:", e)
        return {"中性": 1.0}
|
|
|
|
|
# Keyword lexicon for the rule-based text classifier: emotion -> trigger words.
emo_keywords = {
    "happy": ["開心","快樂","愉快","喜悦","喜悅","歡喜","興奮","高興"],
    "angry": ["生氣","憤怒","不爽","發火","火大","氣憤"],
    "sad": ["傷心","難過","哭","難受","心酸","憂","悲","哀","痛苦","慘","愁"],
    "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
    "fear": ["怕","恐懼","緊張","懼","膽怯","畏"],
    "disgust": ["噁心","厭惡","反感"]
}

# A keyword directly preceded by one of these words is treated as negated.
negations = ["不","沒","沒有","別","勿","非"]


def _has_unnegated_occurrence(text: str, word: str) -> bool:
    """Return True if `word` occurs in `text` at least once without a negation right before it."""
    neg_suffixes = tuple(negations)
    idx = text.find(word)
    while idx != -1:
        # text[:idx] ending with a negation word means this hit is negated.
        if not text.endswith(neg_suffixes, 0, idx):
            return True
        idx = text.find(word, idx + 1)
    return False


def keyword_emotion(text: str):
    """Guess the dominant emotion of `text` from keyword matches.

    Each keyword contributes one vote to its emotion when it appears at
    least once without an immediately preceding negation word. Every
    occurrence is inspected, so a negated first mention does not hide a
    later affirmative one.

    Args:
        text: Input text; None or empty text yields None.

    Returns:
        {emotion: share}, where `emotion` is the key of `emo_keywords` with
        the most votes (ties go to the emotion listed first) and `share` is
        its fraction of all votes; or None when no keyword matched.
    """
    if not text:
        return None

    text_proc = text.strip()
    counts = {emo: 0 for emo in emo_keywords}
    for emo, kws in emo_keywords.items():
        for w in kws:
            if _has_unnegated_occurrence(text_proc, w):
                counts[emo] += 1

    total = sum(counts.values())
    if total > 0:
        top = max(counts, key=lambda k: counts[k])
        return {top: counts[top] / total}
    return None
|
|
|
def predict_text_mixed(text: str):
    """Classify text emotion: keyword rules first, remote API as fallback.

    Args:
        text: Input text. Empty or whitespace-only input returns {}.

    Returns:
        A single-entry {label: probability} dict with a Chinese emotion
        label when keyword_emotion() finds a match; otherwise whatever
        predict_text_via_api() returns for the same text.
    """
    print("predict_text_mixed:", text)
    if not text or not text.strip():
        return {}

    keyword_hit = keyword_emotion(text)
    if not keyword_hit:
        # No keyword evidence: defer to the Inference API classifier.
        return predict_text_via_api(text)

    english_to_chinese = {
        "happy": "高興",
        "angry": "憤怒",
        "sad": "悲傷",
        "surprise": "驚訝",
        "fear": "恐懼",
        "disgust": "厭惡",
    }
    emotion = next(iter(keyword_hit))
    display_label = english_to_chinese.get(emotion, emotion)
    return {display_label: float(keyword_hit[emotion])}
|
|
|
|
|
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build a fixed-length feature vector from an audio signal.

    Computes 13 MFCCs with librosa, then concatenates the mean and the
    variance of the MFCC matrix taken along axis 1 (means first, variances
    second).

    Args:
        signal: Audio samples passed to librosa as `y`.
        sr: Sampling rate of `signal`.

    Returns:
        1-D array of the axis-1 means followed by the axis-1 variances.
    """
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    coeff_means = np.mean(coeffs, axis=1)
    coeff_vars = np.var(coeffs, axis=1)
    return np.concatenate((coeff_means, coeff_vars))
|
|
|
def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file.

    Args:
        audio_path: Path of the audio file; a falsy value returns {}.

    Returns:
        Dict mapping each class of `svm_model` to its predicted
        probability, or {} if loading or inference fails (the error is
        printed).
    """
    if not audio_path:
        return {}

    try:
        # sr=None keeps the file's native sampling rate.
        waveform, rate = librosa.load(audio_path, sr=None)
        features = extract_feature(waveform, rate)
        class_probs = svm_model.predict_proba([features])[0]
        return {
            label: float(prob)
            for label, prob in zip(svm_model.classes_, class_probs)
        }
    except Exception as e:
        print("predict_voice error:", e)
        return {}
|
|
|
|
|
import gradio as gr |
|
|
|
def predict_face(img: np.ndarray):
    """Return a placeholder emotion distribution for a webcam frame.

    Args:
        img: Frame from the webcam stream, or None when no frame is available.

    Returns:
        {} when `img` is None; otherwise the fixed distribution
        {"happy": 0.5, "sad": 0.5} (the frame content is not analysed).
    """
    return {} if img is None else {"happy": 0.5, "sad": 0.5}
|
|
|
def build_interface():
    """Assemble the Gradio Blocks UI with one tab per modality.

    Tabs:
        * Face: webcam frames are streamed to predict_face().
        * Voice: a microphone recording (as a file path) goes to
          predict_voice() whenever the audio value changes.
        * Text: submitting the textbox runs predict_text_mixed().

    Returns:
        The constructed gr.Blocks app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 多模態情緒分析示例")
        with gr.Tabs():
            with gr.TabItem("臉部情緒"):
                gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
                with gr.Row():
                    # `sources` takes a list of allowed input sources.
                    webcam = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="攝像頭畫面",
                    )
                    face_out = gr.Label(label="情緒分佈")
                webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)

            with gr.TabItem("語音情緒"):
                gr.Markdown("### 語音情緒 分析")
                with gr.Row():
                    # Use the same `sources=[...]` keyword as gr.Image above;
                    # the older singular `source=` keyword is rejected by the
                    # Gradio API that accepts `sources`.
                    audio = gr.Audio(
                        sources=["microphone"],
                        streaming=False,
                        type="filepath",
                        label="錄音",
                    )
                    voice_out = gr.Label(label="語音情緒結果")
                audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)

            with gr.TabItem("文字情緒"):
                gr.Markdown("### 文字情緒 分析 (規則+Inference API)")
                with gr.Row():
                    text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                    text_out = gr.Label(label="文字情緒結果")
                text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)

    return demo
|
|
|
|
|
|
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = build_interface()
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)
|
|