import os

import gradio as gr
import joblib
import librosa
import numpy as np
import requests
from deepface import DeepFace
from huggingface_hub import hf_hub_download

# Read the Hugging Face API token from the environment. os.getenv takes the
# variable *name*, never the secret token value itself; do not hard-code tokens.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN is None:
    print("Warning: HF_API_TOKEN not set; text analysis may fail or be rate-limited.")

HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
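# For reference, a hedged sketch of the Inference API contract this app relies on:
# POST a JSON body of the form {"inputs": "..."} to HF_API_URL. Successful
# text-classification responses typically arrive nested one level deep, e.g.
# [[{"label": "...", "score": 0.99}, {"label": "...", "score": 0.01}]].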
|
|
|
def call_text_api(text: str):
    """Classify Chinese text sentiment via the Hugging Face Inference API."""
    if not text or text.strip() == "":
        return {}
    payload = {"inputs": text}
    try:
        res = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=15)
        res.raise_for_status()
        data = res.json()
        # Text-classification responses are often nested one level deep
        # ([[{"label": ..., "score": ...}, ...]]); unwrap if so.
        if isinstance(data, list) and data and isinstance(data[0], list):
            data = data[0]
        result = {}
        if isinstance(data, list):
            for item in data:
                label = item.get("label", "")
                score = item.get("score", 0.0)
                result[label] = float(score)
        else:
            print("call_text_api: unexpected response format:", data)
            return {}
        return result
    except Exception as e:
        print("call_text_api error:", e)
        return {"neutral": 1.0}
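# Smoke-test sketch (assumes network access and, ideally, a valid HF_API_TOKEN;
# the label strings below are illustrative, since actual names come from the
# model's config):
#   call_text_api("这家餐厅的菜很好吃")  # "the food at this restaurant is great"
#   # -> e.g. {"positive (stars 4 and 5)": 0.99, "negative (stars 1, 2 and 3)": 0.01}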
|
|
|
|
|
USE_VOICE = True
svm_model = None
if USE_VOICE:
    try:
        print("Downloading and loading the voice SVM model...")
        model_path = hf_hub_download(
            repo_id="GCLing/emotion-svm-model",
            filename="svm_emotion_model.joblib",
        )
        svm_model = joblib.load(model_path)
        print("SVM model loaded.")
    except Exception as e:
        print("Failed to load the voice SVM model; disabling the voice module:", e)
        USE_VOICE = False
|
|
|
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Build a 26-dim feature vector: per-coefficient MFCC means and variances."""
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    feat = np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
    return feat
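# Standalone sanity check, as a sketch ("sample.wav" is a hypothetical file):
#   y, sr = librosa.load("sample.wav", sr=None)
#   assert extract_feature(y, sr).shape == (26,)  # 13 MFCC means + 13 variances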
|
|
|
def predict_voice(audio_path: str):
    """Predict emotion probabilities for a recorded audio file."""
    if not USE_VOICE or svm_model is None:
        # Surfaces as an "error" entry in gr.Label when the voice module is disabled.
        return {"error": 1.0}
    if not audio_path:
        return {}
    try:
        signal, sr = librosa.load(audio_path, sr=None)
        feat = extract_feature(signal, sr)
        # predict_proba is only available if the SVM was trained with probability=True.
        probs = svm_model.predict_proba([feat])[0]
        labels = svm_model.classes_
        return {labels[i]: float(probs[i]) for i in range(len(labels))}
    except Exception as e:
        print("predict_voice error:", e)
        return {}
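# Direct-call sketch ("clip.wav" is a hypothetical recording; the result keys
# mirror whatever labels svm_model.classes_ holds):
#   print(predict_voice("clip.wav"))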
|
|
|
|
|
def predict_face(img: np.ndarray):
    """Run DeepFace emotion analysis on an image supplied by Gradio."""
    if img is None:
        return {}
    try:
        # Gradio delivers RGB arrays, while DeepFace expects OpenCV-style BGR;
        # reverse the channel order (contiguously, for OpenCV) before analysis.
        bgr = np.ascontiguousarray(img[:, :, ::-1]) if img.ndim == 3 else img
        res = DeepFace.analyze(bgr, actions=["emotion"], detector_backend="opencv")
        # analyze() returns a list of per-face dicts in recent DeepFace versions
        # and a single dict in older ones; handle both shapes.
        if isinstance(res, list):
            first = res[0] if res else {}
            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
        else:
            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
        # Cast numpy scalars to plain floats so gr.Label can serialize them.
        return {k: float(v) for k, v in emo.items()}
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}
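# Offline sketch (assumes opencv-python is installed; "face.jpg" is a
# hypothetical file). cv2.imread returns BGR, while this function expects the
# RGB arrays Gradio supplies, so convert before calling:
#   import cv2
#   rgb = cv2.cvtColor(cv2.imread("face.jpg"), cv2.COLOR_BGR2RGB)
#   print(predict_face(rgb))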
|
|
|
|
|
def build_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## Multimodal Emotion Analysis (simplified: face upload/photo + voice + text)")
        with gr.Tabs():

            with gr.TabItem("Facial emotion"):
                gr.Markdown("### Facial emotion analysis (upload a picture or take a photo)")
                with gr.Row():
                    # "webcam" is included so the take-a-photo option in the label works.
                    face_input = gr.Image(sources=["upload", "webcam"], type="numpy", label="Upload or take a photo")
                    face_out = gr.Label(label="Emotion distribution")
                face_input.change(fn=predict_face, inputs=face_input, outputs=face_out)

            with gr.TabItem("Voice emotion"):
                gr.Markdown("### Voice emotion analysis (record and upload)")
                if USE_VOICE:
                    with gr.Row():
                        # Gradio 4.x takes the plural `sources` keyword, matching gr.Image above.
                        audio_input = gr.Audio(sources=["microphone"], streaming=False, type="filepath", label="Record")
                        voice_out = gr.Label(label="Voice emotion result")
                    audio_input.change(fn=predict_voice, inputs=audio_input, outputs=voice_out)
                else:
                    gr.Markdown("The voice module is unavailable.")

            with gr.TabItem("Text emotion"):
                gr.Markdown("### Text emotion analysis (Hugging Face Inference API)")
                with gr.Row():
                    text_input = gr.Textbox(lines=3, placeholder="Enter Chinese text…")
                    text_out = gr.Label(label="Text emotion result")
                text_input.submit(fn=call_text_api, inputs=text_input, outputs=text_out)

    return demo
|
|
|
if __name__ == "__main__":
    demo = build_interface()
    demo.launch()
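# Optional: to get a temporary public URL while testing from another device,
# Gradio supports demo.launch(share=True) instead of the plain launch() above.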
|