"""Multimodal emotion analysis demo (face image + voice recording + text).

Three independent predictors are exposed through a Gradio ``Blocks`` UI:

* text  -> Hugging Face Inference API (Chinese sentiment classifier)
* voice -> SVM model downloaded from the Hugging Face Hub, fed MFCC statistics
* face  -> DeepFace emotion analysis of an uploaded / captured image
"""

import os

import gradio as gr
import joblib
import librosa
import numpy as np
import requests
from deepface import DeepFace
from huggingface_hub import hf_hub_download

# --- Configuration: Hugging Face Inference API for text analysis ---
# The token is read from the HF_API_TOKEN environment variable. It must never
# be hard-coded: the previous revision passed a literal token string as the
# variable *name*, which both leaked the secret and always yielded None.
# NOTE(review): the token that was committed here should be revoked.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN is None:
    print("警告:未检测到 HF_API_TOKEN,文字分析可能失败或限流。")

# Publicly available Chinese sentiment-classification model ID.
HF_TEXT_MODEL = "uer/roberta-base-finetuned-dianping-chinese"
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}


def call_text_api(text: str) -> dict:
    """Classify ``text`` with the hosted Hugging Face sentiment model.

    Args:
        text: User-supplied text; ``None``, empty or whitespace-only input
            short-circuits without a network call.

    Returns:
        A ``{label: score}`` dict suitable for ``gr.Label``. ``{}`` for blank
        input or an unexpected response shape. ``{"中性": 1.0}`` when the
        request or response parsing raises (best-effort fallback kept from
        the original behavior).
    """
    if not text or not text.strip():
        return {}

    payload = {"inputs": text}
    try:
        res = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=15)
        res.raise_for_status()
        data = res.json()

        # For a single input string the text-classification endpoint wraps
        # the per-label results in an outer list: [[{label, score}, ...]].
        # Unwrap it so both the nested and the flat shape are accepted.
        if isinstance(data, list) and data and isinstance(data[0], list):
            data = data[0]

        if not isinstance(data, list):
            # Different structure returned; adjust here if the API changes.
            print("call_text_api 返回格式未预期:", data)
            return {}

        return {
            item.get("label", ""): float(item.get("score", 0.0))
            for item in data
            if isinstance(item, dict)
        }
    except Exception as e:  # UI boundary: never let a request failure escape.
        print("call_text_api error:", e)
        return {"中性": 1.0}


# --- Voice emotion analysis: load the SVM model ---
USE_VOICE = True
svm_model = None
if USE_VOICE:
    try:
        print("下载并加载语音 SVM 模型...")
        model_path = hf_hub_download(
            repo_id="GCLing/emotion-svm-model",
            filename="svm_emotion_model.joblib",
        )
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only load models from a repository you trust.
        svm_model = joblib.load(model_path)
        print("SVM 模型加载完成")
    except Exception as e:
        # Any download/load failure disables the voice tab instead of
        # preventing the whole app from starting.
        print("语音 SVM 模型加载失败,禁用语音模块:", e)
        USE_VOICE = False


def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Return the per-coefficient mean and variance of 13 MFCCs.

    Args:
        signal: Audio samples as loaded by ``librosa.load``.
        sr: Sampling rate of ``signal``.

    Returns:
        A 1-D vector: the 13 MFCC means followed by the 13 MFCC variances,
        both taken along the time axis.
    """
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])


def predict_voice(audio_path: str) -> dict:
    """Predict emotion probabilities for a recorded audio file.

    Args:
        audio_path: Path of the recording handed over by ``gr.Audio``;
            falsy when nothing has been recorded.

    Returns:
        ``{label: probability}`` from the SVM. ``{"error": 1.0}`` when the
        voice module is disabled, ``{}`` for missing input or on any failure.
    """
    if not USE_VOICE or svm_model is None:
        return {"error": 1.0}
    if not audio_path:
        return {}
    try:
        # sr=None keeps the file's native sampling rate.
        signal, sr = librosa.load(audio_path, sr=None)
        feat = extract_feature(signal, sr)
        probs = svm_model.predict_proba([feat])[0]
        # Coerce keys/values to plain Python types so the result serializes
        # cleanly regardless of the dtype of ``classes_``.
        return {
            str(label): float(prob)
            for label, prob in zip(svm_model.classes_, probs)
        }
    except Exception as e:
        print("predict_voice error:", e)
        return {}


# --- Face emotion analysis: run DeepFace on an uploaded or captured image ---
def predict_face(img: np.ndarray) -> dict:
    """Predict the emotion distribution of a face image.

    Args:
        img: Image as a numpy array, or ``None`` when the input was cleared.

    Returns:
        ``{emotion: confidence}`` rescaled so the values sum to 1, or ``{}``
        when there is no image, no result, or DeepFace raises.
    """
    if img is None:
        return {}
    try:
        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
        # Depending on the DeepFace version the result is either a dict or a
        # list with one dict per detected face; use the first face.
        if isinstance(res, list):
            res = res[0] if res else {}
        emo = res.get("emotion", {}) if isinstance(res, dict) else {}

        scores = {label: float(value) for label, value in emo.items()}
        # DeepFace reports percentages while gr.Label expects confidences in
        # [0, 1]; dividing by the total is correct for either scale.
        total = sum(scores.values())
        if total > 0:
            scores = {label: value / total for label, value in scores.items()}
        return scores
    except Exception as e:
        print("DeepFace.analyze error:", e)
        return {}


# --- Gradio interface ---
def build_interface():
    """Build and return the Gradio ``Blocks`` app with one tab per modality."""
    with gr.Blocks() as demo:
        gr.Markdown("## 多模態情緒分析(簡化版:上傳/拍照人臉 + 語音 + 文字)")
        with gr.Tabs():
            # Face tab: upload or take a photo.
            with gr.TabItem("臉部情緒"):
                gr.Markdown("### 臉部情緒 分析 (上傳或拍照圖片)")
                with gr.Row():
                    # On mobile, tapping "upload" with sources=["upload"]
                    # can also bring up the camera.
                    face_input = gr.Image(
                        sources=["upload"],
                        type="numpy",
                        label="上傳或拍照圖片",
                    )
                    face_out = gr.Label(label="情緒分布")
                face_input.change(
                    fn=predict_face, inputs=face_input, outputs=face_out
                )

            # Voice tab.
            with gr.TabItem("語音情緒"):
                gr.Markdown("### 語音情緒 分析 (錄音並上傳)")
                if USE_VOICE:
                    with gr.Row():
                        # Uses the ``sources=[...]`` keyword, consistent with
                        # gr.Image above (the singular ``source=`` keyword
                        # belongs to the older Gradio 3 API).
                        audio_input = gr.Audio(
                            sources=["microphone"],
                            streaming=False,
                            type="filepath",
                            label="錄音",
                        )
                        voice_out = gr.Label(label="語音情緒結果")
                    audio_input.change(
                        fn=predict_voice, inputs=audio_input, outputs=voice_out
                    )
                else:
                    gr.Markdown("語音模塊不可用。")

            # Text tab.
            with gr.TabItem("文字情緒"):
                gr.Markdown("### 文字情緒 分析 (Hugging Face Inference API)")
                with gr.Row():
                    text_input = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                    text_out = gr.Label(label="文字情緒結果")
                text_input.submit(
                    fn=call_text_api, inputs=text_input, outputs=text_out
                )
    return demo


if __name__ == "__main__":
    demo = build_interface()
    # share=True would create a temporary public link; it is not needed when
    # deploying to Hugging Face Spaces.
    demo.launch()