import os

import gradio as gr
import joblib
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from transformers import pipeline

# —— 1) Train / load the voice emotion model ——
BASE_VOICE_PATH = r"C:\情緒"            # root folder of labelled .wav files ("情緒" = emotion)
VOICE_MODEL_FILE = "voice_model.joblib"

def train_voice_model():
    """Train a RandomForest on mean MFCC features extracted from labelled .wav files."""
    labels = ["angry", "happy", "sad", "fear", "surprise"]
    X, y = [], []
    for lbl in labels:
        folder = os.path.join(BASE_VOICE_PATH, lbl)
        if not os.path.isdir(folder):
            raise FileNotFoundError(f"Folder not found: {folder}")
        for fname in os.listdir(folder):
            if fname.lower().endswith(".wav"):
                path = os.path.join(folder, fname)
                audio, sr = librosa.load(path, sr=None)
                mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
                mfcc_mean = np.mean(mfccs.T, axis=0)   # one 13-dim feature vector per clip
                X.append(mfcc_mean)
                y.append(lbl)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X, y)
    joblib.dump(clf, VOICE_MODEL_FILE)
    return clf

# Reuse a previously trained model if one exists, otherwise train from scratch.
if os.path.exists(VOICE_MODEL_FILE):
    voice_clf = joblib.load(VOICE_MODEL_FILE)
else:
    voice_clf = train_voice_model()

def analyze_audio(path):
    """Predict the emotion of a recorded clip from its mean MFCC features."""
    audio, sr = librosa.load(path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs.T, axis=0).reshape(1, -1)
    return voice_clf.predict(mfcc_mean)[0]

# —— 2) Chinese text emotion analysis (simple keyword matching) ——
def analyze_text(text):
    if any(w in text for w in ["開心", "快樂"]):        # "happy", "joyful"
        return "happy"
    if any(w in text for w in ["生氣", "憤怒"]):        # "angry", "furious"
        return "angry"
    if any(w in text for w in ["傷心", "難過", "哭"]):  # "sad", "upset", "cry"
        return "sad"
    if any(w in text for w in ["驚", "意外"]):          # "startled", "unexpected"
        return "surprise"
    if any(w in text for w in ["怕", "恐懼"]):          # "afraid", "fear"
        return "fear"
    return "neutral"

# —— 3) Real-time facial emotion analysis (Hugging Face ferplus model) ——
face_classifier = pipeline(
    "image-classification",
    model="nateraw/ferplus",
    device=-1,  # CPU
)

def analyze_face(img):
    # img arrives as a PIL image (the webcam component below sets type="pil",
    # since the transformers image-classification pipeline does not accept numpy arrays)
    result = face_classifier(img, top_k=1)[0]
    return result["label"]

# —— 4) Build the Gradio multi-tab interface ——
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Emotion Analysis Demo")

    with gr.Tab("📝 Text"):
        txt = gr.Textbox(placeholder="Enter Chinese text…")
        btn_txt = gr.Button("Analyze text")
        out_txt = gr.Textbox()
        btn_txt.click(analyze_text, inputs=txt, outputs=out_txt)

    with gr.Tab("🎤 Voice"):
        aud = gr.Audio(type="filepath")
        btn_aud = gr.Button("Analyze voice")
        out_aud = gr.Textbox()
        btn_aud.click(analyze_audio, inputs=aud, outputs=out_aud)

    with gr.Tab("📷 Face"):
        # source="webcam" is the Gradio 3.x argument; on Gradio 4+ use sources=["webcam"]
        img_cam = gr.Image(source="webcam", type="pil")
        btn_img = gr.Button("Analyze expression")
        out_img = gr.Textbox()
        btn_img.click(analyze_face, inputs=img_cam, outputs=out_img)

demo.launch()