import streamlit as st
import cv2
import numpy as np
import librosa
import joblib
from deepface import DeepFace

# 1) Load all models
@st.cache_resource
def load_models():
    # Warm up DeepFace on a dummy frame so the first real request is fast
    DeepFace.analyze(
        img_path=np.zeros((224, 224, 3), dtype=np.uint8),
        actions=["emotion"],
        enforce_detection=False,
    )
    voice_clf = joblib.load("voice_model.joblib")
    return voice_clf

voice_clf = load_models()

st.title("📱 Real-Time Multimodal Emotion Analysis")

# 2) Live face
st.header("🖼 Live Facial Emotion")
img_data = st.camera_input("Look at the camera")
if img_data is not None:
    # Decode the captured frame into an OpenCV BGR image
    arr = np.frombuffer(img_data.read(), np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    res = DeepFace.analyze(img, actions=["emotion"], enforce_detection=False)
    # Newer DeepFace versions return a list of results, older ones a dict
    emo = (res[0] if isinstance(res, list) else res).get("dominant_emotion", "unknown")
    st.write("Emotion:", emo)

# 3) Voice upload
st.header("🎤 Voice Emotion from Upload")
audio = st.file_uploader("Please upload a WAV file", type=["wav"])
if audio is not None:
    with open("tmp.wav", "wb") as f:
        f.write(audio.getbuffer())
    y, sr = librosa.load("tmp.wav", sr=None)
    # Average 13 MFCCs over time to get one fixed-length feature vector
    mf = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    emo = voice_clf.predict([mf])[0]
    st.write("Emotion:", emo)

# 4) Text input
st.header("📝 Text Emotion")
txt = st.text_input("Type some text…")
if txt:
    # Plug in your own analyze_text_fn here (a hedged sketch is appended
    # at the end of this file)
    emo = analyze_text_fn(txt)
    st.write("Emotion:", emo)
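
# ---------------------------------------------------------------------------
# analyze_text_fn is referenced above but never defined in this file. Below is
# a minimal sketch of what it could look like, assuming the Hugging Face
# `transformers` library and the public checkpoint
# "j-hartmann/emotion-english-distilroberta-base" — both the library choice
# and the model name are assumptions, not part of the original app. Note that
# Streamlit executes the script top to bottom on every rerun, so in a real app
# this definition must appear (or be imported) above section 4.
# ---------------------------------------------------------------------------
from transformers import pipeline

@st.cache_resource
def load_text_model():
    # Cache the pipeline alongside the other models so it is built only once
    return pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
    )

def analyze_text_fn(text: str) -> str:
    # The pipeline returns a list like [{"label": "joy", "score": 0.98}];
    # report only the top label to match the other two modalities
    return load_text_model()(text)[0]["label"]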