import streamlit as st
import cv2
import numpy as np
import librosa
import joblib
from deepface import DeepFace

# 1) Load all models
@st.cache_resource
def load_models():
    # Warm up DeepFace on a dummy frame so the first real request is fast
    DeepFace.analyze(
        img_path=np.zeros((224, 224, 3), dtype=np.uint8),
        actions=["emotion"],
        enforce_detection=False,
    )
    voice_clf = joblib.load("voice_model.joblib")
    return voice_clf

voice_clf = load_models()

st.title("📱 Real-Time Multimodal Emotion Analysis")

# 2) Live face
st.header("🖼 Live Facial Emotion")
img_data = st.camera_input("Look at the camera")
if img_data is not None:
    # Decode the captured frame into an OpenCV BGR image
    arr = np.frombuffer(img_data.read(), np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    res = DeepFace.analyze(img, actions=["emotion"], enforce_detection=False)
    # Newer DeepFace versions return a list of results, older ones a dict
    emo = (res[0] if isinstance(res, list) else res).get("dominant_emotion", "unknown")
    st.write("Emotion:", emo)

# 3) Voice upload
st.header("🎤 Voice Emotion from Upload")
audio = st.file_uploader("Please upload a WAV file", type=["wav"])
if audio is not None:
    with open("tmp.wav", "wb") as f:
        f.write(audio.getbuffer())
    y, sr = librosa.load("tmp.wav", sr=None)
    # Average 13 MFCCs over time to get one fixed-length feature vector
    mf = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    emo = voice_clf.predict([mf])[0]
    st.write("Emotion:", emo)

# 4) Text input
st.header("📝 Text Emotion")
txt = st.text_input("Type some text…")
if txt:
    # Plug in your own analyze_text_fn here (a hedged sketch is appended
    # at the end of this file)
    emo = analyze_text_fn(txt)
    st.write("Emotion:", emo)
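
# ---------------------------------------------------------------------------
# analyze_text_fn is referenced above but never defined in this file. Below is
# a minimal sketch of what it could look like, assuming the Hugging Face
# `transformers` library and the public checkpoint
# "j-hartmann/emotion-english-distilroberta-base" — both the library choice
# and the model name are assumptions, not part of the original app. Note that
# Streamlit executes the script top to bottom on every rerun, so in a real app
# this definition must appear (or be imported) above section 4.
# ---------------------------------------------------------------------------
from transformers import pipeline

@st.cache_resource
def load_text_model():
    # Cache the pipeline alongside the other models so it is built only once
    return pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
    )

def analyze_text_fn(text: str) -> str:
    # The pipeline returns a list like [{"label": "joy", "score": 0.98}];
    # report only the top label to match the other two modalities
    return load_text_model()(text)[0]["label"]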