import os

import gradio as gr
import joblib
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from transformers import pipeline

# —— 1) Train / load the voice emotion model ——
BASE_VOICE_PATH = r"C:\情緒"            # root folder of labelled .wav files ("情緒" = emotion)
VOICE_MODEL_FILE = "voice_model.joblib"

def train_voice_model():
    """Train a RandomForest on mean MFCC features extracted from labelled .wav files."""
    labels = ["angry", "happy", "sad", "fear", "surprise"]
    X, y = [], []
    for lbl in labels:
        folder = os.path.join(BASE_VOICE_PATH, lbl)
        if not os.path.isdir(folder):
            raise FileNotFoundError(f"Folder not found: {folder}")
        for fname in os.listdir(folder):
            if fname.lower().endswith(".wav"):
                path = os.path.join(folder, fname)
                audio, sr = librosa.load(path, sr=None)
                mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
                mfcc_mean = np.mean(mfccs.T, axis=0)   # one 13-dim feature vector per clip
                X.append(mfcc_mean)
                y.append(lbl)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X, y)
    joblib.dump(clf, VOICE_MODEL_FILE)
    return clf

# Reuse a previously trained model if one exists, otherwise train from scratch.
if os.path.exists(VOICE_MODEL_FILE):
    voice_clf = joblib.load(VOICE_MODEL_FILE)
else:
    voice_clf = train_voice_model()

def analyze_audio(path):
    """Predict the emotion of a recorded clip from its mean MFCC features."""
    audio, sr = librosa.load(path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs.T, axis=0).reshape(1, -1)
    return voice_clf.predict(mfcc_mean)[0]

# —— 2) Chinese text emotion analysis (simple keyword matching) ——
def analyze_text(text):
    if any(w in text for w in ["開心", "快樂"]):        # "happy", "joyful"
        return "happy"
    if any(w in text for w in ["生氣", "憤怒"]):        # "angry", "furious"
        return "angry"
    if any(w in text for w in ["傷心", "難過", "哭"]):  # "sad", "upset", "cry"
        return "sad"
    if any(w in text for w in ["驚", "意外"]):          # "startled", "unexpected"
        return "surprise"
    if any(w in text for w in ["怕", "恐懼"]):          # "afraid", "fear"
        return "fear"
    return "neutral"

# —— 3) Real-time facial emotion analysis (Hugging Face ferplus model) ——
face_classifier = pipeline(
    "image-classification",
    model="nateraw/ferplus",
    device=-1,  # CPU
)

def analyze_face(img):
    # img arrives as a PIL image (the webcam component below sets type="pil",
    # since the transformers image-classification pipeline does not accept numpy arrays)
    result = face_classifier(img, top_k=1)[0]
    return result["label"]

# —— 4) Build the Gradio multi-tab interface ——
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Emotion Analysis Demo")

    with gr.Tab("📝 Text"):
        txt = gr.Textbox(placeholder="Enter Chinese text…")
        btn_txt = gr.Button("Analyze text")
        out_txt = gr.Textbox()
        btn_txt.click(analyze_text, inputs=txt, outputs=out_txt)

    with gr.Tab("🎤 Voice"):
        aud = gr.Audio(type="filepath")
        btn_aud = gr.Button("Analyze voice")
        out_aud = gr.Textbox()
        btn_aud.click(analyze_audio, inputs=aud, outputs=out_aud)

    with gr.Tab("📷 Face"):
        # source="webcam" is the Gradio 3.x argument; on Gradio 4+ use sources=["webcam"]
        img_cam = gr.Image(source="webcam", type="pil")
        btn_img = gr.Button("Analyze expression")
        out_img = gr.Textbox()
        btn_img.click(analyze_face, inputs=img_cam, outputs=out_img)

demo.launch()