from huggingface_hub import hf_hub_download
import gradio as gr
import joblib
import json
import lightgbm as lgb
from sentence_transformers import SentenceTransformer
import numpy as np

# Download the model artifacts from the Hugging Face model repo "Nawal20/Essay".
# hf_hub_download returns the local filesystem path of each downloaded file.
repo_id = "Nawal20/Essay"

ridge_path = hf_hub_download(repo_id=repo_id, filename="ridge_model.pkl")
lgb_path = hf_hub_download(repo_id=repo_id, filename="lightgbm_model.pkl")
encoder_path = hf_hub_download(repo_id=repo_id, filename="scaler_encoder.pkl")
metadata_path = hf_hub_download(repo_id=repo_id, filename="metadata_columns.json")

# Load the models and encoder.
# NOTE(security): joblib.load unpickles its input, and unpickling can execute
# arbitrary code. This is only acceptable while the repo above is trusted.
ridge = joblib.load(ridge_path)
lgb_model = joblib.load(lgb_path)
encoder = joblib.load(encoder_path)

# JSON text is UTF-8; state the encoding explicitly so parsing does not depend
# on the platform's default locale encoding.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata_columns = json.load(f)

# Load SBERT model (will download at runtime)
sbert = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")

def predict_score(essay_text, gender, race_ethnicity, disability, disadvantaged, ell_status) -> float:
    """Predict an essay score as the mean of the Ridge and LightGBM outputs.

    The essay is embedded with the module-level ``sbert`` model, the student
    metadata is transformed with the module-level ``encoder``, and the two
    feature blocks are concatenated column-wise into one input row that is
    fed to both regressors.

    Args:
        essay_text: The essay to score.
        gender: Value for the "gender" metadata column.
        race_ethnicity: Value for the "race_ethnicity" metadata column.
        disability: Value for the "student_disability_status" column.
        disadvantaged: Value for the "economically_disadvantaged" column.
        ell_status: Value for the "ell_status" metadata column.

    Returns:
        The unweighted average of the two model predictions, as a Python float.

    Raises:
        KeyError: If ``metadata_columns`` names a column that is not one of
            the five metadata fields assembled here.
    """
    # Encode the essay; passing a one-element list yields a single-row batch.
    essay_embedding = sbert.encode([essay_text])

    # Prepare metadata as dict keyed by column name.
    metadata_input = {
        "gender": gender,
        "race_ethnicity": race_ethnicity,
        "student_disability_status": disability,
        "economically_disadvantaged": disadvantaged,
        "ell_status": ell_status,
    }

    # Order the values by the column list loaded from metadata_columns.json
    # (presumably the column order the encoder was fitted with -- confirm
    # against the training code).
    metadata_values = [metadata_input[col] for col in metadata_columns]
    metadata_array = encoder.transform([metadata_values])  # shape: (1, n)

    # Combine essay embedding + metadata into one feature row.
    full_input = np.hstack([essay_embedding, metadata_array])

    # Predict from both models; each returns a batch, so take the only entry.
    ridge_score = ridge.predict(full_input)[0]
    lgb_score = lgb_model.predict(full_input)[0]

    # Average the two predictions. The previous code referenced the undefined
    # names ridge_pred / lgb_pred here and raised NameError on every call.
    return float((ridge_score + lgb_score) / 2)

# Gradio UI
# Input widgets, listed in the same order as predict_score's parameters.
essay_input = gr.Textbox(
    label="Essay Text",
    lines=10,
    placeholder="Paste your essay here...",
)
demographic_inputs = [
    gr.Dropdown(["Male", "Female", "Other"], label="Gender"),
    gr.Dropdown(["Asian", "Black", "Hispanic", "White", "Other"], label="Race/Ethnicity"),
    gr.Dropdown(["Yes", "No"], label="Student has Disability"),
    gr.Dropdown(["Yes", "No"], label="Economically Disadvantaged"),
    gr.Dropdown(["Yes", "No"], label="ELL Status"),
]

# Single numeric output showing the value returned by predict_score.
score_output = gr.Number(label="Predicted Essay Score")

iface = gr.Interface(
    fn=predict_score,
    inputs=[essay_input, *demographic_inputs],
    outputs=score_output,
    title="Automated Essay Scoring App",
)

# Start the web app.
iface.launch()