"""Gradio app that predicts an essay score from essay text plus student metadata.

The prediction is the equal-weight average of a Ridge model and a LightGBM
booster, both fed the SBERT essay embedding concatenated with the encoded
metadata features.
"""

import json

import gradio as gr
import joblib
import lightgbm as lgb
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the trained artifacts from the local "Essay/" directory (paths are
# relative to the current working directory).
# NOTE: joblib.load unpickles arbitrary Python objects -- only load artifacts
# that come from a trusted source.
ridge = joblib.load("Essay/ridge_model.pkl")
lgb_model = lgb.Booster(model_file="Essay/lightgbm_model.txt")
encoder = joblib.load("Essay/scaler_encoder.pkl")

# Load the metadata column order that `encoder` is fed with.
with open("Essay/metadata_columns.json", "r", encoding="utf-8") as f:
    metadata_columns = json.load(f)

# Load the SBERT model (downloaded at runtime if it is not cached locally).
sbert = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")


def predict_score(essay_text, gender, race_ethnicity, disability, disadvantaged, ell_status):
    """Predict a score for one essay.

    Args:
        essay_text: The essay to score.
        gender: Selected value of the "Gender" dropdown.
        race_ethnicity: Selected value of the "Race/Ethnicity" dropdown.
        disability: Selected value of the "Student has Disability" dropdown.
        disadvantaged: Selected value of the "Economically Disadvantaged" dropdown.
        ell_status: Selected value of the "ELL Status" dropdown.

    Returns:
        The average of the Ridge and LightGBM predictions, rounded to two
        decimal places.

    Raises:
        gr.Error: If the essay text is empty/blank, or if a metadata field
            required by ``metadata_columns`` has no value selected.
        KeyError: If ``metadata_columns`` names a column this function does
            not collect.
    """
    # An empty essay carries no signal; ask the user for input instead of
    # returning a score for a blank string.
    if essay_text is None or not essay_text.strip():
        raise gr.Error("Please enter the essay text before requesting a score.")

    # Metadata keyed by the column names used in metadata_columns.json.
    metadata_input = {
        "gender": gender,
        "race_ethnicity": race_ethnicity,
        "student_disability_status": disability,
        "economically_disadvantaged": disadvantaged,
        "ell_status": ell_status,
    }

    # The dropdowns have no default selection, so an untouched dropdown
    # presumably arrives here as None; report that clearly rather than
    # letting the encoder fail on it.
    missing = [
        col
        for col in metadata_columns
        if col in metadata_input and metadata_input[col] is None
    ]
    if missing:
        raise gr.Error("Please select a value for: " + ", ".join(missing))

    # Encode the essay; a one-element batch yields one embedding row.
    essay_embedding = sbert.encode([essay_text])

    # Arrange the metadata in the stored column order and encode it as a
    # single-row batch.
    metadata_values = [metadata_input[col] for col in metadata_columns]
    metadata_array = encoder.transform([metadata_values])

    # Combine essay embedding + encoded metadata into one feature row.
    full_input = np.hstack([essay_embedding, metadata_array])

    # Predict with both models and take the first (only) row's prediction.
    ridge_score = ridge.predict(full_input)[0]
    lgb_score = lgb_model.predict(full_input)[0]

    # Equal-weight ensemble of the two predictions.
    final_score = round((0.5 * ridge_score + 0.5 * lgb_score), 2)
    return final_score


# Gradio UI: input order must match the positional parameters of predict_score.
iface = gr.Interface(
    fn=predict_score,
    inputs=[
        gr.Textbox(label="Essay Text", lines=10, placeholder="Paste your essay here..."),
        gr.Dropdown(["Male", "Female", "Other"], label="Gender"),
        gr.Dropdown(["Asian", "Black", "Hispanic", "White", "Other"], label="Race/Ethnicity"),
        gr.Dropdown(["Yes", "No"], label="Student has Disability"),
        gr.Dropdown(["Yes", "No"], label="Economically Disadvantaged"),
        gr.Dropdown(["Yes", "No"], label="ELL Status"),
    ],
    outputs=gr.Number(label="Predicted Essay Score"),
    title="Automated Essay Scoring App",
)

iface.launch()