import gradio as gr  # used to build the web interface
import torch  # used to run the model and handle predictions
from transformers import BertTokenizer, BertForSequenceClassification  # to load our trained model and tokenizer
import zipfile  # for extracting the uploaded model
import os  # to check if the folder already exists

# check if the fine-tuned model folder is already extracted
if not os.path.exists("fine_tuned_model"):
    with zipfile.ZipFile("fine_tuned_model.zip", 'r') as zip_ref:
        zip_ref.extractall("fine_tuned_model")

# load tokenizer and model
model_path = "./fine_tuned_model"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)
model.eval()  # set model to evaluation mode (important for inference)

# this function is triggered when the user submits a sentence
def detect_bias(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probs = torch.softmax(logits, dim=1).squeeze()
    pred_label = torch.argmax(probs).item()
    confidence = round(probs[pred_label].item(), 2)

    # flip label logic because model predictions seem inverted
    pred_label = 1 - pred_label  # flip 0 <-> 1

    # prediction and explanation logic based on flipped label and confidence
    if pred_label == 1:  # now 1 = biased
        if confidence > 0.75:
            final_label = "Biased"
            explanation = (
                "⚠️ This text is likely biased. The model is highly confident that it reflects gender stereotypes or role bias."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Biased"
            explanation = (
                "🤔 This text might contain some gender bias, but the model is not entirely sure. Review it carefully."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'biased' but with low confidence. The result may not be reliable."
            )
    elif pred_label == 0:  # now 0 = unbiased
        if confidence > 0.75:
            final_label = "Unbiased"
            explanation = (
                "✅ This text appears neutral with no strong signs of gender bias based on the model's understanding."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Unbiased"
            explanation = (
                "🤔 This text seems unbiased, but the model isn't highly confident. It may still be worth reviewing."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'unbiased' but with low confidence. The result is unclear."
            )

    return {
        "Bias Classification": final_label,
        "Confidence Score": confidence,
        "Explanation": explanation
    }

# build the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Bias Bin – Fine-Tuned BERT Version by Aryan, Gowtham & Manoj")
    gr.Markdown("Detect gender bias in text using a BERT model fine-tuned with counterfactual data.")

    # input box for users
    text_input = gr.Textbox(
        label="Enter Narrative Text",
        lines=4,
        placeholder="E.g., 'The woman stayed at home while the man went to work.'"
    )

    # button to submit
    submit_btn = gr.Button("Detect Bias")

    # output area
    output = gr.JSON(label="Prediction Output")

    # connect button to function
    submit_btn.click(fn=detect_bias, inputs=text_input, outputs=output)

    # disclaimer at the bottom
    gr.Markdown("⚠️ **Disclaimer:** This model is trained on a small, synthetic dataset and may not always be accurate. Results should be interpreted cautiously and reviewed by a human.")

# run the app
demo.launch()