import gradio as gr # used to build the web interface
import torch # used to run the model and handle predictions
from transformers import BertTokenizer, BertForSequenceClassification # to load our trained model and tokenizer
import zipfile # for extracting the uploaded model
import os # to check if the extracted model folder already exists
# check if the fine-tuned model folder is already extracted
if not os.path.exists("fine_tuned_model"):
    with zipfile.ZipFile("fine_tuned_model.zip", 'r') as zip_ref:
        zip_ref.extractall("fine_tuned_model")
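# note (assumption): this expects the zip to store config.json, vocab.txt and the model weights
# at its root; if the archive wraps them in a subfolder, point model_path below at that subfolder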
# load tokenizer and model
model_path = "./fine_tuned_model"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)
model.eval() # set model to evaluation mode (important for inference)
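# optional sanity check (assumes the fine-tuned checkpoint kept a meaningful id2label mapping):
# uncomment to confirm which index the checkpoint treats as "biased" instead of relying on the
# label flip inside detect_bias below
# print(model.config.id2label)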
# this function is triggered when the user submits a sentence
def detect_bias(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probs = torch.softmax(logits, dim=1).squeeze()
    pred_label = torch.argmax(probs).item()
    confidence = round(probs[pred_label].item(), 2)
    # the fine-tuned checkpoint's label ids appear inverted relative to the intended
    # mapping (1 = biased), so swap them before interpreting the prediction
    pred_label = 1 - pred_label # flip 0 <-> 1
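    # note: with only two classes, the softmax probability of the predicted class is always
    # at least 0.5, so the low-confidence "Uncertain" branches below act only as a defensive fallback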
    # prediction and explanation logic based on flipped label and confidence
    if pred_label == 1: # now 1 = biased
        if confidence > 0.75:
            final_label = "Biased"
            explanation = (
                "⚠️ This text is likely biased. The model is highly confident that it reflects gender stereotypes or role bias."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Biased"
            explanation = (
                "🤔 This text might contain some gender bias, but the model is not entirely sure. Review it carefully."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'biased' but with low confidence. The result may not be reliable."
            )
    elif pred_label == 0: # now 0 = unbiased
        if confidence > 0.75:
            final_label = "Unbiased"
            explanation = (
                "✅ This text appears neutral with no strong signs of gender bias based on the model's understanding."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Unbiased"
            explanation = (
                "🤔 This text seems unbiased, but the model isn't highly confident. It may still be worth reviewing."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'unbiased' but with low confidence. The result is unclear."
            )
    return {
        "Bias Classification": final_label,
        "Confidence Score": confidence,
        "Explanation": explanation
    }
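# quick local sanity check (optional, not part of the app): uncomment to try the classifier
# on the placeholder sentence without launching the UI
# print(detect_bias("The woman stayed at home while the man went to work."))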
# build the Gradio UI
with gr.Blocks() as demo:
gr.Markdown("## Bias Bin – Fine-Tuned BERT Version by Aryan, Gowtham & Manoj")
gr.Markdown("Detect gender bias in text using a BERT model fine-tuned with counterfactual data.")
# input box for users
text_input = gr.Textbox(
label="Enter Narrative Text",
lines=4,
placeholder="E.g., 'The woman stayed at home while the man went to work.'"
)
# button to submit
submit_btn = gr.Button("Detect Bias")
# output area
output = gr.JSON(label="Prediction Output")
# connect button to function
submit_btn.click(fn=detect_bias, inputs=text_input, outputs=output)
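    # optional: clickable example inputs for quick testing (uncomment to enable;
    # the second sentence is illustrative only and not from the training data)
    # gr.Examples(
    #     examples=[
    #         "The woman stayed at home while the man went to work.",
    #         "Both parents shared the household chores and their careers.",
    #     ],
    #     inputs=text_input,
    # )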
    # disclaimer at the bottom
    gr.Markdown("⚠️ **Disclaimer:** This model is trained on a small, synthetic dataset and may not always be accurate. Results should be interpreted cautiously and reviewed by a human.")
# run the app
demo.launch()