import gradio as gr  # used to build the web interface
import torch  # used to run the model and handle predictions
from transformers import BertTokenizer, BertForSequenceClassification  # to load our trained model and tokenizer
import zipfile  # for extracting the uploaded model
import os  # to check if the folder already exists

# check if the fine-tuned model folder is already extracted
if not os.path.exists("fine_tuned_model"):
    with zipfile.ZipFile("fine_tuned_model.zip", 'r') as zip_ref:
        zip_ref.extractall("fine_tuned_model")

# load tokenizer and model
model_path = "./fine_tuned_model"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)
model.eval()  # set model to evaluation mode (important for inference)

# this function is triggered when the user submits a sentence
def detect_bias(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probs = torch.softmax(logits, dim=1).squeeze()
    pred_label = torch.argmax(probs).item()
    confidence = round(probs[pred_label].item(), 2)

    # flip label logic because model predictions seem inverted
    pred_label = 1 - pred_label  # flip 0 <-> 1

    # prediction and explanation logic based on flipped label and confidence
    if pred_label == 1:  # now 1 = biased
        if confidence > 0.75:
            final_label = "Biased"
            explanation = (
                "⚠️ This text is likely biased. The model is highly confident that it reflects gender stereotypes or role bias."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Biased"
            explanation = (
                "🤔 This text might contain some gender bias, but the model is not entirely sure. Review it carefully."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'biased' but with low confidence. The result may not be reliable."
            )
    elif pred_label == 0:  # now 0 = unbiased
        if confidence > 0.75:
            final_label = "Unbiased"
            explanation = (
                "✅ This text appears neutral with no strong signs of gender bias based on the model's understanding."
            )
        elif 0.5 <= confidence <= 0.75:
            final_label = "Possibly Unbiased"
            explanation = (
                "🤔 This text seems unbiased, but the model isn't highly confident. It may still be worth reviewing."
            )
        else:
            final_label = "Uncertain"
            explanation = (
                "😐 The model predicted 'unbiased' but with low confidence. The result is unclear."
            )

    return {
        "Bias Classification": final_label,
        "Confidence Score": confidence,
        "Explanation": explanation
    }

# build the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Bias Bin – Fine-Tuned BERT Version by Aryan, Gowtham & Manoj")
    gr.Markdown("Detect gender bias in text using a BERT model fine-tuned with counterfactual data.")

    # input box for users
    text_input = gr.Textbox(
        label="Enter Narrative Text",
        lines=4,
        placeholder="E.g., 'The woman stayed at home while the man went to work.'"
    )

    # button to submit
    submit_btn = gr.Button("Detect Bias")

    # output area
    output = gr.JSON(label="Prediction Output")

    # connect button to function
    submit_btn.click(fn=detect_bias, inputs=text_input, outputs=output)

    # disclaimer at the bottom
    gr.Markdown("⚠️ **Disclaimer:** This model is trained on a small, synthetic dataset and may not always be accurate. Results should be interpreted cautiously and reviewed by a human.")

# run the app
demo.launch()