aryn25 committed on
Commit
4ccff65
·
verified ·
1 Parent(s): e75d802

Upload Evaluation&Results.ipynb

Browse files
Files changed (1) hide show
  1. Evaluation&Results.ipynb +159 -0
Evaluation&Results.ipynb ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "source": [
20
+ "#upload the fine_tuned_model.zip and narrative_texts.csv then run the code for evaluation\n",
21
+ "\n",
22
+ "import zipfile\n",
23
+ "import os\n",
24
+ "\n",
25
+ "#if the folder doesn't exist already, then extract the model\n",
26
+ "if not os.path.exists(\"fine_tuned_model\"):\n",
27
+ " with zipfile.ZipFile(\"fine_tuned_model.zip\", 'r') as zip_ref:\n",
28
+ " zip_ref.extractall(\"fine_tuned_model\") #extract all model files into the target folder\n",
29
+ "\n",
30
+ "print(\"Model extracted successfully.\") #confirmation message"
31
+ ],
32
+ "metadata": {
33
+ "colab": {
34
+ "base_uri": "https://localhost:8080/"
35
+ },
36
+ "id": "9iMmMqqB6Hf_",
37
+ "outputId": "cb0c6eb8-6650-4087-9bb7-078ec6012375"
38
+ },
39
+ "execution_count": 4,
40
+ "outputs": [
41
+ {
42
+ "output_type": "stream",
43
+ "name": "stdout",
44
+ "text": [
45
+ "Model extracted successfully.\n"
46
+ ]
47
+ }
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "source": [
53
+ "import torch #for deep learning\n",
54
+ "from transformers import BertTokenizer, BertForSequenceClassification #BERT tokenizer and sequence-classification model\n",
55
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score #evaluation metrics\n",
56
+ "import pandas as pd\n",
57
+ "import re #regex\n",
58
+ "\n",
59
+ "#load fine-tuned model and tokenizer\n",
60
+ "model_path = \"./fine_tuned_model\"\n",
61
+ "tokenizer = BertTokenizer.from_pretrained(model_path)\n",
62
+ "model = BertForSequenceClassification.from_pretrained(model_path)\n",
63
+ "model.eval() #set model to evaluation mode\n",
64
+ "\n",
65
+ "#load dataset and normalize the text\n",
66
+ "df = pd.read_csv(\"narrative_texts.csv\")\n",
67
+ "df['text'] = df['text'].str.lower() #convert to lowercase\n",
68
+ "df['text'] = df['text'].apply(lambda x: re.sub(r'[^a-z\\s]', '', x)) #remove non-alphabetic characters\n",
69
+ "df['text'] = df['text'].apply(lambda x: re.sub(r'\\s+', ' ', x).strip()) #clean extra spaces\n",
70
+ "\n",
71
+ "#function to swap gendered words in text\n",
72
+ "def gender_swap(text):\n",
73
+ " swaps = {\n",
74
+ " \" he \": \" TEMP \", \" she \": \" he \", \" TEMP \": \" she \",\n",
75
+ " \" his \": \" TEMP2 \", \" her \": \" his \", \" TEMP2 \": \" her \",\n",
76
+ " \" him \": \" TEMP3 \", \" her \": \" him \", \" TEMP3 \": \" her \"\n",
77
+ " }\n",
78
+ " for key, value in swaps.items():\n",
79
+ " text = text.replace(key, value)\n",
80
+ " return text\n",
81
+ "\n",
82
+ "#generate swapped gender versions of each sentence\n",
83
+ "df['text_swapped'] = df['text'].apply(lambda x: gender_swap(\" \" + x + \" \"))\n",
84
+ "\n",
85
+ "#create a mixed dataset of original and swapped texts\n",
86
+ "df_mixed = pd.concat([df['text'], df['text_swapped']], ignore_index=True)\n",
87
+ "labels_mixed = [0] * len(df) + [1] * len(df) #label 0 for original, 1 for swapped\n",
88
+ "\n",
89
+ "#function to evaluate model performance\n",
90
+ "def evaluate_model(texts, labels):\n",
91
+ " inputs = tokenizer(texts.tolist(), truncation=True, padding=True, return_tensors=\"pt\", max_length=128)\n",
92
+ "\n",
93
+ " with torch.no_grad():\n",
94
+ " outputs = model(**inputs)\n",
95
+ " logits = outputs.logits\n",
96
+ " preds = torch.argmax(logits, dim=1).numpy()\n",
97
+ "\n",
98
+ " acc = accuracy_score(labels, preds)\n",
99
+ " precision = precision_score(labels, preds)\n",
100
+ " recall = recall_score(labels, preds)\n",
101
+ " f1 = f1_score(labels, preds)\n",
102
+ "\n",
103
+ " return {\n",
104
+ " \"Accuracy\": round(acc, 4),\n",
105
+ " \"Precision\": round(precision, 4),\n",
106
+ " \"Recall\": round(recall, 4),\n",
107
+ " \"F1 Score\": round(f1, 4)\n",
108
+ " }"
109
+ ],
110
+ "metadata": {
111
+ "id": "xnCn3rmr62nN"
112
+ },
113
+ "execution_count": 5,
114
+ "outputs": []
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "source": [
119
+ "#evaluating the model on both original and gender-swapped text\n",
120
+ "metrics = evaluate_model(df_mixed, labels_mixed)\n",
121
+ "\n",
122
+ "#printing out the evaluation results\n",
123
+ "print(\"Model Evaluation Results:\")\n",
124
+ "for metric, value in metrics.items():\n",
125
+ " print(f\"{metric}: {value}\") #prints each metric and its value one by one"
126
+ ],
127
+ "metadata": {
128
+ "colab": {
129
+ "base_uri": "https://localhost:8080/"
130
+ },
131
+ "id": "Tyn_TmKo7USd",
132
+ "outputId": "75ae6a93-a783-4357-fd13-d9441a8a7744"
133
+ },
134
+ "execution_count": 7,
135
+ "outputs": [
136
+ {
137
+ "output_type": "stream",
138
+ "name": "stdout",
139
+ "text": [
140
+ "Model Evaluation Results:\n",
141
+ "Accuracy: 0.55\n",
142
+ "Precision: 0.5385\n",
143
+ "Recall: 0.7\n",
144
+ "F1 Score: 0.6087\n"
145
+ ]
146
+ }
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "source": [],
152
+ "metadata": {
153
+ "id": "GfvTDUPp7Wi1"
154
+ },
155
+ "execution_count": null,
156
+ "outputs": []
157
+ }
158
+ ]
159
+ }