DeFamy committed
Commit d2cc008 · 1 Parent(s): 0af51d7

Delete train_model.ipynb

Files changed (1)
  1. train_model.ipynb +0 -909
train_model.ipynb DELETED
@@ -1,909 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "XLhB2j_Hemio"
- },
- "source": [
- "## Read the dataset CSV file"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "hgYEtrYgemir",
- "outputId": "d3ddedc7-8bd7-4ba9-c82e-68e4eb1309c3"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>Text</th>\n",
- " <th>target</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.0</td>\n",
- " <td>polis tangkap</td>\n",
- " <td>NonCyberbully</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1.0</td>\n",
- " <td>kenapa lokasi kebakaran terlalu spesifik</td>\n",
- " <td>NonCyberbully</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2.0</td>\n",
- " <td>menyesal tanya nak for birthday</td>\n",
- " <td>NonCyberbully</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>3.0</td>\n",
- " <td>meriah tah</td>\n",
- " <td>NonCyberbully</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>4.0</td>\n",
- " <td>asal bs kelar kerja jam sik kl baru diajak mee...</td>\n",
- " <td>NonCyberbully</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 Text \\\n",
- "0 0.0 polis tangkap \n",
- "1 1.0 kenapa lokasi kebakaran terlalu spesifik \n",
- "2 2.0 menyesal tanya nak for birthday \n",
- "3 3.0 meriah tah \n",
- "4 4.0 asal bs kelar kerja jam sik kl baru diajak mee... \n",
- "\n",
- " target \n",
- "0 NonCyberbully \n",
- "1 NonCyberbully \n",
- "2 NonCyberbully \n",
- "3 NonCyberbully \n",
- "4 NonCyberbully "
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "df = pd.read_csv('C:/Users/user/Documents/PSM/BERT_Ver2/Transformers-Text-Classification-BERT-Blog-main/input/Tagged_MixedNew.csv')\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "fGUtFkVfemit"
- },
- "source": [
- "## Process the data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "7C3uRWECemiu",
- "outputId": "8e764d84-010d-4e42-987a-af7162627f6e",
- "colab": {
- "referenced_widgets": [
- "042c8b0b8dcf42eb84660c93778d8ea7",
- "4ab6074437a849f79be038b043025283",
- "9aed4d88c18e4e28a1efbbed94331228"
- ]
- }
- },
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "042c8b0b8dcf42eb84660c93778d8ea7",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading (…)okenizer_config.json: 0%| | 0.00/380 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\user\\anaconda3\\lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\user\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
- " warnings.warn(message)\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4ab6074437a849f79be038b043025283",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/233k [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9aed4d88c18e4e28a1efbbed94331228",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading (…)cial_tokens_map.json: 0%| | 0.00/125 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "#from transformers import BertTokenizer\n",
- "#tokenizer = BertTokenizer.from_pretrained('malay-huggingface/bert-tiny-bahasa-cased')\n",
- "\n",
- "from transformers import AutoTokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained('mesolitica/bert-base-standard-bahasa-cased')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Ks3XobW0emiu"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
- "import torch\n",
- "from transformers import TrainingArguments, Trainer\n",
- "from transformers import BertTokenizer, BertForSequenceClassification"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0ZZx6mUdemiv"
- },
- "outputs": [],
- "source": [
- "def process_data(row):\n",
- "\n",
- "    text = row['Text']\n",
- "    text = str(text)\n",
- "    text = ' '.join(text.split())\n",
- "\n",
- "    encodings = tokenizer(text, padding=\"max_length\", truncation=True, max_length=128)\n",
- "\n",
- "    label = 0\n",
- "    if row['target'] == 'Cyberbully':\n",
- "        label += 1\n",
- "\n",
- "    encodings['label'] = label\n",
- "    encodings['Text'] = text\n",
- "\n",
- "    return encodings"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "MaFmqSc-emiv",
- "outputId": "03eb6491-b646-45dd-ef3d-318c81313430"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'input_ids': [2, 2039, 3058, 9857, 1606, 1164, 2161, 8062, 1219, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'label': 0, 'Text': 'Saya suka masakan beliau dan cara penyampaiannya'}\n"
- ]
- }
- ],
- "source": [
- "print(process_data({\n",
- "    'Text': 'Saya suka masakan beliau dan cara penyampaiannya',\n",
- "    'target': 'NonCyberbully'\n",
- "}))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Lel-2lqKemiw"
- },
- "outputs": [],
- "source": [
- "processed_data = []\n",
- "\n",
- "for i in range(len(df[:1383])):\n",
- "    processed_data.append(process_data(df.iloc[i]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "x_DGsKzHemiw"
- },
- "source": [
- "## Generate the dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "oc_NsbnXemiw"
- },
- "outputs": [],
- "source": [
- "from sklearn.model_selection import train_test_split\n",
- "\n",
- "new_df = pd.DataFrame(processed_data)\n",
- "\n",
- "train_df, valid_df = train_test_split(\n",
- "    new_df,\n",
- "    test_size=0.2,\n",
- "    random_state=2022\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4qSci5CRemix"
- },
- "outputs": [],
- "source": [
- "import pyarrow as pa\n",
- "from datasets import Dataset\n",
- "\n",
- "train_hg = Dataset(pa.Table.from_pandas(train_df))\n",
- "valid_hg = Dataset(pa.Table.from_pandas(valid_df))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "xDgnim7iemix",
- "outputId": "59858161-59a4-4731-fbfc-7e30a1246eed"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Dataset({\n",
- " features: ['Text', 'attention_mask', 'input_ids', 'label', 'token_type_ids', '__index_level_0__'],\n",
- " num_rows: 277\n",
- "})"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "valid_hg"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "8Uqq0cKKemiy"
- },
- "source": [
- "## Create a model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "QQkDAXmRemiz",
- "outputId": "e00faff0-c7d7-456d-dab2-73d9839c0274",
- "colab": {
- "referenced_widgets": [
- "b9faad28a43547029c8b13ab639f8d05",
- "6175ea4206304020823d86e0bbc23298"
- ]
- }
- },
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "b9faad28a43547029c8b13ab639f8d05",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading (…)lve/main/config.json: 0%| | 0.00/697 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6175ea4206304020823d86e0bbc23298",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading pytorch_model.bin: 0%| | 0.00/443M [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Some weights of the model checkpoint at mesolitica/bert-base-standard-bahasa-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']\n",
- "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mesolitica/bert-base-standard-bahasa-cased and are newly initialized: ['classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight', 'bert.pooler.dense.weight']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
- ]
- }
- ],
- "source": [
- "#from transformers import BertForSequenceClassification\n",
- "\n",
- "#model = BertForSequenceClassification.from_pretrained(\n",
- "# 'malay-huggingface/bert-tiny-bahasa-cased',\n",
- "# num_labels=2\n",
- "#)\n",
- "\n",
- "\n",
- "from transformers import AutoModelForSequenceClassification\n",
- "\n",
- "model = AutoModelForSequenceClassification.from_pretrained(\n",
- "    'mesolitica/bert-base-standard-bahasa-cased',\n",
- "    num_labels=2\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "ifvtnwBMemi1"
- },
- "outputs": [],
- "source": [
- "def compute_metrics(p):\n",
- "    print(type(p))\n",
- "    pred, labels = p\n",
- "    pred = np.argmax(pred, axis=1)\n",
- "\n",
- "    accuracy = accuracy_score(y_true=labels, y_pred=pred)\n",
- "    recall = recall_score(y_true=labels, y_pred=pred)\n",
- "    precision = precision_score(y_true=labels, y_pred=pred)\n",
- "    f1 = f1_score(y_true=labels, y_pred=pred)\n",
- "\n",
- "    return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n",
- ""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "50Xy9P7Remi2"
- },
- "outputs": [],
- "source": [
- "from transformers import TrainingArguments, Trainer\n",
- "\n",
- "training_args = TrainingArguments(output_dir=\"./result\", evaluation_strategy=\"epoch\")\n",
- "\n",
- "trainer = Trainer(\n",
- "    model=model,\n",
- "    args=training_args,\n",
- "    train_dataset=train_hg,\n",
- "    eval_dataset=valid_hg,\n",
- "    tokenizer=tokenizer,\n",
- "    compute_metrics=compute_metrics\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "myIstfgJemi3"
- },
- "source": [
- "## Train and Evaluate the model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "-UtAkNHUemi4",
- "outputId": "5af038f3-a77c-41eb-e48d-747a8e776e38"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\user\\anaconda3\\lib\\site-packages\\transformers\\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- " warnings.warn(\n",
- "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " <div>\n",
- " \n",
- " <progress value='417' max='417' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
- " [417/417 56:36, Epoch 3/3]\n",
- " </div>\n",
- " <table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: left;\">\n",
- " <th>Epoch</th>\n",
- " <th>Training Loss</th>\n",
- " <th>Validation Loss</th>\n",
- " <th>Accuracy</th>\n",
- " <th>Precision</th>\n",
- " <th>Recall</th>\n",
- " <th>F1</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>No log</td>\n",
- " <td>0.493876</td>\n",
- " <td>0.779783</td>\n",
- " <td>0.657343</td>\n",
- " <td>0.886792</td>\n",
- " <td>0.755020</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>No log</td>\n",
- " <td>0.542367</td>\n",
- " <td>0.870036</td>\n",
- " <td>0.850000</td>\n",
- " <td>0.801887</td>\n",
- " <td>0.825243</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>No log</td>\n",
- " <td>0.725669</td>\n",
- " <td>0.848375</td>\n",
- " <td>0.820000</td>\n",
- " <td>0.773585</td>\n",
- " <td>0.796117</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table><p>"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'transformers.trainer_utils.EvalPrediction'>\n",
- "<class 'transformers.trainer_utils.EvalPrediction'>\n",
- "<class 'transformers.trainer_utils.EvalPrediction'>\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "TrainOutput(global_step=417, training_loss=0.2771467213436282, metrics={'train_runtime': 3405.0836, 'train_samples_per_second': 0.974, 'train_steps_per_second': 0.122, 'total_flos': 218053287129600.0, 'train_loss': 0.2771467213436282, 'epoch': 3.0})"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "trainer.train()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "fZYGhNyremi4",
- "outputId": "5119c379-d7e9-48f7-9137-d788f99a3731"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " <div>\n",
- " \n",
- " <progress value='35' max='35' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
- " [35/35 00:43]\n",
- " </div>\n",
- " "
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'transformers.trainer_utils.EvalPrediction'>\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'eval_loss': 0.7256694436073303,\n",
- " 'eval_accuracy': 0.8483754512635379,\n",
- " 'eval_precision': 0.82,\n",
- " 'eval_recall': 0.7735849056603774,\n",
- " 'eval_f1': 0.796116504854369,\n",
- " 'eval_runtime': 44.9419,\n",
- " 'eval_samples_per_second': 6.164,\n",
- " 'eval_steps_per_second': 0.779,\n",
- " 'epoch': 3.0}"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "trainer.evaluate()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "tlw24Ccdemi5"
- },
- "source": [
- "## Save the model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "69n4eVBHemi6"
- },
- "outputs": [],
- "source": [
- "model.save_pretrained('./model/')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "gC9qDoERemi6",
- "outputId": "a5514df7-d322-48b9-df27-c799dca6d884"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Looking in indexes: https://download.pytorch.org/whl/cu117\n",
- "Requirement already satisfied: torch in c:\\users\\user\\anaconda3\\lib\\site-packages (2.0.1+cu118)\n",
- "Requirement already satisfied: torchvision in c:\\users\\user\\anaconda3\\lib\\site-packages (0.15.2+cu117)\n",
- "Requirement already satisfied: torchaudio in c:\\users\\user\\anaconda3\\lib\\site-packages (2.0.2+cu117)\n",
- "Requirement already satisfied: sympy in c:\\users\\user\\anaconda3\\lib\\site-packages (from torch) (1.11.1)\n",
- "Requirement already satisfied: jinja2 in c:\\users\\user\\anaconda3\\lib\\site-packages (from torch) (3.1.2)\n",
- "Requirement already satisfied: filelock in c:\\users\\user\\anaconda3\\lib\\site-packages (from torch) (3.9.0)\n",
- "Requirement already satisfied: networkx in c:\\users\\user\\anaconda3\\lib\\site-packages (from torch) (2.5.1)\n",
- "Requirement already satisfied: typing-extensions in c:\\users\\user\\anaconda3\\lib\\site-packages (from torch) (4.4.0)\n",
- "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (9.4.0)\n",
- "Requirement already satisfied: numpy in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (1.23.5)\n",
- "Requirement already satisfied: requests in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (2.28.1)\n",
- "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from jinja2->torch) (2.1.1)\n",
- "Requirement already satisfied: decorator<5,>=4.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from networkx->torch) (4.4.2)\n",
- "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (2.0.4)\n",
- "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (1.26.14)\n",
- "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (2.10)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (2022.12.7)\n",
- "Requirement already satisfied: mpmath>=0.19 in c:\\users\\user\\anaconda3\\lib\\site-packages (from sympy->torch) (1.2.1)\n"
- ]
- }
- ],
- "source": [
- "!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "3NBugUKAemi7"
- },
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "-W3_K_Kjemi7"
- },
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yMiT54Ddemi7"
- },
- "source": [
- "## Load the model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "mEFnUaM3emi7"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "from transformers import AutoModelForSequenceClassification\n",
- "\n",
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
- "\n",
- "new_model = AutoModelForSequenceClassification.from_pretrained('./model/').to(device)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "zkDeulcTemi8",
- "outputId": "2500b324-398b-471b-9c08-48fa79ea9de3"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "ERROR: torch-1.0.1-cp36-cp36m-win_amd64.whl is not a supported wheel on this platform.\n",
- "\n",
- "[notice] A new release of pip is available: 23.0.1 -> 23.1.2\n",
- "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: torchvision in c:\\users\\user\\anaconda3\\lib\\site-packages (0.14.0)\n",
- "Requirement already satisfied: typing-extensions in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (4.1.1)\n",
- "Requirement already satisfied: requests in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (2.27.1)\n",
- "Requirement already satisfied: torch==1.13.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (1.13.0)\n",
- "Requirement already satisfied: numpy in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (1.24.2)\n",
- "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from torchvision) (9.0.1)\n",
- "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (3.3)\n",
- "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (2.0.4)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (2022.9.24)\n",
- "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->torchvision) (1.26.9)\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "[notice] A new release of pip is available: 23.0.1 -> 23.1.2\n",
- "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
- ]
- }
- ],
- "source": [
- "!pip install https://download.pytorch.org/whl/cpu/torch-1.0.1-cp36-cp36m-win_amd64.whl\n",
- "!pip install torchvision"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "WtI-WDBhemi8"
- },
- "outputs": [],
- "source": [
- "from transformers import AutoTokenizer\n",
- "\n",
- "new_tokenizer = AutoTokenizer.from_pretrained('mesolitica/bert-base-standard-bahasa-cased')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "S2X_uPYJemi9"
- },
- "source": [
- "## Get predictions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "qXKQEiWxemi9"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "import numpy as np\n",
- "\n",
- "def get_prediction(text):\n",
- "    encoding = new_tokenizer(text, return_tensors=\"pt\", padding=\"max_length\", truncation=True, max_length=128)\n",
- "    encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}\n",
- "\n",
- "    outputs = new_model(**encoding)\n",
- "\n",
- "    logits = outputs.logits\n",
- "    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
- "    sigmoid = torch.nn.Sigmoid()\n",
- "    print(sigmoid)\n",
- "    probs = sigmoid(logits.squeeze().cpu())\n",
- "    probs = probs.detach().numpy()\n",
- "    label = np.argmax(probs, axis=-1)\n",
- "\n",
- "    if label == 1:\n",
- "        return {\n",
- "            'Target': 'Cyberbully',\n",
- "            'probability': probs[1]\n",
- "        }\n",
- "    else:\n",
- "        return {\n",
- "            'Target': 'Not Cyberbully',\n",
- "            'probability': probs[0]\n",
- "        }"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "NcYq4vmVemi9"
- },
- "outputs": [],
- "source": [
- "# dir()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "CS_2FfAeemi_",
- "outputId": "106776a5-fced-4329-aa1f-5970a4a71386"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Sigmoid()\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'Target': 'Cyberbully', 'probability': 0.9651532}"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_prediction('Aku malas kerja dengan orang macam ni menyusahkan orang je')"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.9"
- },
- "vscode": {
- "interpreter": {
- "hash": "173fe52379437b78f95c8980b8ee9f2930fd7b56889ab31a72735475ddc10c81"
- }
- },
- "colab": {
- "provenance": []
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }