{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (4.49.0)\n", "Requirement already satisfied: datasets in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (3.3.1)\n", "Requirement already satisfied: torch in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (2.2.1+cu121)\n", "Requirement already satisfied: scikit-learn in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (1.3.2)\n", "Requirement already satisfied: accelerate in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (1.4.0)\n", "Requirement already satisfied: filelock in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (3.17.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.26.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (0.29.1)\n", "Requirement already satisfied: numpy>=1.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (0.5.2)\n", "Requirement already satisfied: tqdm>=4.27 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (19.0.1)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (0.3.8)\n", "Requirement already satisfied: pandas in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (2.1.4)\n", "Requirement already satisfied: xxhash in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (3.5.0)\n", "Requirement already satisfied: multiprocess<0.70.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (0.70.16)\n", "Requirement already satisfied: fsspec<=2024.12.0,>=2023.1.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets) (2024.12.0)\n", "Requirement already satisfied: aiohttp in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from datasets) (3.11.12)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (4.12.2)\n", "Requirement already satisfied: sympy in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (1.13.3)\n", "Requirement already satisfied: networkx in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (3.4.2)\n", "Requirement already satisfied: jinja2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (3.1.5)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (8.9.2.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (11.0.2.54)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.0.106)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (2.19.3)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n", "Requirement already satisfied: triton==2.2.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (2.2.0)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.8.61)\n", "Requirement already satisfied: scipy>=1.5.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from scikit-learn) (1.11.4)\n", "Requirement already satisfied: joblib>=1.1.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from scikit-learn) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from scikit-learn) (3.5.0)\n", "Requirement already satisfied: psutil in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from accelerate) (6.1.1)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (2.4.4)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.2)\n", "Requirement already satisfied: async-timeout<6.0,>=4.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (5.0.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (25.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (6.1.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (0.2.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from aiohttp->datasets) (1.18.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->transformers) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->transformers) (2025.1.31)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from jinja2->torch) (3.0.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas->datasets) (2025.1)\n", "Requirement already satisfied: tzdata>=2022.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from pandas->datasets) (2025.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from sympy->torch) (1.3.0)\n", "Requirement already satisfied: six>=1.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install transformers datasets torch scikit-learn accelerate\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset\n", "from transformers import RobertaTokenizer\n", "import pandas as pd\n", "\n", "# Load IMDB dataset\n", "dataset = load_dataset(\"imdb\")\n", "\n", "# Load RoBERTa tokenizer\n", "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n", "\n", "# Tokenization function\n", "def tokenize_function(examples):\n", " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True, max_length=512)\n", "\n", "# Tokenize dataset\n", "tokenized_datasets = dataset.map(tokenize_function, batched=True)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from transformers import DataCollatorWithPadding\n", "\n", "# Convert labels to PyTorch format\n", "tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", "tokenized_datasets.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n", "\n", "# Data collator to handle padding dynamically\n", "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "from transformers import RobertaForSequenceClassification\n", "\n", "# Load RoBERTa with classification head\n", "model = RobertaForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=2)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformers version: 4.49.0\n", "Accelerate version: 1.4.0\n", "Torch version: 2.2.1+cu121\n" ] } ], "source": [ "import transformers\n", "import accelerate\n", "\n", "print(\"Transformers version:\", transformers.__version__)\n", "print(\"Accelerate version:\", accelerate.__version__)\n", "import torch\n", "print(\"Torch version:\", torch.__version__)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/training_args.py:1594: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", "/tmp/ipykernel_1535/2580171433.py:18: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " trainer = Trainer(\n" ] }, { "data": { "text/html": [ "\n", "
| Epoch | \n", "Training Loss | \n", "Validation Loss | \n", "
|---|---|---|
| 1 | \n", "0.347200 | \n", "0.283987 | \n", "
| 2 | \n", "0.223200 | \n", "0.214965 | \n", "
| 3 | \n", "0.136300 | \n", "0.250880 | \n", "
"
],
"text/plain": [
"