{ "cells": [ { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "from src import BertForSemanticEmbedding, getLabelModel\n", "from src import DataTrainingArguments, ModelArguments, CustomTrainingArguments, read_yaml_config\n", "from src import dataset_classification_type\n", "from src import SemSupDataset\n", "from transformers import AutoConfig, HfArgumentParser, AutoTokenizer\n", "import torch\n", "\n", "import json\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "ARGS_FILE = 'configs/ablation_amzn_eda.yml'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yaml Config is:\n", "--------------------------------------------------------------------------------\n", "{'task_name': 'amazon13k', 'dataset_name': 'amazon13k', 'dataset_config_name': None, 'max_seq_length': 160, 'overwrite_output_dir': False, 'overwrite_cache': False, 'pad_to_max_length': True, 'load_from_local': True, 'max_train_samples': None, 'max_eval_samples': 15000, 'max_predict_samples': None, 'train_file': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/train_split6500_2.jsonl', 'validation_file': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/test_unseen_split6500_2.jsonl', 'test_file': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/test_unseen_split6500_2.jsonl', 'label_max_seq_length': 160, 'descriptions_file': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/heir_withdescriptions_v3_v3_unseen_edaaug.json', 'test_descriptions_file': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/heir_withdescriptions_v3_v3.json', 'all_labels': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/all_labels.txt', 'test_labels': '/n/fs/nlp-pranjal/SemSup-LMLC/training/datasets/Amzn13K/unseen_labels_split6500_2.txt', 'contrastive_learning_samples': 1000, 'cl_min_positive_descs': 1, 'coil_cluster_mapping_path': 'bert_coil_map_dict_lemma255K_isotropic.json', 'model_name_or_path': 'bert-base-uncased', 'config_name': None, 'tokenizer_name': None, 'cache_dir': None, 'use_fast_tokenizer': True, 'model_revision': 'main', 'use_auth_token': False, 'ignore_mismatched_sizes': False, 'negative_sampling': 'none', 'semsup': True, 'label_model_name_or_path': 'prajjwal1/bert-small', 'encoder_model_type': 'bert', 'use_custom_optimizer': 'adamw', 'output_learning_rate': 0.0001, 'arch_type': 2, 'add_label_name': True, 'normalize_embeddings': False, 'tie_weights': False, 'coil': True, 'colbert': False, 'token_dim': 16, 'label_frozen_layers': 2, 'do_train': True, 'do_eval': True, 'do_predict': False, 'per_device_train_batch_size': 1, 'gradient_accumulation_steps': 8, 'per_device_eval_batch_size': 1, 'learning_rate': 5e-05, 'num_train_epochs': 2, 'save_steps': 4900, 'evaluation_strategy': 'steps', 'eval_steps': 3000000, 'fp16': True, 'fp16_opt_level': 'O1', 'lr_scheduler_type': 'linear', 'dataloader_num_workers': 16, 'label_names': ['labels'], 'scenario': 'unseen_labels', 'ddp_find_unused_parameters': False, 'ignore_data_skip': True, 'seed': -1, 'EXP_NAME': 'semsup_descs_100ep_newds_cosine', 'EXP_DESC': 'SemSup Descriptions ran for 100 epochs', 'output_dir': 'demo_tmp'}\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ "parser = HfArgumentParser((ModelArguments, DataTrainingArguments, CustomTrainingArguments))\n", "model_args, data_args, training_args = parser.parse_dict(read_yaml_config(ARGS_FILE, output_dir = 'demo_tmp', extra_args = {}))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "config = AutoConfig.from_pretrained(\n", " model_args.config_name if model_args.config_name else model_args.model_name_or_path,\n", " finetuning_task=data_args.task_name,\n", " cache_dir=model_args.cache_dir,\n", " revision=model_args.model_revision,\n", " use_auth_token=True if model_args.use_auth_token else None,\n", ")\n", "\n", "config.model_name_or_path = model_args.model_name_or_path\n", "config.problem_type = dataset_classification_type[data_args.task_name]\n", "config.negative_sampling = model_args.negative_sampling\n", "config.semsup = model_args.semsup\n", "config.encoder_model_type = model_args.encoder_model_type\n", "config.arch_type = model_args.arch_type\n", "config.coil = model_args.coil\n", "config.token_dim = model_args.token_dim\n", "config.colbert = model_args.colbert" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Config is BertConfig {\n", " \"_name_or_path\": \"bert-base-uncased\",\n", " \"arch_type\": 2,\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"coil\": true,\n", " \"colbert\": false,\n", " \"encoder_model_type\": \"bert\",\n", " \"finetuning_task\": \"amazon13k\",\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"label_hidden_size\": 512,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_name_or_path\": \"bert-base-uncased\",\n", " \"model_type\": \"bert\",\n", " \"negative_sampling\": \"none\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"problem_type\": \"multi_label_classification\",\n", " \"semsup\": true,\n", " \"token_dim\": 16,\n", " \"transformers_version\": \"4.20.0\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " \"vocab_size\": 30522\n", "}\n", "\n" ] }, { "data": { "text/html": [ "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", "│ /tmp/ipykernel_1149757/433561231.py:6 in <cell line: 6> │\n", "│ │\n", "│ [Errno 2] No such file or directory: '/tmp/ipykernel_1149757/433561231.py' │\n", "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "NameError: name 'label_list' is not defined\n", "\n" ], "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/tmp/ipykernel_1149757/\u001b[0m\u001b[1;33m433561231.py\u001b[0m:\u001b[94m6\u001b[0m in \u001b[92m
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", "│ /tmp/ipykernel_1149757/1661601364.py:1 in <cell line: 1> │\n", "│ │\n", "│ [Errno 2] No such file or directory: '/tmp/ipykernel_1149757/1661601364.py' │\n", "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", "NameError: name 'logits' is not defined\n", "\n" ], "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/tmp/ipykernel_1149757/\u001b[0m\u001b[1;33m1661601364.py\u001b[0m:\u001b[94m1\u001b[0m in \u001b[92m