ArtoriasXV committed on Mar 8, 2024

Commit

b4e7718

verified ·

1 Parent(s): 1c2db32

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-0/checkpoint-56/config.json +25 -0
run-0/checkpoint-56/model.safetensors +3 -0
run-0/checkpoint-56/optimizer.pt +3 -0
run-0/checkpoint-56/rng_state.pth +3 -0
run-0/checkpoint-56/scheduler.pt +3 -0
run-0/checkpoint-56/special_tokens_map.json +7 -0
run-0/checkpoint-56/tokenizer.json +0 -0
run-0/checkpoint-56/tokenizer_config.json +55 -0
run-0/checkpoint-56/trainer_state.json +62 -0
run-0/checkpoint-56/training_args.bin +3 -0
run-0/checkpoint-56/vocab.txt +0 -0
run-1/checkpoint-108/config.json +25 -0
run-1/checkpoint-108/model.safetensors +3 -0
run-1/checkpoint-108/optimizer.pt +3 -0
run-1/checkpoint-108/rng_state.pth +3 -0
run-1/checkpoint-108/scheduler.pt +3 -0
run-1/checkpoint-108/special_tokens_map.json +7 -0
run-1/checkpoint-108/tokenizer.json +0 -0
run-1/checkpoint-108/tokenizer_config.json +55 -0
run-1/checkpoint-108/trainer_state.json +44 -0
run-1/checkpoint-108/training_args.bin +3 -0
run-1/checkpoint-108/vocab.txt +0 -0
run-1/checkpoint-162/config.json +25 -0
run-1/checkpoint-162/model.safetensors +3 -0
run-1/checkpoint-162/optimizer.pt +3 -0
run-1/checkpoint-162/rng_state.pth +3 -0
run-1/checkpoint-162/scheduler.pt +3 -0
run-1/checkpoint-162/special_tokens_map.json +7 -0
run-1/checkpoint-162/tokenizer.json +0 -0
run-1/checkpoint-162/tokenizer_config.json +55 -0
run-1/checkpoint-162/trainer_state.json +53 -0
run-1/checkpoint-162/training_args.bin +3 -0
run-1/checkpoint-162/vocab.txt +0 -0
run-1/checkpoint-216/config.json +25 -0
run-1/checkpoint-216/model.safetensors +3 -0
run-1/checkpoint-216/optimizer.pt +3 -0
run-1/checkpoint-216/rng_state.pth +3 -0
run-1/checkpoint-216/scheduler.pt +3 -0
run-1/checkpoint-216/special_tokens_map.json +7 -0
run-1/checkpoint-216/tokenizer.json +0 -0
run-1/checkpoint-216/tokenizer_config.json +55 -0
run-1/checkpoint-216/trainer_state.json +62 -0
run-1/checkpoint-216/training_args.bin +3 -0
run-1/checkpoint-216/vocab.txt +0 -0
run-1/checkpoint-270/config.json +25 -0
run-1/checkpoint-270/model.safetensors +3 -0
run-1/checkpoint-270/optimizer.pt +3 -0
run-1/checkpoint-270/rng_state.pth +3 -0
run-1/checkpoint-270/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:545b6a713c6506dd2198745a6eb79045153309a67ec1fcae0279a5765e5c7cae
+oid sha256:ca695362ee6b22cb7b6e9f819725f63ad1ec2693e5a34a78ab2c3becc56cc066
 size 267832560

run-0/checkpoint-56/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-56/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:545b6a713c6506dd2198745a6eb79045153309a67ec1fcae0279a5765e5c7cae
+size 267832560

run-0/checkpoint-56/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5883f555da3bbd7f6c6cda28d277924dc40bb9d86f0bd0ab9f49b496d319b3c
+size 535727290

run-0/checkpoint-56/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65e4492e3f27619401192d125a149b39d48b4ab3e263956db240341bf5323451
+size 14244

run-0/checkpoint-56/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8431f71a0f6cc21b01bc55e5fe3bb87a949608ea0d3a8fce96465049865aa6ce
+size 1064

run-0/checkpoint-56/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-56/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-56/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-56/trainer_state.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-14",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 56,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6275959014892578,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.8308,
+      "eval_samples_per_second": 1255.449,
+      "eval_steps_per_second": 79.444,
+      "step": 14
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6147311925888062,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.6967,
+      "eval_samples_per_second": 1497.063,
+      "eval_steps_per_second": 94.733,
+      "step": 28
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6137582659721375,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7586,
+      "eval_samples_per_second": 1374.866,
+      "eval_steps_per_second": 87.0,
+      "step": 42
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.6106928586959839,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.6886,
+      "eval_samples_per_second": 1514.709,
+      "eval_steps_per_second": 95.849,
+      "step": 56
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 56,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 8.219212725200357e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 64,
+    "seed": 15
+  }
+}

run-0/checkpoint-56/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52ed7b2111aadd145363f970443f5758354e03e46557384b6001a048998cb254
+size 4984

run-0/checkpoint-56/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-108/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-1/checkpoint-108/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:956221cbe845b519d581db6ff4678b590a6eece35c812bcacb586e429d75076c
+size 267832560

run-1/checkpoint-108/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31134d15f7ba350f2f45018c5fca64d94630787f87641a95afe4d976a485f97c
+size 535727290

run-1/checkpoint-108/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d2ce677d59bd42c4069fde1f4e20262cfffbc9034e60063a7cb86b37e99e902
+size 14244

run-1/checkpoint-108/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c8251d96a636289cdea64fa21b3c0d1a8a9c513771742eddfe37c3f25fea6df
+size 1064

run-1/checkpoint-108/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-108/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-108/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-108/trainer_state.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "best_metric": 0.2555516815486373,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-108",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 108,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5905566215515137,
+      "eval_matthews_correlation": 0.0928457264044978,
+      "eval_runtime": 0.6953,
+      "eval_samples_per_second": 1500.167,
+      "eval_steps_per_second": 94.929,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7765615582466125,
+      "eval_matthews_correlation": 0.2555516815486373,
+      "eval_runtime": 1.3105,
+      "eval_samples_per_second": 795.885,
+      "eval_steps_per_second": 50.363,
+      "step": 108
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 8.062343844760649e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 22
+  }
+}

run-1/checkpoint-108/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2a285bd5d1ba7ccfe930c23cedf60174cf9292ccd17171c6cbf5354a51e2946
+size 4984

run-1/checkpoint-108/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-162/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-1/checkpoint-162/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae09d92dceec3922101ef24e1bc1efd3d7e037b1e9d69e34c44bb374ba01c00f
+size 267832560

run-1/checkpoint-162/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:503db2bcc09b65376c06a7cbd95cd00a1d98de1605db1a78bfe7c3304eb48172
+size 535727290

run-1/checkpoint-162/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05cf25a8aac0197f9dd0fc73950ab5ebf0b0120b5a26a3fbb66223702c12946d
+size 14244

run-1/checkpoint-162/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6a36b6cdaf28c70eb84437a2f24980006c3a05981a9de85b15c595542973b88
+size 1064

run-1/checkpoint-162/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-162/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-162/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-162/trainer_state.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "best_metric": 0.3052591174566493,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-162",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 162,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5905566215515137,
+      "eval_matthews_correlation": 0.0928457264044978,
+      "eval_runtime": 0.6953,
+      "eval_samples_per_second": 1500.167,
+      "eval_steps_per_second": 94.929,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7765615582466125,
+      "eval_matthews_correlation": 0.2555516815486373,
+      "eval_runtime": 1.3105,
+      "eval_samples_per_second": 795.885,
+      "eval_steps_per_second": 50.363,
+      "step": 108
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.1682683229446411,
+      "eval_matthews_correlation": 0.3052591174566493,
+      "eval_runtime": 1.411,
+      "eval_samples_per_second": 739.172,
+      "eval_steps_per_second": 46.774,
+      "step": 162
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 8.062343844760649e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 22
+  }
+}

run-1/checkpoint-162/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2a285bd5d1ba7ccfe930c23cedf60174cf9292ccd17171c6cbf5354a51e2946
+size 4984

run-1/checkpoint-162/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-216/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-1/checkpoint-216/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5390e2c4c3d568fc71afe1987e968724b12ebb0ff1fb2952ffdaef05ea1632e9
+size 267832560

run-1/checkpoint-216/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53add8627aa0a30967afd48e973232ce59599a9edf12c32787d8bfef5c09612d
+size 535727290

run-1/checkpoint-216/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b99bd1a1dde66fbd9bfaac8ed2c34fb9bdde5bc7e9c66485be72a23c4b855e0
+size 14244

run-1/checkpoint-216/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a75b5a41ffe790fd9e286de60053f89c56d9ef6562a6877d0348d10932146bdb
+size 1064

run-1/checkpoint-216/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-216/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-216/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-216/trainer_state.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "best_metric": 0.3052591174566493,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-162",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 216,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5905566215515137,
+      "eval_matthews_correlation": 0.0928457264044978,
+      "eval_runtime": 0.6953,
+      "eval_samples_per_second": 1500.167,
+      "eval_steps_per_second": 94.929,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7765615582466125,
+      "eval_matthews_correlation": 0.2555516815486373,
+      "eval_runtime": 1.3105,
+      "eval_samples_per_second": 795.885,
+      "eval_steps_per_second": 50.363,
+      "step": 108
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.1682683229446411,
+      "eval_matthews_correlation": 0.3052591174566493,
+      "eval_runtime": 1.411,
+      "eval_samples_per_second": 739.172,
+      "eval_steps_per_second": 46.774,
+      "step": 162
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.3417069911956787,
+      "eval_matthews_correlation": 0.2801337199061549,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.598,
+      "eval_steps_per_second": 86.604,
+      "step": 216
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 8.062343844760649e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 22
+  }
+}

run-1/checkpoint-216/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2a285bd5d1ba7ccfe930c23cedf60174cf9292ccd17171c6cbf5354a51e2946
+size 4984

run-1/checkpoint-216/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-270/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-1/checkpoint-270/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc22c8758aab5162b56a8466a09c3e12ae13635fe6c1692baf879ee681245f15
+size 267832560

run-1/checkpoint-270/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14a56d4c35eac2325246795501c7605efe7388c0124debdb51480dee627b8425
+size 535727290

run-1/checkpoint-270/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c19d11d585344ba606d8a3f52d2542cc2301bcb4dd63a6336369906566d51e5c
+size 14244

run-1/checkpoint-270/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d59507f3811c0ebd36f20c9d38253a9e91020beaf273945c8ad380f107bdeaa
+size 1064