SjardiWillems committed on Mar 7, 2024

Commit

2b4c6f3

verified ·

1 Parent(s): bf26557

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-0/checkpoint-18/config.json +31 -0
run-0/checkpoint-18/model.safetensors +3 -0
run-0/checkpoint-18/optimizer.pt +3 -0
run-0/checkpoint-18/rng_state.pth +3 -0
run-0/checkpoint-18/scheduler.pt +3 -0
run-0/checkpoint-18/special_tokens_map.json +7 -0
run-0/checkpoint-18/tokenizer.json +0 -0
run-0/checkpoint-18/tokenizer_config.json +55 -0
run-0/checkpoint-18/trainer_state.json +46 -0
run-0/checkpoint-18/training_args.bin +3 -0
run-0/checkpoint-18/vocab.txt +0 -0
run-0/checkpoint-27/config.json +31 -0
run-0/checkpoint-27/model.safetensors +3 -0
run-0/checkpoint-27/optimizer.pt +3 -0
run-0/checkpoint-27/rng_state.pth +3 -0
run-0/checkpoint-27/scheduler.pt +3 -0
run-0/checkpoint-27/special_tokens_map.json +7 -0
run-0/checkpoint-27/tokenizer.json +0 -0
run-0/checkpoint-27/tokenizer_config.json +55 -0
run-0/checkpoint-27/trainer_state.json +56 -0
run-0/checkpoint-27/training_args.bin +3 -0
run-0/checkpoint-27/vocab.txt +0 -0
run-0/checkpoint-36/config.json +31 -0
run-0/checkpoint-36/model.safetensors +3 -0
run-0/checkpoint-36/optimizer.pt +3 -0
run-0/checkpoint-36/rng_state.pth +3 -0
run-0/checkpoint-36/scheduler.pt +3 -0
run-0/checkpoint-36/special_tokens_map.json +7 -0
run-0/checkpoint-36/tokenizer.json +0 -0
run-0/checkpoint-36/tokenizer_config.json +55 -0
run-0/checkpoint-36/trainer_state.json +66 -0
run-0/checkpoint-36/training_args.bin +3 -0
run-0/checkpoint-36/vocab.txt +0 -0
run-0/checkpoint-45/config.json +31 -0
run-0/checkpoint-45/model.safetensors +3 -0
run-0/checkpoint-45/optimizer.pt +3 -0
run-0/checkpoint-45/rng_state.pth +3 -0
run-0/checkpoint-45/scheduler.pt +3 -0
run-0/checkpoint-45/special_tokens_map.json +7 -0
run-0/checkpoint-45/tokenizer.json +0 -0
run-0/checkpoint-45/tokenizer_config.json +55 -0
run-0/checkpoint-45/trainer_state.json +76 -0
run-0/checkpoint-45/training_args.bin +3 -0
run-0/checkpoint-45/vocab.txt +0 -0
run-1/checkpoint-72/model.safetensors +1 -1
run-1/checkpoint-72/optimizer.pt +1 -1
run-1/checkpoint-72/rng_state.pth +2 -2
run-1/checkpoint-72/scheduler.pt +1 -1
run-1/checkpoint-72/trainer_state.json +15 -45

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed7607757531a0d94c65185f850961773e46fcb5d119d13e3863e3c032cea5c7
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:252cb0fde84c771fe9f26cd1f149f0c098738ed7f935bde0df5cbd3694bf14b3
 size 267829484

run-0/checkpoint-18/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-18/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:baf1bc3b6031979712ae9a703e994639a838c6d1418547d3de4cd5d42fcc1987
+size 267829484

run-0/checkpoint-18/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:678ee58ea945e14347259ddbee8c7087506c15ab3bb2d556a6d4952c8c3a495f
+size 535721146

run-0/checkpoint-18/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e286b9bff8b53bef74ce22a2bb491418497057e71b61563237976997da7ac700
+size 14244

run-0/checkpoint-18/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39cff69ca41eb8f894bf9831891f8d1750928f9531a056de16a4c4e7a269dc4a
+size 1064

run-0/checkpoint-18/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-18/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-18/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-18/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.4951149744376661,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-18",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 18,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.302802324295044,
+      "eval_pearson": 0.11274566771797583,
+      "eval_runtime": 0.9178,
+      "eval_samples_per_second": 1634.281,
+      "eval_spearmanr": 0.12987920930990302,
+      "eval_steps_per_second": 102.415,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.3637888431549072,
+      "eval_pearson": 0.4951149744376661,
+      "eval_runtime": 1.7789,
+      "eval_samples_per_second": 843.198,
+      "eval_spearmanr": 0.4849163929338027,
+      "eval_steps_per_second": 52.84,
+      "step": 18
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.481764265618048e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 64,
+    "seed": 34
+  }
+}

run-0/checkpoint-18/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c09fba38efbfaed97000a3a94d8c55aa1cde1316bbda4831aa4e5e1bd7ee7a7
+size 4920

run-0/checkpoint-18/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-27/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-27/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8be6f6fa0df4bfbbbfda08cce877b6e7e3b0283edbd2c7b75f7df9bb5286c0b1
+size 267829484

run-0/checkpoint-27/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8b66082d319b2338bcfea10c929a0420129fc6863b06548d301048bdc23bd87
+size 535721146

run-0/checkpoint-27/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8d52213f53e08ff3d1242d3153448b0b10804720396a2908b09ce5eb6baf816
+size 14244

run-0/checkpoint-27/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e15d089373332dccb9316de3e868ab812d210567758f27a5c51bef54f05aa9
+size 1064

run-0/checkpoint-27/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-27/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-27/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-27/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.6914272738462158,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-27",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 27,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.302802324295044,
+      "eval_pearson": 0.11274566771797583,
+      "eval_runtime": 0.9178,
+      "eval_samples_per_second": 1634.281,
+      "eval_spearmanr": 0.12987920930990302,
+      "eval_steps_per_second": 102.415,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.3637888431549072,
+      "eval_pearson": 0.4951149744376661,
+      "eval_runtime": 1.7789,
+      "eval_samples_per_second": 843.198,
+      "eval_spearmanr": 0.4849163929338027,
+      "eval_steps_per_second": 52.84,
+      "step": 18
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.796323537826538,
+      "eval_pearson": 0.6914272738462158,
+      "eval_runtime": 2.2193,
+      "eval_samples_per_second": 675.895,
+      "eval_spearmanr": 0.6818774421011495,
+      "eval_steps_per_second": 42.356,
+      "step": 27
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.481764265618048e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 64,
+    "seed": 34
+  }
+}

run-0/checkpoint-27/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c09fba38efbfaed97000a3a94d8c55aa1cde1316bbda4831aa4e5e1bd7ee7a7
+size 4920

run-0/checkpoint-27/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-36/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-36/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4970e9ff8f9a3040577b0049ae497296e6987a30fac9b1978c983f913df74fa5
+size 267829484

run-0/checkpoint-36/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8139b1565518d5a05b7de8a7feebaf2491cddb1d6df45d777067b9c734fa147a
+size 535721146

run-0/checkpoint-36/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bab4519567a43826cccf1e52a5b955644cdaddb11d27e410800b049731a6001b
+size 14244

run-0/checkpoint-36/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b82dab0331e4c7b81b892b8fe9731b1b085524364be8bf73cf5a4d037bc735a6
+size 1064

run-0/checkpoint-36/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-36/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-36/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-36/trainer_state.json ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+  "best_metric": 0.7636700196702717,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-36",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 36,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.302802324295044,
+      "eval_pearson": 0.11274566771797583,
+      "eval_runtime": 0.9178,
+      "eval_samples_per_second": 1634.281,
+      "eval_spearmanr": 0.12987920930990302,
+      "eval_steps_per_second": 102.415,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.3637888431549072,
+      "eval_pearson": 0.4951149744376661,
+      "eval_runtime": 1.7789,
+      "eval_samples_per_second": 843.198,
+      "eval_spearmanr": 0.4849163929338027,
+      "eval_steps_per_second": 52.84,
+      "step": 18
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.796323537826538,
+      "eval_pearson": 0.6914272738462158,
+      "eval_runtime": 2.2193,
+      "eval_samples_per_second": 675.895,
+      "eval_spearmanr": 0.6818774421011495,
+      "eval_steps_per_second": 42.356,
+      "step": 27
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.0367522239685059,
+      "eval_pearson": 0.7636700196702717,
+      "eval_runtime": 1.9263,
+      "eval_samples_per_second": 778.713,
+      "eval_spearmanr": 0.7608002745735356,
+      "eval_steps_per_second": 48.799,
+      "step": 36
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.481764265618048e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 64,
+    "seed": 34
+  }
+}

run-0/checkpoint-36/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c09fba38efbfaed97000a3a94d8c55aa1cde1316bbda4831aa4e5e1bd7ee7a7
+size 4920

run-0/checkpoint-36/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-45/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-45/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a4722c63f648fc239d2d7575df701f7f042b0f1f2330e9322ec6da12374eeac
+size 267829484

run-0/checkpoint-45/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:456c7dad06cf3ecce368d782ff76c0738e27ea2dac98e40a035b1e2d897f2ad1
+size 535721146

run-0/checkpoint-45/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5db689fab0a3dc998ce051211556a4fde769c6ad5b5e6756d907fec36410b2aa
+size 14244

run-0/checkpoint-45/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4eddd4325e87d0a81d76b96f2b101ad452ca3c8c920ff3e7f6732184e009808
+size 1064

run-0/checkpoint-45/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-45/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-45/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-45/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_metric": 0.7772615297087864,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-45",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 45,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.302802324295044,
+      "eval_pearson": 0.11274566771797583,
+      "eval_runtime": 0.9178,
+      "eval_samples_per_second": 1634.281,
+      "eval_spearmanr": 0.12987920930990302,
+      "eval_steps_per_second": 102.415,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.3637888431549072,
+      "eval_pearson": 0.4951149744376661,
+      "eval_runtime": 1.7789,
+      "eval_samples_per_second": 843.198,
+      "eval_spearmanr": 0.4849163929338027,
+      "eval_steps_per_second": 52.84,
+      "step": 18
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.796323537826538,
+      "eval_pearson": 0.6914272738462158,
+      "eval_runtime": 2.2193,
+      "eval_samples_per_second": 675.895,
+      "eval_spearmanr": 0.6818774421011495,
+      "eval_steps_per_second": 42.356,
+      "step": 27
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.0367522239685059,
+      "eval_pearson": 0.7636700196702717,
+      "eval_runtime": 1.9263,
+      "eval_samples_per_second": 778.713,
+      "eval_spearmanr": 0.7608002745735356,
+      "eval_steps_per_second": 48.799,
+      "step": 36
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 1.022187352180481,
+      "eval_pearson": 0.7772615297087864,
+      "eval_runtime": 1.1629,
+      "eval_samples_per_second": 1289.882,
+      "eval_spearmanr": 0.7651538770048484,
+      "eval_steps_per_second": 80.833,
+      "step": 45
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 45,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.481764265618048e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 64,
+    "seed": 34
+  }
+}

run-0/checkpoint-45/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c09fba38efbfaed97000a3a94d8c55aa1cde1316bbda4831aa4e5e1bd7ee7a7
+size 4920

run-0/checkpoint-45/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-72/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ef0f539ff34c02af79aeabedd7f85ddfb264b9b08b0195f92e6071e510d0277
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:252cb0fde84c771fe9f26cd1f149f0c098738ed7f935bde0df5cbd3694bf14b3
 size 267829484

run-1/checkpoint-72/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:372e9c401c792b2977f9e437177003754686d0ca357c78ce93b0a017f7c21a9b
 size 535721146

 version https://git-lfs.github.com/spec/v1
+oid sha256:6be8502a6b70d61fd71071c3d7a0bb95d5488c14b6bb1282e4f401b255c57c85
 size 535721146

run-1/checkpoint-72/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04a76703b7eb6896053490b2fcceed02a20fc9a12f67705107c1f3d275652092
-size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c9f2f274085426ce5f70a91b3b5b9148df7fb50b52222177f28f81034e3c481
+size 14244

run-1/checkpoint-72/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72004ae1ef7e1e254ecb0eadb9a5a46a7635836e00b1371ada1ced139a0be0a5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8eb0b045faee1fcc8466dcf28c9f8cbe045f9fe6e8ffc851d18de161e691e00
 size 1064

run-1/checkpoint-72/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.8337510188712361,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-1/checkpoint-72",
-  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": true,
@@ -10,57 +10,27 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 1.5346204042434692,
-      "eval_pearson": 0.7009517245792942,
-      "eval_runtime": 0.7943,
-      "eval_samples_per_second": 1888.564,
-      "eval_spearmanr": 0.7023215368118,
-      "eval_steps_per_second": 118.35,
-      "step": 18
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 1.7851035594940186,
-      "eval_pearson": 0.6869651194383046,
-      "eval_runtime": 0.8033,
-      "eval_samples_per_second": 1867.276,
-      "eval_spearmanr": 0.619343622594654,
-      "eval_steps_per_second": 117.016,
-      "step": 36
-    },
-    {
-      "epoch": 3.0,
-      "eval_loss": 0.7565258741378784,
-      "eval_pearson": 0.8285257169035537,
-      "eval_runtime": 0.8057,
-      "eval_samples_per_second": 1861.771,
-      "eval_spearmanr": 0.8319559534455686,
-      "eval_steps_per_second": 116.671,
-      "step": 54
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 0.7455037236213684,
-      "eval_pearson": 0.8337510188712361,
-      "eval_runtime": 0.7987,
-      "eval_samples_per_second": 1877.938,
-      "eval_spearmanr": 0.8313859769239171,
-      "eval_steps_per_second": 117.684,
       "step": 72
     }
   ],
   "logging_steps": 500,
-  "max_steps": 90,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 9.415997134117997e-05,
-    "num_train_epochs": 5,
-    "per_device_train_batch_size": 32,
-    "seed": 10
   }
 }

 {
+  "best_metric": 0.7580884083785275,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-1/checkpoint-72",
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_loss": 1.1846531629562378,
+      "eval_pearson": 0.7580884083785275,
+      "eval_runtime": 0.9281,
+      "eval_samples_per_second": 1616.21,
+      "eval_spearmanr": 0.7189361728033606,
+      "eval_steps_per_second": 101.283,
       "step": 72
     }
   ],
   "logging_steps": 500,
+  "max_steps": 216,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "total_flos": 0,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 3.662007760473047e-05,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 8,
+    "seed": 7
   }
 }