SjardiWillems committed on Mar 6, 2024

Commit

b50fea4

verified ·

1 Parent(s): 66f370b

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-3/checkpoint-288/config.json +31 -0
run-3/checkpoint-288/model.safetensors +3 -0
run-3/checkpoint-288/optimizer.pt +3 -0
run-3/checkpoint-288/rng_state.pth +3 -0
run-3/checkpoint-288/scheduler.pt +3 -0
run-3/checkpoint-288/special_tokens_map.json +7 -0
run-3/checkpoint-288/tokenizer.json +0 -0
run-3/checkpoint-288/tokenizer_config.json +55 -0
run-3/checkpoint-288/trainer_state.json +46 -0
run-3/checkpoint-288/training_args.bin +3 -0
run-3/checkpoint-288/vocab.txt +0 -0
run-3/checkpoint-432/config.json +31 -0
run-3/checkpoint-432/model.safetensors +3 -0
run-3/checkpoint-432/optimizer.pt +3 -0
run-3/checkpoint-432/rng_state.pth +3 -0
run-3/checkpoint-432/scheduler.pt +3 -0
run-3/checkpoint-432/special_tokens_map.json +7 -0
run-3/checkpoint-432/tokenizer.json +0 -0
run-3/checkpoint-432/tokenizer_config.json +55 -0
run-3/checkpoint-432/trainer_state.json +56 -0
run-3/checkpoint-432/training_args.bin +3 -0
run-3/checkpoint-432/vocab.txt +0 -0
run-3/checkpoint-576/config.json +31 -0
run-3/checkpoint-576/model.safetensors +3 -0
run-3/checkpoint-576/optimizer.pt +3 -0
run-3/checkpoint-576/rng_state.pth +3 -0
run-3/checkpoint-576/scheduler.pt +3 -0
run-3/checkpoint-576/special_tokens_map.json +7 -0
run-3/checkpoint-576/tokenizer.json +0 -0
run-3/checkpoint-576/tokenizer_config.json +55 -0
run-3/checkpoint-576/trainer_state.json +73 -0
run-3/checkpoint-576/training_args.bin +3 -0
run-3/checkpoint-576/vocab.txt +0 -0
run-3/checkpoint-720/config.json +31 -0
run-3/checkpoint-720/model.safetensors +3 -0
run-3/checkpoint-720/optimizer.pt +3 -0
run-3/checkpoint-720/rng_state.pth +3 -0
run-3/checkpoint-720/scheduler.pt +3 -0
run-3/checkpoint-720/special_tokens_map.json +7 -0
run-3/checkpoint-720/tokenizer.json +0 -0
run-3/checkpoint-720/tokenizer_config.json +55 -0
run-3/checkpoint-720/trainer_state.json +83 -0
run-3/checkpoint-720/training_args.bin +3 -0
run-3/checkpoint-720/vocab.txt +0 -0
run-4/checkpoint-72/model.safetensors +1 -1
run-4/checkpoint-72/optimizer.pt +1 -1
run-4/checkpoint-72/rng_state.pth +1 -1
run-4/checkpoint-72/scheduler.pt +1 -1
run-4/checkpoint-72/trainer_state.json +13 -23

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32f2b6344dbeefedd77c7295f5f43e3c60235bc524019549e354574302929153
+oid sha256:afa16f4ad7735629159d0b17c8bb6475106028ef4c08958f9e4cfe8ee468271e
 size 267829484

run-3/checkpoint-288/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-288/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d61c763c886dbb531d62293ccddf79a5802b2bb562413c4d5db455ce0ca8a2fb
+size 267829484

run-3/checkpoint-288/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dbce5211cc54e0465ac8be2041b7c2acfcfc05c146d963020091964689ca99d
+size 535721146

run-3/checkpoint-288/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b4567d37c30077318ed7ca2fd6ed8c79fd4e9422f26b3d853c9c457274f3e99
+size 14244

run-3/checkpoint-288/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6873788c8492efd7d168b02739521070490d7c8a8f85aee5192671cdda219904
+size 1064

run-3/checkpoint-288/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-288/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-288/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-288/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": 0.8252063306594624,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-288",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 288,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9698190689086914,
+      "eval_pearson": 0.7678320454774757,
+      "eval_runtime": 0.8035,
+      "eval_samples_per_second": 1866.77,
+      "eval_spearmanr": 0.712509933248186,
+      "eval_steps_per_second": 116.984,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7606285810470581,
+      "eval_pearson": 0.8252063306594624,
+      "eval_runtime": 0.7878,
+      "eval_samples_per_second": 1904.014,
+      "eval_spearmanr": 0.8218529358151725,
+      "eval_steps_per_second": 119.318,
+      "step": 288
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.4134010471541346e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 4
+  }
+}

run-3/checkpoint-288/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
+size 4920

run-3/checkpoint-288/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-432/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-432/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54ee921761736374c58efdf205e400cb47f16270af5b9033facba3bb7383e53a
+size 267829484

run-3/checkpoint-432/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ac1fa64753adc4283dbcfd47227812a681fdced7ace3e7ccdb93f6e6b9336a5
+size 535721146

run-3/checkpoint-432/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a35f605eedd2102adde6198f38cba143faaed4ee12b11d5ffe4f701e82b262f2
+size 14244

run-3/checkpoint-432/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9422fdad9996632c1797fb4769bcfaa4d1bf52fc06a431fa4296855752c582f2
+size 1064

run-3/checkpoint-432/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-432/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-432/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-432/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.8325197619129224,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-432",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 432,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9698190689086914,
+      "eval_pearson": 0.7678320454774757,
+      "eval_runtime": 0.8035,
+      "eval_samples_per_second": 1866.77,
+      "eval_spearmanr": 0.712509933248186,
+      "eval_steps_per_second": 116.984,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7606285810470581,
+      "eval_pearson": 0.8252063306594624,
+      "eval_runtime": 0.7878,
+      "eval_samples_per_second": 1904.014,
+      "eval_spearmanr": 0.8218529358151725,
+      "eval_steps_per_second": 119.318,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7174907326698303,
+      "eval_pearson": 0.8325197619129224,
+      "eval_runtime": 0.7962,
+      "eval_samples_per_second": 1883.934,
+      "eval_spearmanr": 0.8279573830775626,
+      "eval_steps_per_second": 118.06,
+      "step": 432
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.4134010471541346e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 4
+  }
+}

run-3/checkpoint-432/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
+size 4920

run-3/checkpoint-432/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-576/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-576/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3d6088776c555a453aba313194e46ee79702e29ce42f01cfa8a9c6961bc9c8
+size 267829484

run-3/checkpoint-576/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:266bfd784fb8d7d7deff582f100b5c733e610ba4b2b088848056436bc04f81be
+size 535721146

run-3/checkpoint-576/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:866764b15a22ef1ff0938a6647b07c0c9c90e6d2381339fde66f0f71097dcc37
+size 14244

run-3/checkpoint-576/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb27dabe46c6c138fbd2a477aac978f51b59d763c234a73ac470f1f9c0286e4a
+size 1064

run-3/checkpoint-576/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-576/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-576/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-576/trainer_state.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "best_metric": 0.8407800570738724,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 576,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9698190689086914,
+      "eval_pearson": 0.7678320454774757,
+      "eval_runtime": 0.8035,
+      "eval_samples_per_second": 1866.77,
+      "eval_spearmanr": 0.712509933248186,
+      "eval_steps_per_second": 116.984,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7606285810470581,
+      "eval_pearson": 0.8252063306594624,
+      "eval_runtime": 0.7878,
+      "eval_samples_per_second": 1904.014,
+      "eval_spearmanr": 0.8218529358151725,
+      "eval_steps_per_second": 119.318,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7174907326698303,
+      "eval_pearson": 0.8325197619129224,
+      "eval_runtime": 0.7962,
+      "eval_samples_per_second": 1883.934,
+      "eval_spearmanr": 0.8279573830775626,
+      "eval_steps_per_second": 118.06,
+      "step": 432
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 10.061558723449707,
+      "learning_rate": 1.654094764408208e-05,
+      "loss": 0.9316,
+      "step": 500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.7175036072731018,
+      "eval_pearson": 0.8407800570738724,
+      "eval_runtime": 0.7992,
+      "eval_samples_per_second": 1876.812,
+      "eval_spearmanr": 0.8358993750524432,
+      "eval_steps_per_second": 117.614,
+      "step": 576
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 22512846870384.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.4134010471541346e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 4
+  }
+}

run-3/checkpoint-576/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
+size 4920

run-3/checkpoint-576/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-720/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-720/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa0d60c076141cf7c557f9e6e5ee5c4f86e889eb8bdf0a92abb93ba722721f86
+size 267829484

run-3/checkpoint-720/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a301924ca24aae6f1fb2c1c6ee9db250640a15a1a2d472c44a832785935b9318
+size 535721146

run-3/checkpoint-720/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f67419749962139f0cc43ca20b49e9704d0edbf7d5f97a6d0cab6b36fcd9155
+size 14244

run-3/checkpoint-720/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:423272412d97a885928e6da28e6e2e5c0c8b0cd8ad24d0ecdfd83d3aefd966c8
+size 1064

run-3/checkpoint-720/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-720/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-720/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-720/trainer_state.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "best_metric": 0.8407800570738724,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 720,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9698190689086914,
+      "eval_pearson": 0.7678320454774757,
+      "eval_runtime": 0.8035,
+      "eval_samples_per_second": 1866.77,
+      "eval_spearmanr": 0.712509933248186,
+      "eval_steps_per_second": 116.984,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7606285810470581,
+      "eval_pearson": 0.8252063306594624,
+      "eval_runtime": 0.7878,
+      "eval_samples_per_second": 1904.014,
+      "eval_spearmanr": 0.8218529358151725,
+      "eval_steps_per_second": 119.318,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7174907326698303,
+      "eval_pearson": 0.8325197619129224,
+      "eval_runtime": 0.7962,
+      "eval_samples_per_second": 1883.934,
+      "eval_spearmanr": 0.8279573830775626,
+      "eval_steps_per_second": 118.06,
+      "step": 432
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 10.061558723449707,
+      "learning_rate": 1.654094764408208e-05,
+      "loss": 0.9316,
+      "step": 500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.7175036072731018,
+      "eval_pearson": 0.8407800570738724,
+      "eval_runtime": 0.7992,
+      "eval_samples_per_second": 1876.812,
+      "eval_spearmanr": 0.8358993750524432,
+      "eval_steps_per_second": 117.614,
+      "step": 576
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.67606520652771,
+      "eval_pearson": 0.8389162758555168,
+      "eval_runtime": 0.8221,
+      "eval_samples_per_second": 1824.653,
+      "eval_spearmanr": 0.8352930144614386,
+      "eval_steps_per_second": 114.345,
+      "step": 720
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 22512846870384.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.4134010471541346e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 4
+  }
+}

run-3/checkpoint-720/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
+size 4920

run-3/checkpoint-720/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-72/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67b2e3d0a525135bd49569a8d64725ba116a2c1b8fd11bb3acfd5821409bf78a
+oid sha256:afa16f4ad7735629159d0b17c8bb6475106028ef4c08958f9e4cfe8ee468271e
 size 267829484

run-4/checkpoint-72/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19a133f68e737fbf59ebad435b5eb189eb9bbb79881de220fe5850a71c95fccf
+oid sha256:22321ea9dff9ffc35446ff13879ba713642a9bca373f8bd9e24fae5f0e536fec
 size 535721146

run-4/checkpoint-72/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86fd676f2765f76ae40e8279a83aad1f3f514402e7dd2ef3841c0eb67d7dcd5b
+oid sha256:fc4bccc032167d6736137944244a6658841689a7a7b626ec227ea770908c36c8
 size 14244

run-4/checkpoint-72/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b9a3508d8a761e914163950bd782ee13212d965a0ecd807e8d58838a564272d
+oid sha256:eb861a3e75a0387b8c64cfd53862ffa72d175c2f1ae81c93eae04dc37f17ce6b
 size 1064

run-4/checkpoint-72/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.22452541125699038,
+  "best_metric": 0.6173001362903047,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-72",
-  "epoch": 2.0,
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": true,
@@ -10,37 +10,27 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 3.607897996902466,
-      "eval_pearson": 0.20189901594611118,
-      "eval_runtime": 0.9286,
-      "eval_samples_per_second": 1615.373,
-      "eval_spearmanr": 0.19575057246397828,
-      "eval_steps_per_second": 101.23,
-      "step": 36
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 2.214840888977051,
-      "eval_pearson": 0.22452541125699038,
-      "eval_runtime": 0.9301,
-      "eval_samples_per_second": 1612.779,
-      "eval_spearmanr": 0.24754498733937647,
-      "eval_steps_per_second": 101.067,
+      "eval_loss": 1.9030543565750122,
+      "eval_pearson": 0.6173001362903047,
+      "eval_runtime": 0.7967,
+      "eval_samples_per_second": 1882.655,
+      "eval_spearmanr": 0.6065460603935124,
+      "eval_steps_per_second": 117.98,
       "step": 72
     }
   ],
   "logging_steps": 500,
-  "max_steps": 144,
+  "max_steps": 288,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 16,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 6.104946209890916e-06,
+    "learning_rate": 2.5574276250903696e-05,
     "num_train_epochs": 4,
-    "per_device_train_batch_size": 16,
-    "seed": 9
+    "per_device_train_batch_size": 8,
+    "seed": 40
   }
 }