SjardiWillems commited on Mar 10, 2024

Commit

b97fa27

verified ·

1 Parent(s): c817cd2

Training in progress, epoch 1

Browse files

Files changed (46) hide show

model.safetensors +1 -1
run-3/checkpoint-36/config.json +1 -1
run-3/checkpoint-36/model.safetensors +1 -1
run-3/checkpoint-36/optimizer.pt +1 -1
run-3/checkpoint-36/rng_state.pth +2 -2
run-3/checkpoint-36/scheduler.pt +1 -1
run-3/checkpoint-36/trainer_state.json +20 -40
run-3/checkpoint-36/training_args.bin +1 -1
run-3/checkpoint-54/config.json +31 -0
run-3/checkpoint-54/model.safetensors +3 -0
run-3/checkpoint-54/optimizer.pt +3 -0
run-3/checkpoint-54/rng_state.pth +3 -0
run-3/checkpoint-54/scheduler.pt +3 -0
run-3/checkpoint-54/special_tokens_map.json +7 -0
run-3/checkpoint-54/tokenizer.json +0 -0
run-3/checkpoint-54/tokenizer_config.json +55 -0
run-3/checkpoint-54/trainer_state.json +56 -0
run-3/checkpoint-54/training_args.bin +3 -0
run-3/checkpoint-54/vocab.txt +0 -0
run-3/checkpoint-72/config.json +1 -1
run-3/checkpoint-72/model.safetensors +1 -1
run-3/checkpoint-72/optimizer.pt +1 -1
run-3/checkpoint-72/rng_state.pth +1 -1
run-3/checkpoint-72/scheduler.pt +1 -1
run-3/checkpoint-72/trainer_state.json +46 -16
run-3/checkpoint-72/training_args.bin +1 -1
run-3/checkpoint-90/config.json +31 -0
run-3/checkpoint-90/model.safetensors +3 -0
run-3/checkpoint-90/optimizer.pt +3 -0
run-3/checkpoint-90/rng_state.pth +3 -0
run-3/checkpoint-90/scheduler.pt +3 -0
run-3/checkpoint-90/special_tokens_map.json +7 -0
run-3/checkpoint-90/tokenizer.json +0 -0
run-3/checkpoint-90/tokenizer_config.json +55 -0
run-3/checkpoint-90/trainer_state.json +76 -0
run-3/checkpoint-90/training_args.bin +3 -0
run-3/checkpoint-90/vocab.txt +0 -0
run-4/checkpoint-18/config.json +1 -1
run-4/checkpoint-18/model.safetensors +1 -1
run-4/checkpoint-18/optimizer.pt +1 -1
run-4/checkpoint-18/rng_state.pth +2 -2
run-4/checkpoint-18/scheduler.pt +1 -1
run-4/checkpoint-18/trainer_state.json +16 -26
run-4/checkpoint-18/training_args.bin +1 -1
runs/Mar10_22-33-58_0f0a24039c15/events.out.tfevents.1710110466.0f0a24039c15.273.6 +3 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c72c160ce745efa2a168ba3065831c2eb692ce9297265e22265b0352f137ee9
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:137ea9a9d64c2fca4ff0672ce3b388497cdd88b583117dcb14cf56ef06450dd2
 size 267829484

run-3/checkpoint-36/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "distilbert-base-uncased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

 {
+  "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

run-3/checkpoint-36/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6c629568e16dc74409d4dfa02a57321ba6ae6562c21d2fc48ffa505ded3a4e6
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad30ede43c020749fbc655edf4e58895a5e7eb3d33517cc7b220377cf3b3aaf4
 size 267829484

run-3/checkpoint-36/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d60f8e6feef4eb32f7ba3f7c0faf9f4c6ae8f291f6e1418fea38e79ce1dc3601
 size 535721146

 version https://git-lfs.github.com/spec/v1
+oid sha256:33a2b7c57ce885803b45f98bbc8c840948b4a3f25a359ff99d58a3cbda4057f9
 size 535721146

run-3/checkpoint-36/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d83c4a58d2ee55cb30b32045e660936fc993b17a15c3e3f39a67e2c75fae7ec7
-size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:d600bdc59e6dc868c416c7bb537f563e04469edf86acff4ed520679647068ccc
+size 14308

run-3/checkpoint-36/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:660ee25a0ddd4682dcd06e6ffe649e6382c31e6ac22a1275061d2ccb894eb7da
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:034e87c15bdb924d0c8e7456ae36135555d9fa00431e495a13cf852a2a875a48
 size 1064

run-3/checkpoint-36/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.06186573722446415,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
-  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 36,
   "is_hyper_param_search": true,
@@ -10,57 +10,37 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 7.199407577514648,
-      "eval_pearson": 0.024278120881494558,
-      "eval_runtime": 0.9268,
-      "eval_samples_per_second": 1618.438,
-      "eval_spearmanr": 0.025433044219399577,
-      "eval_steps_per_second": 101.422,
-      "step": 9
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 6.31046724319458,
-      "eval_pearson": 0.0470495796010619,
-      "eval_runtime": 0.9557,
-      "eval_samples_per_second": 1569.521,
-      "eval_spearmanr": 0.048936574609921006,
-      "eval_steps_per_second": 98.357,
       "step": 18
     },
     {
-      "epoch": 3.0,
-      "eval_loss": 5.604863166809082,
-      "eval_pearson": 0.05278675976955915,
-      "eval_runtime": 0.9249,
-      "eval_samples_per_second": 1621.719,
-      "eval_spearmanr": 0.05579666652056003,
-      "eval_steps_per_second": 101.628,
-      "step": 27
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 5.163288593292236,
-      "eval_pearson": 0.06186573722446415,
-      "eval_runtime": 0.9496,
-      "eval_samples_per_second": 1579.549,
-      "eval_spearmanr": 0.06678878974516303,
-      "eval_steps_per_second": 98.985,
       "step": 36
     }
   ],
   "logging_steps": 500,
-  "max_steps": 45,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 64,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 5.243560941114439e-06,
     "num_train_epochs": 5,
-    "per_device_train_batch_size": 64,
-    "seed": 10
   }
 }

 {
+  "best_metric": 0.8703719079793119,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
+  "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 36,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_loss": 0.6028859615325928,
+      "eval_pearson": 0.8640876954461902,
+      "eval_runtime": 0.7957,
+      "eval_samples_per_second": 1885.175,
+      "eval_spearmanr": 0.8614938677816929,
+      "eval_steps_per_second": 118.138,
       "step": 18
     },
     {
+      "epoch": 2.0,
+      "eval_loss": 0.5511023998260498,
+      "eval_pearson": 0.8703719079793119,
+      "eval_runtime": 0.786,
+      "eval_samples_per_second": 1908.456,
+      "eval_spearmanr": 0.866545511062528,
+      "eval_steps_per_second": 119.597,
       "step": 36
     }
   ],
   "logging_steps": 500,
+  "max_steps": 90,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 0,
+  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 5.304193372992487e-05,
     "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 31
   }
 }

run-3/checkpoint-36/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43dea477fd3be8f6b457027410de4c6f7cf07a2710d25756956ebe9bfbdf4e31
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
 size 4920

run-3/checkpoint-54/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-54/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c186d637863e57384b54ab2fc7dbf952b0dc4f47137ccd712d5e7f686b7cb5a4
+size 267829484

run-3/checkpoint-54/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1069ad833f7cf2b1614ee4cbc59f70f2794581760c6d04cf8c64fc841b745db
+size 535721146

run-3/checkpoint-54/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e32de5813ecdfb48e5dd1d624acd0a70f2a5360740ef6edd53f7b4d7319206
+size 14308

run-3/checkpoint-54/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4564fa06afdd7a9495548b82cfa9fec739694a189cc628de539023b2017a9892
+size 1064

run-3/checkpoint-54/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-54/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-54/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-54/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.8703719079793119,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 54,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6028859615325928,
+      "eval_pearson": 0.8640876954461902,
+      "eval_runtime": 0.7957,
+      "eval_samples_per_second": 1885.175,
+      "eval_spearmanr": 0.8614938677816929,
+      "eval_steps_per_second": 118.138,
+      "step": 18
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5511023998260498,
+      "eval_pearson": 0.8703719079793119,
+      "eval_runtime": 0.786,
+      "eval_samples_per_second": 1908.456,
+      "eval_spearmanr": 0.866545511062528,
+      "eval_steps_per_second": 119.597,
+      "step": 36
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5914527177810669,
+      "eval_pearson": 0.8695007442729191,
+      "eval_runtime": 0.8064,
+      "eval_samples_per_second": 1860.165,
+      "eval_spearmanr": 0.8657982418648841,
+      "eval_steps_per_second": 116.57,
+      "step": 54
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 90,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.304193372992487e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 31
+  }
+}

run-3/checkpoint-54/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
+size 4920

run-3/checkpoint-54/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-72/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "distilbert-base-uncased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

 {
+  "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

run-3/checkpoint-72/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:628e81c9d1cd28e94fd2c0528b29c2450ff4003f56c032e01547e58616527956
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:87bbb5e3c825ea117f159488dc603a85c23ee403d37a996586892bdcdb82b341
 size 267829484

run-3/checkpoint-72/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2efe5398abd461fcafd62df5d68501cdf7326c5d01c05bfb382b1f855d199173
 size 535721146

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cce3687e29d958a9581c2eab8c194598006cd12484554cd4d3f40204c23498d
 size 535721146

run-3/checkpoint-72/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2dda7584660763f72c41c754dd6523df6f1c049b2a56e83473d0e06cc32bb35d
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:afb6236b8299fe8ab86ae5a7a0125ffdb192834dfed7e461189fa3fff3b1d957
 size 14308

run-3/checkpoint-72/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525313aa68006057abc2527656e139e39e304df0fae14edc96d58c8a10f35d43
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:58b4614e97f318c5b27378badbc8c75f30278c4733f2ec9cc0b63520d116b974
 size 1064

run-3/checkpoint-72/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.3488998094346536,
-  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-72",
-  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": true,
@@ -10,27 +10,57 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 2.188309907913208,
-      "eval_pearson": 0.3488998094346536,
-      "eval_runtime": 0.8689,
-      "eval_samples_per_second": 1726.372,
-      "eval_spearmanr": 0.3841984779795825,
-      "eval_steps_per_second": 108.186,
       "step": 72
     }
   ],
   "logging_steps": 500,
-  "max_steps": 72,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 1.2281778565394951e-05,
-    "num_train_epochs": 1,
-    "per_device_train_batch_size": 8,
-    "seed": 24
   }
 }

 {
+  "best_metric": 0.8703719079793119,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
+  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_loss": 0.6028859615325928,
+      "eval_pearson": 0.8640876954461902,
+      "eval_runtime": 0.7957,
+      "eval_samples_per_second": 1885.175,
+      "eval_spearmanr": 0.8614938677816929,
+      "eval_steps_per_second": 118.138,
+      "step": 18
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5511023998260498,
+      "eval_pearson": 0.8703719079793119,
+      "eval_runtime": 0.786,
+      "eval_samples_per_second": 1908.456,
+      "eval_spearmanr": 0.866545511062528,
+      "eval_steps_per_second": 119.597,
+      "step": 36
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5914527177810669,
+      "eval_pearson": 0.8695007442729191,
+      "eval_runtime": 0.8064,
+      "eval_samples_per_second": 1860.165,
+      "eval_spearmanr": 0.8657982418648841,
+      "eval_steps_per_second": 116.57,
+      "step": 54
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.5527331829071045,
+      "eval_pearson": 0.870253450177356,
+      "eval_runtime": 0.8029,
+      "eval_samples_per_second": 1868.343,
+      "eval_spearmanr": 0.8666505706240611,
+      "eval_steps_per_second": 117.083,
       "step": 72
     }
   ],
   "logging_steps": 500,
+  "max_steps": 90,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 0,
+  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 5.304193372992487e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 31
   }
 }

run-3/checkpoint-72/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94a97181d28e1b7189cab54251a7b430fd73528c8be0f04e2930aea21ed4e946
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
 size 4920

run-3/checkpoint-90/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-90/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe879bec543a9d5c304e50f38c4331c25b9757978d34480cbe9f23ce33b03f0e
+size 267829484

run-3/checkpoint-90/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbd78cde4da5586f24913972751775c83f4cb28fe0c11a22e4efa5428e4822f
+size 535721146

run-3/checkpoint-90/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cab4d2e6f9b0d070357fd74c3d5209fdbd5738db589136b19b23fab58fc5e55d
+size 14308

run-3/checkpoint-90/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b24c3757e327e4ae3872c99a62f9eda4e0c61b0b08286d512734603c3f8205b9
+size 1064

run-3/checkpoint-90/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-90/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-90/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-90/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_metric": 0.8703719079793119,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6028859615325928,
+      "eval_pearson": 0.8640876954461902,
+      "eval_runtime": 0.7957,
+      "eval_samples_per_second": 1885.175,
+      "eval_spearmanr": 0.8614938677816929,
+      "eval_steps_per_second": 118.138,
+      "step": 18
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5511023998260498,
+      "eval_pearson": 0.8703719079793119,
+      "eval_runtime": 0.786,
+      "eval_samples_per_second": 1908.456,
+      "eval_spearmanr": 0.866545511062528,
+      "eval_steps_per_second": 119.597,
+      "step": 36
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5914527177810669,
+      "eval_pearson": 0.8695007442729191,
+      "eval_runtime": 0.8064,
+      "eval_samples_per_second": 1860.165,
+      "eval_spearmanr": 0.8657982418648841,
+      "eval_steps_per_second": 116.57,
+      "step": 54
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.5527331829071045,
+      "eval_pearson": 0.870253450177356,
+      "eval_runtime": 0.8029,
+      "eval_samples_per_second": 1868.343,
+      "eval_spearmanr": 0.8666505706240611,
+      "eval_steps_per_second": 117.083,
+      "step": 72
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.5488625764846802,
+      "eval_pearson": 0.8699828916266209,
+      "eval_runtime": 0.7844,
+      "eval_samples_per_second": 1912.356,
+      "eval_spearmanr": 0.8658726120228372,
+      "eval_steps_per_second": 119.841,
+      "step": 90
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 90,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.304193372992487e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 31
+  }
+}

run-3/checkpoint-90/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
+size 4920

run-3/checkpoint-90/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-18/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "distilbert-base-uncased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

 {
+  "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"

run-4/checkpoint-18/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56c9436a49a572459a2a44ddbdc061067a3843403ed9f6eca1db149dbcdbd98d
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:137ea9a9d64c2fca4ff0672ce3b388497cdd88b583117dcb14cf56ef06450dd2
 size 267829484

run-4/checkpoint-18/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf8a4c79882ed42b0ba2938dc012222bb219784a5f37e9ad6b2769726ebfcbf0
 size 535721146

 version https://git-lfs.github.com/spec/v1
+oid sha256:288fe1db01ec36a78be841780b2535af1f4df14e228f6541f72757cd4b33973c
 size 535721146

run-4/checkpoint-18/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15a978540f0e297d2deb9452be6f34da155066843444206a3a770d9309265da5
-size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3489ad310a1c6f771a5ce3247bcfdda18268e6f5e4d886772bb1dbd75613e8de
+size 14180

run-4/checkpoint-18/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc1f03d35d35aad84c8160e55d20e180f5c3328f48c3f74d729338bf74cb3c93
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5f4eaffe579a51d750e17c4d1ab255305969dc4aefdc53aa2a41ebfb8e9dc44
 size 1064

run-4/checkpoint-18/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.24101392328980611,
-  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
-  "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 18,
   "is_hyper_param_search": true,
@@ -10,37 +10,27 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 6.916525363922119,
-      "eval_pearson": 0.24101392328980611,
-      "eval_runtime": 0.9549,
-      "eval_samples_per_second": 1570.783,
-      "eval_spearmanr": 0.23527578828258416,
-      "eval_steps_per_second": 98.436,
-      "step": 9
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 6.322813987731934,
-      "eval_pearson": 0.21685934779589452,
-      "eval_runtime": 1.3518,
-      "eval_samples_per_second": 1109.622,
-      "eval_spearmanr": 0.19981399518979143,
-      "eval_steps_per_second": 69.536,
       "step": 18
     }
   ],
   "logging_steps": 500,
-  "max_steps": 36,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 64,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 4.463445057905012e-06,
-    "num_train_epochs": 4,
-    "per_device_train_batch_size": 64,
-    "seed": 5
   }
 }

 {
+  "best_metric": 0.8698177849938507,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-18",
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 18,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_loss": 0.5605268478393555,
+      "eval_pearson": 0.8698177849938507,
+      "eval_runtime": 0.7833,
+      "eval_samples_per_second": 1914.894,
+      "eval_spearmanr": 0.8653376248792928,
+      "eval_steps_per_second": 120.0,
       "step": 18
     }
   ],
   "logging_steps": 500,
+  "max_steps": 54,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "total_flos": 0,
+  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 9.914291118758786e-06,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "seed": 10
   }
 }

run-4/checkpoint-18/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0fb3669a562055766866e71b54806bb7235c4677b6cb8f825acdf2c31236bf5
 size 4920

runs/Mar10_22-33-58_0f0a24039c15/events.out.tfevents.1710110466.0f0a24039c15.273.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28b236390ebad4b19ffe6f513e3bb9328a12005ea07134351ce302675b0c00d7
+size 6162

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0fb3669a562055766866e71b54806bb7235c4677b6cb8f825acdf2c31236bf5
 size 4920