SjardiWillems committed on Mar 6, 2024

Commit

608764a

verified ·

1 Parent(s): 608a33e

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-0/checkpoint-288/model.safetensors +1 -1
run-0/checkpoint-288/optimizer.pt +1 -1
run-0/checkpoint-288/rng_state.pth +1 -1
run-0/checkpoint-288/scheduler.pt +1 -1
run-0/checkpoint-288/trainer_state.json +22 -42
run-0/checkpoint-288/training_args.bin +1 -1
run-0/checkpoint-432/config.json +31 -0
run-0/checkpoint-432/model.safetensors +3 -0
run-0/checkpoint-432/optimizer.pt +3 -0
run-0/checkpoint-432/rng_state.pth +3 -0
run-0/checkpoint-432/scheduler.pt +3 -0
run-0/checkpoint-432/special_tokens_map.json +7 -0
run-0/checkpoint-432/tokenizer.json +0 -0
run-0/checkpoint-432/tokenizer_config.json +55 -0
run-0/checkpoint-432/trainer_state.json +56 -0
run-0/checkpoint-432/training_args.bin +3 -0
run-0/checkpoint-432/vocab.txt +0 -0
run-0/checkpoint-576/config.json +31 -0
run-0/checkpoint-576/model.safetensors +3 -0
run-0/checkpoint-576/optimizer.pt +3 -0
run-0/checkpoint-576/rng_state.pth +3 -0
run-0/checkpoint-576/scheduler.pt +3 -0
run-0/checkpoint-576/special_tokens_map.json +7 -0
run-0/checkpoint-576/tokenizer.json +0 -0
run-0/checkpoint-576/tokenizer_config.json +55 -0
run-0/checkpoint-576/trainer_state.json +73 -0
run-0/checkpoint-576/training_args.bin +3 -0
run-0/checkpoint-576/vocab.txt +0 -0
run-0/checkpoint-720/config.json +31 -0
run-0/checkpoint-720/model.safetensors +3 -0
run-0/checkpoint-720/optimizer.pt +3 -0
run-0/checkpoint-720/rng_state.pth +3 -0
run-0/checkpoint-720/scheduler.pt +3 -0
run-0/checkpoint-720/special_tokens_map.json +7 -0
run-0/checkpoint-720/tokenizer.json +0 -0
run-0/checkpoint-720/tokenizer_config.json +55 -0
run-0/checkpoint-720/trainer_state.json +83 -0
run-0/checkpoint-720/training_args.bin +3 -0
run-0/checkpoint-720/vocab.txt +0 -0
run-1/checkpoint-18/config.json +31 -0
run-1/checkpoint-18/model.safetensors +3 -0
run-1/checkpoint-18/optimizer.pt +3 -0
run-1/checkpoint-18/rng_state.pth +3 -0
run-1/checkpoint-18/scheduler.pt +3 -0
run-1/checkpoint-18/special_tokens_map.json +7 -0
run-1/checkpoint-18/tokenizer.json +0 -0
run-1/checkpoint-18/tokenizer_config.json +55 -0
run-1/checkpoint-18/trainer_state.json +36 -0
run-1/checkpoint-18/training_args.bin +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4ebbc80bdea594e9c45d0bd1ee785cd4c4ab9a369a7e11a11992bda11abc75a
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:b93dca996de1e2a65bd2b8805ca12671d565a8759726dc118ad227d9b8a09a73
 size 267829484

run-0/checkpoint-288/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4a7e69db21e42d412939d1e36d85c5cce94cf888a48d410cdcebba133954961
 size 267829484

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8c1a7903290ebee27e018fe7af64668477f1c58876727dd95847ead5c27d63b
 size 267829484

run-0/checkpoint-288/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db29bdcba8691d7355db2d4b2efb03d2fc57abcb4241e331aadec87069aab9d0
 size 535721146

 version https://git-lfs.github.com/spec/v1
+oid sha256:03a4e7d16d9b7fc303ee20b231457bd7587b56276a8b12d313e29168dffc06dc
 size 535721146

run-0/checkpoint-288/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f626927ff09c9fc535421ec8dad43c9dbeb3494a6f853f65416b0df7692c7c0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b38ba9e98db64db97c6caf6af07e786ff7f1f58032c6cccc3082d122f2284155
 size 14244

run-0/checkpoint-288/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c84e36f5d86b2d14c61e406755bf7b1e2f465ae07eeff109e65a79476ac0d4a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8425eac3da69167f48985be7ad4006bec037e9307a080db2a5d39a3ec2bc86d
 size 1064

run-0/checkpoint-288/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.8324083319363956,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-288",
-  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 288,
   "is_hyper_param_search": true,
@@ -10,57 +10,37 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_loss": 1.0813779830932617,
-      "eval_pearson": 0.7567095621264116,
-      "eval_runtime": 0.9163,
-      "eval_samples_per_second": 1637.038,
-      "eval_spearmanr": 0.7572162649891061,
-      "eval_steps_per_second": 102.588,
-      "step": 72
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.8653174042701721,
-      "eval_pearson": 0.8253446259273143,
-      "eval_runtime": 1.0205,
-      "eval_samples_per_second": 1469.804,
-      "eval_spearmanr": 0.8213518482816455,
-      "eval_steps_per_second": 92.108,
       "step": 144
     },
     {
-      "epoch": 3.0,
-      "eval_loss": 0.7286169528961182,
-      "eval_pearson": 0.8284186695774568,
-      "eval_runtime": 0.9489,
-      "eval_samples_per_second": 1580.844,
-      "eval_spearmanr": 0.8248712386045135,
-      "eval_steps_per_second": 99.066,
-      "step": 216
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 0.7232770323753357,
-      "eval_pearson": 0.8324083319363956,
-      "eval_runtime": 0.9594,
-      "eval_samples_per_second": 1563.435,
-      "eval_spearmanr": 0.8281963049502055,
-      "eval_steps_per_second": 97.975,
       "step": 288
     }
   ],
   "logging_steps": 500,
-  "max_steps": 288,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 500,
   "total_flos": 0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 3.8584811748752325e-05,
-    "num_train_epochs": 4,
-    "per_device_train_batch_size": 8,
-    "seed": 40
   }
 }

 {
+  "best_metric": 0.8281295598730645,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-288",
+  "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 288,
   "is_hyper_param_search": true,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_loss": 0.9918314814567566,
+      "eval_pearson": 0.7966375426002391,
+      "eval_runtime": 0.8021,
+      "eval_samples_per_second": 1870.124,
+      "eval_spearmanr": 0.7909051383387676,
+      "eval_steps_per_second": 117.194,
       "step": 144
     },
     {
+      "epoch": 2.0,
+      "eval_loss": 0.8358427882194519,
+      "eval_pearson": 0.8281295598730645,
+      "eval_runtime": 0.826,
+      "eval_samples_per_second": 1815.925,
+      "eval_spearmanr": 0.8254389941774403,
+      "eval_steps_per_second": 113.798,
       "step": 288
     }
   ],
   "logging_steps": 500,
+  "max_steps": 720,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 500,
   "total_flos": 0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 4.325876846375602e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 34
   }
 }

run-0/checkpoint-288/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd9a09d3e6d841ee4f64161d2a8c27492f996273463a537d8f3f6e05968d37fe
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a012d40728dc244700f11825615a9f3164fccd8635d4eae2a798016634c8eb7
 size 4920

run-0/checkpoint-432/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-432/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52a78349956a32cccdb86fbd3ffeefdcec18ae1e6629fa69d5aa10e375d1b084
+size 267829484

run-0/checkpoint-432/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa063bb9bcc3e87c6ae8f6523054cdf77ee61cf869728bbc6041c866d0c8031b
+size 535721146

run-0/checkpoint-432/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:552723159b177b7d8ca32e7c6f90b651f3fa88b2ec8caae63c424d3459d43420
+size 14244

run-0/checkpoint-432/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df8eea7cf4eda327822db790b849faa9079f12458896e8d33c15120c64bd2829
+size 1064

run-0/checkpoint-432/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-432/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-432/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-432/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.8375909620622599,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-432",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 432,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9918314814567566,
+      "eval_pearson": 0.7966375426002391,
+      "eval_runtime": 0.8021,
+      "eval_samples_per_second": 1870.124,
+      "eval_spearmanr": 0.7909051383387676,
+      "eval_steps_per_second": 117.194,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.8358427882194519,
+      "eval_pearson": 0.8281295598730645,
+      "eval_runtime": 0.826,
+      "eval_samples_per_second": 1815.925,
+      "eval_spearmanr": 0.8254389941774403,
+      "eval_steps_per_second": 113.798,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7649794220924377,
+      "eval_pearson": 0.8375909620622599,
+      "eval_runtime": 0.8159,
+      "eval_samples_per_second": 1838.53,
+      "eval_spearmanr": 0.8346396296700553,
+      "eval_steps_per_second": 115.215,
+      "step": 432
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.325876846375602e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 34
+  }
+}

run-0/checkpoint-432/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a012d40728dc244700f11825615a9f3164fccd8635d4eae2a798016634c8eb7
+size 4920

run-0/checkpoint-432/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-576/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-576/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89fee1f343d609c32b78702028e63afc1510b6efd173b7330786bb7ca136ff4b
+size 267829484

run-0/checkpoint-576/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:832bea56fdd81e4d63092b736372977900dd3e592f00981379557cfdb12bd3c4
+size 535721146

run-0/checkpoint-576/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1cfb4fbcbb39279ed855654b9b2be65f754c53719b80a2e89cf34c98e687dee
+size 14244

run-0/checkpoint-576/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34b95da1b206c5b250875abd83931670b9b8fde3f7fdb628b146b786c21b6fcd
+size 1064

run-0/checkpoint-576/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-576/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-576/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-576/trainer_state.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "best_metric": 0.8417994404614288,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-576",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 576,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9918314814567566,
+      "eval_pearson": 0.7966375426002391,
+      "eval_runtime": 0.8021,
+      "eval_samples_per_second": 1870.124,
+      "eval_spearmanr": 0.7909051383387676,
+      "eval_steps_per_second": 117.194,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.8358427882194519,
+      "eval_pearson": 0.8281295598730645,
+      "eval_runtime": 0.826,
+      "eval_samples_per_second": 1815.925,
+      "eval_spearmanr": 0.8254389941774403,
+      "eval_steps_per_second": 113.798,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7649794220924377,
+      "eval_pearson": 0.8375909620622599,
+      "eval_runtime": 0.8159,
+      "eval_samples_per_second": 1838.53,
+      "eval_spearmanr": 0.8346396296700553,
+      "eval_steps_per_second": 115.215,
+      "step": 432
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 14.258247375488281,
+      "learning_rate": 1.3217957030592118e-05,
+      "loss": 1.0074,
+      "step": 500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.6654197573661804,
+      "eval_pearson": 0.8417994404614288,
+      "eval_runtime": 0.8349,
+      "eval_samples_per_second": 1796.689,
+      "eval_spearmanr": 0.835660530728647,
+      "eval_steps_per_second": 112.592,
+      "step": 576
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 22173922656444.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.325876846375602e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 34
+  }
+}

run-0/checkpoint-576/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a012d40728dc244700f11825615a9f3164fccd8635d4eae2a798016634c8eb7
+size 4920

run-0/checkpoint-576/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-720/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-0/checkpoint-720/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af9fc51efd1f9cfa1ae424128bd9844a8c1db1d916e7bab830758779e0222c66
+size 267829484

run-0/checkpoint-720/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b77497d40ad0b7ffbdf2f9fc5bba06417d472fc6997c7ab6f9e1fea42c8049b1
+size 535721146

run-0/checkpoint-720/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54678b1a742865b6995f347692246c5b2ebdd62e34620eb2d8e22040ffb26197
+size 14244

run-0/checkpoint-720/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac5caa780172fb363c7a06c323a0d9563ab8873a05d1602f15dc891ff2b06a29
+size 1064

run-0/checkpoint-720/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-720/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-720/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-720/trainer_state.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "best_metric": 0.8428062719060604,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-0/checkpoint-720",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 720,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.9918314814567566,
+      "eval_pearson": 0.7966375426002391,
+      "eval_runtime": 0.8021,
+      "eval_samples_per_second": 1870.124,
+      "eval_spearmanr": 0.7909051383387676,
+      "eval_steps_per_second": 117.194,
+      "step": 144
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.8358427882194519,
+      "eval_pearson": 0.8281295598730645,
+      "eval_runtime": 0.826,
+      "eval_samples_per_second": 1815.925,
+      "eval_spearmanr": 0.8254389941774403,
+      "eval_steps_per_second": 113.798,
+      "step": 288
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7649794220924377,
+      "eval_pearson": 0.8375909620622599,
+      "eval_runtime": 0.8159,
+      "eval_samples_per_second": 1838.53,
+      "eval_spearmanr": 0.8346396296700553,
+      "eval_steps_per_second": 115.215,
+      "step": 432
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 14.258247375488281,
+      "learning_rate": 1.3217957030592118e-05,
+      "loss": 1.0074,
+      "step": 500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.6654197573661804,
+      "eval_pearson": 0.8417994404614288,
+      "eval_runtime": 0.8349,
+      "eval_samples_per_second": 1796.689,
+      "eval_spearmanr": 0.835660530728647,
+      "eval_steps_per_second": 112.592,
+      "step": 576
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.6604835391044617,
+      "eval_pearson": 0.8428062719060604,
+      "eval_runtime": 0.8131,
+      "eval_samples_per_second": 1844.83,
+      "eval_spearmanr": 0.8363367917240269,
+      "eval_steps_per_second": 115.609,
+      "step": 720
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 720,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 22173922656444.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.325876846375602e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 34
+  }
+}

run-0/checkpoint-720/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a012d40728dc244700f11825615a9f3164fccd8635d4eae2a798016634c8eb7
+size 4920

run-0/checkpoint-720/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-18/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-1/checkpoint-18/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4530a8987afcc9341cc7563e3f5928ebbd7e33a6394892be9fb3ed2814e9ae01
+size 267829484

run-1/checkpoint-18/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89d8a3c21dddd5c4837a07b5d4d33ca418ac0cefddf3c3e641d7ed2d7c959ad7
+size 535721146

run-1/checkpoint-18/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fecb2849d94555e25eddc7fc457dd09217fe0b8511a41e4b415e0d296de65cf9
+size 14180

run-1/checkpoint-18/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0fcb701eb87535ed6005cd3531d8d7cd2516006d5a144652d4ab32a08c40df6
+size 1064

run-1/checkpoint-18/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-18/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-18/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-1/checkpoint-18/trainer_state.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "best_metric": 0.7009517245792942,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-1/checkpoint-18",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 18,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.5346204042434692,
+      "eval_pearson": 0.7009517245792942,
+      "eval_runtime": 0.7943,
+      "eval_samples_per_second": 1888.564,
+      "eval_spearmanr": 0.7023215368118,
+      "eval_steps_per_second": 118.35,
+      "step": 18
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 90,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 9.415997134117997e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 10
+  }
+}

run-1/checkpoint-18/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51a139e4fa7709c7bede3ad4026fd0ba3a4115364cd5d45e61fa50850707d74a
+size 4920