SjardiWillems commited on
Commit
b97fa27
·
verified ·
1 Parent(s): c817cd2

Training in progress, epoch 1

Browse files
Files changed (46) hide show
  1. model.safetensors +1 -1
  2. run-3/checkpoint-36/config.json +1 -1
  3. run-3/checkpoint-36/model.safetensors +1 -1
  4. run-3/checkpoint-36/optimizer.pt +1 -1
  5. run-3/checkpoint-36/rng_state.pth +2 -2
  6. run-3/checkpoint-36/scheduler.pt +1 -1
  7. run-3/checkpoint-36/trainer_state.json +20 -40
  8. run-3/checkpoint-36/training_args.bin +1 -1
  9. run-3/checkpoint-54/config.json +31 -0
  10. run-3/checkpoint-54/model.safetensors +3 -0
  11. run-3/checkpoint-54/optimizer.pt +3 -0
  12. run-3/checkpoint-54/rng_state.pth +3 -0
  13. run-3/checkpoint-54/scheduler.pt +3 -0
  14. run-3/checkpoint-54/special_tokens_map.json +7 -0
  15. run-3/checkpoint-54/tokenizer.json +0 -0
  16. run-3/checkpoint-54/tokenizer_config.json +55 -0
  17. run-3/checkpoint-54/trainer_state.json +56 -0
  18. run-3/checkpoint-54/training_args.bin +3 -0
  19. run-3/checkpoint-54/vocab.txt +0 -0
  20. run-3/checkpoint-72/config.json +1 -1
  21. run-3/checkpoint-72/model.safetensors +1 -1
  22. run-3/checkpoint-72/optimizer.pt +1 -1
  23. run-3/checkpoint-72/rng_state.pth +1 -1
  24. run-3/checkpoint-72/scheduler.pt +1 -1
  25. run-3/checkpoint-72/trainer_state.json +46 -16
  26. run-3/checkpoint-72/training_args.bin +1 -1
  27. run-3/checkpoint-90/config.json +31 -0
  28. run-3/checkpoint-90/model.safetensors +3 -0
  29. run-3/checkpoint-90/optimizer.pt +3 -0
  30. run-3/checkpoint-90/rng_state.pth +3 -0
  31. run-3/checkpoint-90/scheduler.pt +3 -0
  32. run-3/checkpoint-90/special_tokens_map.json +7 -0
  33. run-3/checkpoint-90/tokenizer.json +0 -0
  34. run-3/checkpoint-90/tokenizer_config.json +55 -0
  35. run-3/checkpoint-90/trainer_state.json +76 -0
  36. run-3/checkpoint-90/training_args.bin +3 -0
  37. run-3/checkpoint-90/vocab.txt +0 -0
  38. run-4/checkpoint-18/config.json +1 -1
  39. run-4/checkpoint-18/model.safetensors +1 -1
  40. run-4/checkpoint-18/optimizer.pt +1 -1
  41. run-4/checkpoint-18/rng_state.pth +2 -2
  42. run-4/checkpoint-18/scheduler.pt +1 -1
  43. run-4/checkpoint-18/trainer_state.json +16 -26
  44. run-4/checkpoint-18/training_args.bin +1 -1
  45. runs/Mar10_22-33-58_0f0a24039c15/events.out.tfevents.1710110466.0f0a24039c15.273.6 +3 -0
  46. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c72c160ce745efa2a168ba3065831c2eb692ce9297265e22265b0352f137ee9
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:137ea9a9d64c2fca4ff0672ce3b388497cdd88b583117dcb14cf56ef06450dd2
3
  size 267829484
run-3/checkpoint-36/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
run-3/checkpoint-36/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6c629568e16dc74409d4dfa02a57321ba6ae6562c21d2fc48ffa505ded3a4e6
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad30ede43c020749fbc655edf4e58895a5e7eb3d33517cc7b220377cf3b3aaf4
3
  size 267829484
run-3/checkpoint-36/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60f8e6feef4eb32f7ba3f7c0faf9f4c6ae8f291f6e1418fea38e79ce1dc3601
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33a2b7c57ce885803b45f98bbc8c840948b4a3f25a359ff99d58a3cbda4057f9
3
  size 535721146
run-3/checkpoint-36/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d83c4a58d2ee55cb30b32045e660936fc993b17a15c3e3f39a67e2c75fae7ec7
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d600bdc59e6dc868c416c7bb537f563e04469edf86acff4ed520679647068ccc
3
+ size 14308
run-3/checkpoint-36/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660ee25a0ddd4682dcd06e6ffe649e6382c31e6ac22a1275061d2ccb894eb7da
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:034e87c15bdb924d0c8e7456ae36135555d9fa00431e495a13cf852a2a875a48
3
  size 1064
run-3/checkpoint-36/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.06186573722446415,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 36,
7
  "is_hyper_param_search": true,
@@ -10,57 +10,37 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 7.199407577514648,
14
- "eval_pearson": 0.024278120881494558,
15
- "eval_runtime": 0.9268,
16
- "eval_samples_per_second": 1618.438,
17
- "eval_spearmanr": 0.025433044219399577,
18
- "eval_steps_per_second": 101.422,
19
- "step": 9
20
- },
21
- {
22
- "epoch": 2.0,
23
- "eval_loss": 6.31046724319458,
24
- "eval_pearson": 0.0470495796010619,
25
- "eval_runtime": 0.9557,
26
- "eval_samples_per_second": 1569.521,
27
- "eval_spearmanr": 0.048936574609921006,
28
- "eval_steps_per_second": 98.357,
29
  "step": 18
30
  },
31
  {
32
- "epoch": 3.0,
33
- "eval_loss": 5.604863166809082,
34
- "eval_pearson": 0.05278675976955915,
35
- "eval_runtime": 0.9249,
36
- "eval_samples_per_second": 1621.719,
37
- "eval_spearmanr": 0.05579666652056003,
38
- "eval_steps_per_second": 101.628,
39
- "step": 27
40
- },
41
- {
42
- "epoch": 4.0,
43
- "eval_loss": 5.163288593292236,
44
- "eval_pearson": 0.06186573722446415,
45
- "eval_runtime": 0.9496,
46
- "eval_samples_per_second": 1579.549,
47
- "eval_spearmanr": 0.06678878974516303,
48
- "eval_steps_per_second": 98.985,
49
  "step": 36
50
  }
51
  ],
52
  "logging_steps": 500,
53
- "max_steps": 45,
54
  "num_input_tokens_seen": 0,
55
  "num_train_epochs": 5,
56
  "save_steps": 500,
57
  "total_flos": 0,
58
- "train_batch_size": 64,
59
  "trial_name": null,
60
  "trial_params": {
61
- "learning_rate": 5.243560941114439e-06,
62
  "num_train_epochs": 5,
63
- "per_device_train_batch_size": 64,
64
- "seed": 10
65
  }
66
  }
 
1
  {
2
+ "best_metric": 0.8703719079793119,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 36,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.6028859615325928,
14
+ "eval_pearson": 0.8640876954461902,
15
+ "eval_runtime": 0.7957,
16
+ "eval_samples_per_second": 1885.175,
17
+ "eval_spearmanr": 0.8614938677816929,
18
+ "eval_steps_per_second": 118.138,
 
 
 
 
 
 
 
 
 
 
19
  "step": 18
20
  },
21
  {
22
+ "epoch": 2.0,
23
+ "eval_loss": 0.5511023998260498,
24
+ "eval_pearson": 0.8703719079793119,
25
+ "eval_runtime": 0.786,
26
+ "eval_samples_per_second": 1908.456,
27
+ "eval_spearmanr": 0.866545511062528,
28
+ "eval_steps_per_second": 119.597,
 
 
 
 
 
 
 
 
 
 
29
  "step": 36
30
  }
31
  ],
32
  "logging_steps": 500,
33
+ "max_steps": 90,
34
  "num_input_tokens_seen": 0,
35
  "num_train_epochs": 5,
36
  "save_steps": 500,
37
  "total_flos": 0,
38
+ "train_batch_size": 32,
39
  "trial_name": null,
40
  "trial_params": {
41
+ "learning_rate": 5.304193372992487e-05,
42
  "num_train_epochs": 5,
43
+ "per_device_train_batch_size": 32,
44
+ "seed": 31
45
  }
46
  }
run-3/checkpoint-36/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43dea477fd3be8f6b457027410de4c6f7cf07a2710d25756956ebe9bfbdf4e31
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
3
  size 4920
run-3/checkpoint-54/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "label2id": {
16
+ "LABEL_0": 0
17
+ },
18
+ "max_position_embeddings": 512,
19
+ "model_type": "distilbert",
20
+ "n_heads": 12,
21
+ "n_layers": 6,
22
+ "pad_token_id": 0,
23
+ "problem_type": "regression",
24
+ "qa_dropout": 0.1,
25
+ "seq_classif_dropout": 0.2,
26
+ "sinusoidal_pos_embds": false,
27
+ "tie_weights_": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.38.2",
30
+ "vocab_size": 30522
31
+ }
run-3/checkpoint-54/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c186d637863e57384b54ab2fc7dbf952b0dc4f47137ccd712d5e7f686b7cb5a4
3
+ size 267829484
run-3/checkpoint-54/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1069ad833f7cf2b1614ee4cbc59f70f2794581760c6d04cf8c64fc841b745db
3
+ size 535721146
run-3/checkpoint-54/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e32de5813ecdfb48e5dd1d624acd0a70f2a5360740ef6edd53f7b4d7319206
3
+ size 14308
run-3/checkpoint-54/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4564fa06afdd7a9495548b82cfa9fec739694a189cc628de539023b2017a9892
3
+ size 1064
run-3/checkpoint-54/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-3/checkpoint-54/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-54/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-3/checkpoint-54/trainer_state.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8703719079793119,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 54,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.6028859615325928,
14
+ "eval_pearson": 0.8640876954461902,
15
+ "eval_runtime": 0.7957,
16
+ "eval_samples_per_second": 1885.175,
17
+ "eval_spearmanr": 0.8614938677816929,
18
+ "eval_steps_per_second": 118.138,
19
+ "step": 18
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_loss": 0.5511023998260498,
24
+ "eval_pearson": 0.8703719079793119,
25
+ "eval_runtime": 0.786,
26
+ "eval_samples_per_second": 1908.456,
27
+ "eval_spearmanr": 0.866545511062528,
28
+ "eval_steps_per_second": 119.597,
29
+ "step": 36
30
+ },
31
+ {
32
+ "epoch": 3.0,
33
+ "eval_loss": 0.5914527177810669,
34
+ "eval_pearson": 0.8695007442729191,
35
+ "eval_runtime": 0.8064,
36
+ "eval_samples_per_second": 1860.165,
37
+ "eval_spearmanr": 0.8657982418648841,
38
+ "eval_steps_per_second": 116.57,
39
+ "step": 54
40
+ }
41
+ ],
42
+ "logging_steps": 500,
43
+ "max_steps": 90,
44
+ "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 5,
46
+ "save_steps": 500,
47
+ "total_flos": 0,
48
+ "train_batch_size": 32,
49
+ "trial_name": null,
50
+ "trial_params": {
51
+ "learning_rate": 5.304193372992487e-05,
52
+ "num_train_epochs": 5,
53
+ "per_device_train_batch_size": 32,
54
+ "seed": 31
55
+ }
56
+ }
run-3/checkpoint-54/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
3
+ size 4920
run-3/checkpoint-54/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-72/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
run-3/checkpoint-72/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:628e81c9d1cd28e94fd2c0528b29c2450ff4003f56c032e01547e58616527956
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bbb5e3c825ea117f159488dc603a85c23ee403d37a996586892bdcdb82b341
3
  size 267829484
run-3/checkpoint-72/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2efe5398abd461fcafd62df5d68501cdf7326c5d01c05bfb382b1f855d199173
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cce3687e29d958a9581c2eab8c194598006cd12484554cd4d3f40204c23498d
3
  size 535721146
run-3/checkpoint-72/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dda7584660763f72c41c754dd6523df6f1c049b2a56e83473d0e06cc32bb35d
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb6236b8299fe8ab86ae5a7a0125ffdb192834dfed7e461189fa3fff3b1d957
3
  size 14308
run-3/checkpoint-72/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525313aa68006057abc2527656e139e39e304df0fae14edc96d58c8a10f35d43
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b4614e97f318c5b27378badbc8c75f30278c4733f2ec9cc0b63520d116b974
3
  size 1064
run-3/checkpoint-72/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.3488998094346536,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-72",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 72,
7
  "is_hyper_param_search": true,
@@ -10,27 +10,57 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 2.188309907913208,
14
- "eval_pearson": 0.3488998094346536,
15
- "eval_runtime": 0.8689,
16
- "eval_samples_per_second": 1726.372,
17
- "eval_spearmanr": 0.3841984779795825,
18
- "eval_steps_per_second": 108.186,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "step": 72
20
  }
21
  ],
22
  "logging_steps": 500,
23
- "max_steps": 72,
24
  "num_input_tokens_seen": 0,
25
- "num_train_epochs": 1,
26
  "save_steps": 500,
27
  "total_flos": 0,
28
- "train_batch_size": 8,
29
  "trial_name": null,
30
  "trial_params": {
31
- "learning_rate": 1.2281778565394951e-05,
32
- "num_train_epochs": 1,
33
- "per_device_train_batch_size": 8,
34
- "seed": 24
35
  }
36
  }
 
1
  {
2
+ "best_metric": 0.8703719079793119,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 72,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.6028859615325928,
14
+ "eval_pearson": 0.8640876954461902,
15
+ "eval_runtime": 0.7957,
16
+ "eval_samples_per_second": 1885.175,
17
+ "eval_spearmanr": 0.8614938677816929,
18
+ "eval_steps_per_second": 118.138,
19
+ "step": 18
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_loss": 0.5511023998260498,
24
+ "eval_pearson": 0.8703719079793119,
25
+ "eval_runtime": 0.786,
26
+ "eval_samples_per_second": 1908.456,
27
+ "eval_spearmanr": 0.866545511062528,
28
+ "eval_steps_per_second": 119.597,
29
+ "step": 36
30
+ },
31
+ {
32
+ "epoch": 3.0,
33
+ "eval_loss": 0.5914527177810669,
34
+ "eval_pearson": 0.8695007442729191,
35
+ "eval_runtime": 0.8064,
36
+ "eval_samples_per_second": 1860.165,
37
+ "eval_spearmanr": 0.8657982418648841,
38
+ "eval_steps_per_second": 116.57,
39
+ "step": 54
40
+ },
41
+ {
42
+ "epoch": 4.0,
43
+ "eval_loss": 0.5527331829071045,
44
+ "eval_pearson": 0.870253450177356,
45
+ "eval_runtime": 0.8029,
46
+ "eval_samples_per_second": 1868.343,
47
+ "eval_spearmanr": 0.8666505706240611,
48
+ "eval_steps_per_second": 117.083,
49
  "step": 72
50
  }
51
  ],
52
  "logging_steps": 500,
53
+ "max_steps": 90,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 5,
56
  "save_steps": 500,
57
  "total_flos": 0,
58
+ "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "learning_rate": 5.304193372992487e-05,
62
+ "num_train_epochs": 5,
63
+ "per_device_train_batch_size": 32,
64
+ "seed": 31
65
  }
66
  }
run-3/checkpoint-72/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a97181d28e1b7189cab54251a7b430fd73528c8be0f04e2930aea21ed4e946
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
3
  size 4920
run-3/checkpoint-90/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "label2id": {
16
+ "LABEL_0": 0
17
+ },
18
+ "max_position_embeddings": 512,
19
+ "model_type": "distilbert",
20
+ "n_heads": 12,
21
+ "n_layers": 6,
22
+ "pad_token_id": 0,
23
+ "problem_type": "regression",
24
+ "qa_dropout": 0.1,
25
+ "seq_classif_dropout": 0.2,
26
+ "sinusoidal_pos_embds": false,
27
+ "tie_weights_": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.38.2",
30
+ "vocab_size": 30522
31
+ }
run-3/checkpoint-90/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe879bec543a9d5c304e50f38c4331c25b9757978d34480cbe9f23ce33b03f0e
3
+ size 267829484
run-3/checkpoint-90/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbd78cde4da5586f24913972751775c83f4cb28fe0c11a22e4efa5428e4822f
3
+ size 535721146
run-3/checkpoint-90/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab4d2e6f9b0d070357fd74c3d5209fdbd5738db589136b19b23fab58fc5e55d
3
+ size 14308
run-3/checkpoint-90/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b24c3757e327e4ae3872c99a62f9eda4e0c61b0b08286d512734603c3f8205b9
3
+ size 1064
run-3/checkpoint-90/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-3/checkpoint-90/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-90/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-3/checkpoint-90/trainer_state.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8703719079793119,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-36",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 90,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.6028859615325928,
14
+ "eval_pearson": 0.8640876954461902,
15
+ "eval_runtime": 0.7957,
16
+ "eval_samples_per_second": 1885.175,
17
+ "eval_spearmanr": 0.8614938677816929,
18
+ "eval_steps_per_second": 118.138,
19
+ "step": 18
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_loss": 0.5511023998260498,
24
+ "eval_pearson": 0.8703719079793119,
25
+ "eval_runtime": 0.786,
26
+ "eval_samples_per_second": 1908.456,
27
+ "eval_spearmanr": 0.866545511062528,
28
+ "eval_steps_per_second": 119.597,
29
+ "step": 36
30
+ },
31
+ {
32
+ "epoch": 3.0,
33
+ "eval_loss": 0.5914527177810669,
34
+ "eval_pearson": 0.8695007442729191,
35
+ "eval_runtime": 0.8064,
36
+ "eval_samples_per_second": 1860.165,
37
+ "eval_spearmanr": 0.8657982418648841,
38
+ "eval_steps_per_second": 116.57,
39
+ "step": 54
40
+ },
41
+ {
42
+ "epoch": 4.0,
43
+ "eval_loss": 0.5527331829071045,
44
+ "eval_pearson": 0.870253450177356,
45
+ "eval_runtime": 0.8029,
46
+ "eval_samples_per_second": 1868.343,
47
+ "eval_spearmanr": 0.8666505706240611,
48
+ "eval_steps_per_second": 117.083,
49
+ "step": 72
50
+ },
51
+ {
52
+ "epoch": 5.0,
53
+ "eval_loss": 0.5488625764846802,
54
+ "eval_pearson": 0.8699828916266209,
55
+ "eval_runtime": 0.7844,
56
+ "eval_samples_per_second": 1912.356,
57
+ "eval_spearmanr": 0.8658726120228372,
58
+ "eval_steps_per_second": 119.841,
59
+ "step": 90
60
+ }
61
+ ],
62
+ "logging_steps": 500,
63
+ "max_steps": 90,
64
+ "num_input_tokens_seen": 0,
65
+ "num_train_epochs": 5,
66
+ "save_steps": 500,
67
+ "total_flos": 0,
68
+ "train_batch_size": 32,
69
+ "trial_name": null,
70
+ "trial_params": {
71
+ "learning_rate": 5.304193372992487e-05,
72
+ "num_train_epochs": 5,
73
+ "per_device_train_batch_size": 32,
74
+ "seed": 31
75
+ }
76
+ }
run-3/checkpoint-90/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
3
+ size 4920
run-3/checkpoint-90/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-4/checkpoint-18/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "SjardiWillems/distilbert-base-uncased-finetuned-stsb",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
run-4/checkpoint-18/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56c9436a49a572459a2a44ddbdc061067a3843403ed9f6eca1db149dbcdbd98d
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:137ea9a9d64c2fca4ff0672ce3b388497cdd88b583117dcb14cf56ef06450dd2
3
  size 267829484
run-4/checkpoint-18/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf8a4c79882ed42b0ba2938dc012222bb219784a5f37e9ad6b2769726ebfcbf0
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288fe1db01ec36a78be841780b2535af1f4df14e228f6541f72757cd4b33973c
3
  size 535721146
run-4/checkpoint-18/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a978540f0e297d2deb9452be6f34da155066843444206a3a770d9309265da5
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3489ad310a1c6f771a5ce3247bcfdda18268e6f5e4d886772bb1dbd75613e8de
3
+ size 14180
run-4/checkpoint-18/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc1f03d35d35aad84c8160e55d20e180f5c3328f48c3f74d729338bf74cb3c93
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5f4eaffe579a51d750e17c4d1ab255305969dc4aefdc53aa2a41ebfb8e9dc44
3
  size 1064
run-4/checkpoint-18/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.24101392328980611,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 18,
7
  "is_hyper_param_search": true,
@@ -10,37 +10,27 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 6.916525363922119,
14
- "eval_pearson": 0.24101392328980611,
15
- "eval_runtime": 0.9549,
16
- "eval_samples_per_second": 1570.783,
17
- "eval_spearmanr": 0.23527578828258416,
18
- "eval_steps_per_second": 98.436,
19
- "step": 9
20
- },
21
- {
22
- "epoch": 2.0,
23
- "eval_loss": 6.322813987731934,
24
- "eval_pearson": 0.21685934779589452,
25
- "eval_runtime": 1.3518,
26
- "eval_samples_per_second": 1109.622,
27
- "eval_spearmanr": 0.19981399518979143,
28
- "eval_steps_per_second": 69.536,
29
  "step": 18
30
  }
31
  ],
32
  "logging_steps": 500,
33
- "max_steps": 36,
34
  "num_input_tokens_seen": 0,
35
- "num_train_epochs": 4,
36
  "save_steps": 500,
37
  "total_flos": 0,
38
- "train_batch_size": 64,
39
  "trial_name": null,
40
  "trial_params": {
41
- "learning_rate": 4.463445057905012e-06,
42
- "num_train_epochs": 4,
43
- "per_device_train_batch_size": 64,
44
- "seed": 5
45
  }
46
  }
 
1
  {
2
+ "best_metric": 0.8698177849938507,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-18",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 18,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5605268478393555,
14
+ "eval_pearson": 0.8698177849938507,
15
+ "eval_runtime": 0.7833,
16
+ "eval_samples_per_second": 1914.894,
17
+ "eval_spearmanr": 0.8653376248792928,
18
+ "eval_steps_per_second": 120.0,
 
 
 
 
 
 
 
 
 
 
19
  "step": 18
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 54,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 3,
26
  "save_steps": 500,
27
  "total_flos": 0,
28
+ "train_batch_size": 32,
29
  "trial_name": null,
30
  "trial_params": {
31
+ "learning_rate": 9.914291118758786e-06,
32
+ "num_train_epochs": 3,
33
+ "per_device_train_batch_size": 32,
34
+ "seed": 10
35
  }
36
  }
run-4/checkpoint-18/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0fb3669a562055766866e71b54806bb7235c4677b6cb8f825acdf2c31236bf5
3
  size 4920
runs/Mar10_22-33-58_0f0a24039c15/events.out.tfevents.1710110466.0f0a24039c15.273.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b236390ebad4b19ffe6f513e3bb9328a12005ea07134351ce302675b0c00d7
3
+ size 6162
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39e6105fbde8ff9149ae2a4bbcd7a8dac84eb9cecad379f5bd1a4200bdc1f6ea
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0fb3669a562055766866e71b54806bb7235c4677b6cb8f825acdf2c31236bf5
3
  size 4920