SjardiWillems committed (verified)
Commit 9f0cd70 · 1 Parent(s): 318ed4d

Training in progress, epoch 1

Files changed (37)
  1. model.safetensors +1 -1
  2. run-2/checkpoint-18/model.safetensors +1 -1
  3. run-2/checkpoint-18/optimizer.pt +1 -1
  4. run-2/checkpoint-18/rng_state.pth +2 -2
  5. run-2/checkpoint-18/scheduler.pt +1 -1
  6. run-2/checkpoint-18/trainer_state.json +23 -13
  7. run-2/checkpoint-18/training_args.bin +1 -1
  8. run-2/checkpoint-27/config.json +31 -0
  9. run-2/checkpoint-27/model.safetensors +3 -0
  10. run-2/checkpoint-27/optimizer.pt +3 -0
  11. run-2/checkpoint-27/rng_state.pth +3 -0
  12. run-2/checkpoint-27/scheduler.pt +3 -0
  13. run-2/checkpoint-27/special_tokens_map.json +7 -0
  14. run-2/checkpoint-27/tokenizer.json +0 -0
  15. run-2/checkpoint-27/tokenizer_config.json +55 -0
  16. run-2/checkpoint-27/trainer_state.json +56 -0
  17. run-2/checkpoint-27/training_args.bin +3 -0
  18. run-2/checkpoint-27/vocab.txt +0 -0
  19. run-2/checkpoint-36/model.safetensors +1 -1
  20. run-2/checkpoint-36/optimizer.pt +1 -1
  21. run-2/checkpoint-36/rng_state.pth +2 -2
  22. run-2/checkpoint-36/scheduler.pt +1 -1
  23. run-2/checkpoint-36/trainer_state.json +40 -20
  24. run-2/checkpoint-36/training_args.bin +1 -1
  25. run-3/checkpoint-72/config.json +31 -0
  26. run-3/checkpoint-72/model.safetensors +3 -0
  27. run-3/checkpoint-72/optimizer.pt +3 -0
  28. run-3/checkpoint-72/rng_state.pth +3 -0
  29. run-3/checkpoint-72/scheduler.pt +3 -0
  30. run-3/checkpoint-72/special_tokens_map.json +7 -0
  31. run-3/checkpoint-72/tokenizer.json +0 -0
  32. run-3/checkpoint-72/tokenizer_config.json +55 -0
  33. run-3/checkpoint-72/trainer_state.json +36 -0
  34. run-3/checkpoint-72/training_args.bin +3 -0
  35. run-3/checkpoint-72/vocab.txt +0 -0
  36. runs/Mar07_01-09-00_6dcd98e81284/events.out.tfevents.1709774024.6dcd98e81284.961.5 +3 -0
  37. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:475be2c7e113d40683406e8823d7389e3567d12d4398a875869febd179abda22
+ oid sha256:628e81c9d1cd28e94fd2c0528b29c2450ff4003f56c032e01547e58616527956
  size 267829484
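Note that the entries above and below are Git LFS pointer files, not the binary weights themselves; each pointer records only the spec version, the SHA-256 object id, and the byte size of the tracked blob. As a minimal sketch (the local path and variable names are illustrative, not part of this commit), a downloaded blob can be checked against such a pointer like this:

import hashlib

path = "model.safetensors"  # hypothetical local copy of the LFS blob
expected_oid = "628e81c9d1cd28e94fd2c0528b29c2450ff4003f56c032e01547e58616527956"  # from the pointer above
expected_size = 267829484

digest = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size} != {expected_size}"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches the LFS pointer")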
run-2/checkpoint-18/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f91234e83dbce56155401ad1d694d158623c6dbcb479b1db268fcce8bb17a34
+ oid sha256:e3bd9289f6d13dfb86bf6d6c5ea2fc8272397af0ae0983f98a2e95ce54ff6c87
  size 267829484
run-2/checkpoint-18/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:12d7dfcd85bbe095a7319684aa46db30a3c31c331bf0d626069d29ca390da693
+ oid sha256:290209986adc4145147597ae9f00174c08b322ff434765f5343dc28c4c50bea9
  size 535721146
run-2/checkpoint-18/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:03ffbb71a78626d320e0c7e109df0deaee9a942aef8d9d09deef70e0bfd7c11c
- size 14244
+ oid sha256:2278d47bd604800f774ab03ae4190531fb4f0bb9ade998d2253395fb7fc7a062
+ size 14308
run-2/checkpoint-18/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e42290e228d418923069eab3d6144e284e05546d2e53f8e66ffe83b7fd46d65
+ oid sha256:0daca37ead5c3454bd7b82bc2ba88d4ad343140ccb779d04e2bfa88439b40fea
  size 1064
run-2/checkpoint-18/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
-   "best_metric": 0.3111201117407732,
+   "best_metric": 0.2841659960070816,
    "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-2/checkpoint-18",
-   "epoch": 1.0,
+   "epoch": 2.0,
    "eval_steps": 500,
    "global_step": 18,
    "is_hyper_param_search": true,
@@ -10,27 +10,37 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_loss": 2.5358598232269287,
-       "eval_pearson": 0.3111201117407732,
-       "eval_runtime": 0.9088,
-       "eval_samples_per_second": 1650.59,
-       "eval_spearmanr": 0.30189341919396323,
-       "eval_steps_per_second": 103.437,
+       "eval_loss": 4.423223495483398,
+       "eval_pearson": 0.17909300472522865,
+       "eval_runtime": 0.8624,
+       "eval_samples_per_second": 1739.427,
+       "eval_spearmanr": 0.1740721288711004,
+       "eval_steps_per_second": 109.004,
+       "step": 9
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 2.390650987625122,
+       "eval_pearson": 0.2841659960070816,
+       "eval_runtime": 0.8895,
+       "eval_samples_per_second": 1686.303,
+       "eval_spearmanr": 0.32493796565837335,
+       "eval_steps_per_second": 105.675,
        "step": 18
      }
    ],
    "logging_steps": 500,
-   "max_steps": 72,
+   "max_steps": 36,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 4,
    "save_steps": 500,
    "total_flos": 0,
-   "train_batch_size": 32,
+   "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 3.9708935486543325e-05,
+     "learning_rate": 2.6680721367432187e-05,
      "num_train_epochs": 4,
-     "per_device_train_batch_size": 32,
-     "seed": 27
+     "per_device_train_batch_size": 64,
+     "seed": 29
    }
  }
run-2/checkpoint-18/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6a58e573bd94f8beb9db88df304942d0e9c8bc04406d12b1190de70c7fae2b2b
+ oid sha256:10030bf67bebf9712be4aee13e4eb2bc3bd722e9da638f545180055dd9d691b8
  size 4920
run-2/checkpoint-27/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "distilbert-base-uncased",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "regression",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "vocab_size": 30522
+ }
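For reference, a checkpoint directory like run-2/checkpoint-27 (this config.json plus the tokenizer files and model.safetensors added below) can be reloaded with the standard transformers auto classes; because the config sets problem_type to "regression" with a single label, the head returns one similarity score per sentence pair. A minimal sketch, assuming the checkpoint directory is available locally and with made-up example sentences:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "run-2/checkpoint-27"  # path as it appears in this commit
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
model.eval()

# STS-B style input: a pair of sentences to be scored for semantic similarity.
inputs = tokenizer("A man is playing a guitar.",
                   "Someone is playing an instrument.",
                   return_tensors="pt")
with torch.no_grad():
    score = model(**inputs).logits.squeeze().item()
print(f"predicted similarity score: {score:.3f}")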
run-2/checkpoint-27/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b2585a91a160915792ac68da62718015090f9979e10f55c61b2c7f242b180ea
+ size 267829484
run-2/checkpoint-27/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c5e9229a912b577743d0bc96305ef0ad812f22077c33bfe0cae938f2d2bf21e
+ size 535721146
run-2/checkpoint-27/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18212658bc6abf57a7b3b876c2db102461910a9ecc7699b5fd646bfb5015c263
+ size 14308
run-2/checkpoint-27/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a56a7631c2cc7c6f0e675edb161ea7fc33df616f3ef3940733b1988c0cea6ee
+ size 1064
run-2/checkpoint-27/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
run-2/checkpoint-27/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-27/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
run-2/checkpoint-27/trainer_state.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "best_metric": 0.30476307824800447,
+   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-2/checkpoint-27",
+   "epoch": 3.0,
+   "eval_steps": 500,
+   "global_step": 27,
+   "is_hyper_param_search": true,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_loss": 4.423223495483398,
+       "eval_pearson": 0.17909300472522865,
+       "eval_runtime": 0.8624,
+       "eval_samples_per_second": 1739.427,
+       "eval_spearmanr": 0.1740721288711004,
+       "eval_steps_per_second": 109.004,
+       "step": 9
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 2.390650987625122,
+       "eval_pearson": 0.2841659960070816,
+       "eval_runtime": 0.8895,
+       "eval_samples_per_second": 1686.303,
+       "eval_spearmanr": 0.32493796565837335,
+       "eval_steps_per_second": 105.675,
+       "step": 18
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 2.221266746520996,
+       "eval_pearson": 0.30476307824800447,
+       "eval_runtime": 0.8761,
+       "eval_samples_per_second": 1712.204,
+       "eval_spearmanr": 0.32409413348965677,
+       "eval_steps_per_second": 107.298,
+       "step": 27
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 36,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": {
+     "learning_rate": 2.6680721367432187e-05,
+     "num_train_epochs": 4,
+     "per_device_train_batch_size": 64,
+     "seed": 29
+   }
+ }
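The run-*/checkpoint-* layout and the trial_params block (learning_rate, num_train_epochs, per_device_train_batch_size, seed) are what transformers' Trainer.hyperparameter_search writes out while searching. The actual training script is not part of this commit, so the following is only a rough sketch of the kind of setup that produces such runs, assuming the GLUE STS-B dataset, an Optuna backend, and search ranges chosen here purely for illustration:

import numpy as np
from datasets import load_dataset
from scipy.stats import pearsonr, spearmanr
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

raw = load_dataset("glue", "stsb")
def tokenize(batch):
    return tokenizer(batch["sentence1"], batch["sentence2"], truncation=True)
encoded = raw.map(tokenize, batched=True)

def model_init():
    # num_labels=1 yields the regression head seen in the config.json files above.
    return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)

def compute_metrics(eval_pred):
    preds = np.squeeze(eval_pred.predictions)
    labels = eval_pred.label_ids
    return {"pearson": float(pearsonr(preds, labels)[0]),
            "spearmanr": float(spearmanr(preds, labels)[0])}

args = TrainingArguments(
    output_dir="distilbert-base-uncased-finetuned-stsb",
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

def hp_space(trial):
    # Same keys as the trial_params recorded in the trainer_state.json files above;
    # the ranges are illustrative guesses, not taken from this repository.
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 1, 5),
        "per_device_train_batch_size": trial.suggest_categorical(
            "per_device_train_batch_size", [8, 16, 32, 64]),
        "seed": trial.suggest_int("seed", 1, 40),
    }

best_run = trainer.hyperparameter_search(
    hp_space=hp_space,
    backend="optuna",
    n_trials=4,
    direction="maximize",
    compute_objective=lambda metrics: metrics["eval_pearson"],
)
print(best_run)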
run-2/checkpoint-27/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10030bf67bebf9712be4aee13e4eb2bc3bd722e9da638f545180055dd9d691b8
+ size 4920
run-2/checkpoint-27/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-36/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f930b8e96467ee59dea52bcf8712dae91677530f12e1f7ea5a39a7a4572f96e9
+ oid sha256:569bbe56c4f6402c0da1d737655763630777779fa1fd831d3eeecdcfdb0ce5cc
  size 267829484
run-2/checkpoint-36/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bb506fecce1fb0b394149d81f89c829a9f59f5e9b82210e2b4a2581d50a25010
+ oid sha256:76e649ef690cf4032595a982b8f2c8846e01840db57b5c1d46e7acb894238ab2
  size 535721146
run-2/checkpoint-36/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e301dfdefa0bdcaf96abb4785d80c44201447f4b6cd7ecb8a0638b8d695f8a09
- size 14244
+ oid sha256:120b265081defe782dd089076117c7ba9160d0e705e276032b793732a324ec6b
+ size 14308
run-2/checkpoint-36/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:79c8286a4eb3ca2b934455ad2ce9ed451db18e27aab2aa7fe20c57a9ac5c9294
+ oid sha256:645db668f6b8180ecbe45335051022b20e24a308d9373fde921c514362069146
  size 1064
run-2/checkpoint-36/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
-   "best_metric": 0.6951648539564048,
+   "best_metric": 0.4586603784701155,
    "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-2/checkpoint-36",
-   "epoch": 2.0,
+   "epoch": 4.0,
    "eval_steps": 500,
    "global_step": 36,
    "is_hyper_param_search": true,
@@ -10,37 +10,57 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_loss": 2.5358598232269287,
-       "eval_pearson": 0.3111201117407732,
-       "eval_runtime": 0.9088,
-       "eval_samples_per_second": 1650.59,
-       "eval_spearmanr": 0.30189341919396323,
-       "eval_steps_per_second": 103.437,
-       "step": 18
+       "eval_loss": 4.423223495483398,
+       "eval_pearson": 0.17909300472522865,
+       "eval_runtime": 0.8624,
+       "eval_samples_per_second": 1739.427,
+       "eval_spearmanr": 0.1740721288711004,
+       "eval_steps_per_second": 109.004,
+       "step": 9
      },
      {
        "epoch": 2.0,
-       "eval_loss": 1.8630882501602173,
-       "eval_pearson": 0.6951648539564048,
-       "eval_runtime": 1.1428,
-       "eval_samples_per_second": 1312.613,
-       "eval_spearmanr": 0.6897209166518194,
-       "eval_steps_per_second": 82.257,
+       "eval_loss": 2.390650987625122,
+       "eval_pearson": 0.2841659960070816,
+       "eval_runtime": 0.8895,
+       "eval_samples_per_second": 1686.303,
+       "eval_spearmanr": 0.32493796565837335,
+       "eval_steps_per_second": 105.675,
+       "step": 18
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 2.221266746520996,
+       "eval_pearson": 0.30476307824800447,
+       "eval_runtime": 0.8761,
+       "eval_samples_per_second": 1712.204,
+       "eval_spearmanr": 0.32409413348965677,
+       "eval_steps_per_second": 107.298,
+       "step": 27
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 2.2502002716064453,
+       "eval_pearson": 0.4586603784701155,
+       "eval_runtime": 0.8808,
+       "eval_samples_per_second": 1702.946,
+       "eval_spearmanr": 0.4929584589747905,
+       "eval_steps_per_second": 106.718,
        "step": 36
      }
    ],
    "logging_steps": 500,
-   "max_steps": 72,
+   "max_steps": 36,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 4,
    "save_steps": 500,
    "total_flos": 0,
-   "train_batch_size": 32,
+   "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 3.9708935486543325e-05,
+     "learning_rate": 2.6680721367432187e-05,
      "num_train_epochs": 4,
-     "per_device_train_batch_size": 32,
-     "seed": 27
+     "per_device_train_batch_size": 64,
+     "seed": 29
    }
  }
run-2/checkpoint-36/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6a58e573bd94f8beb9db88df304942d0e9c8bc04406d12b1190de70c7fae2b2b
+ oid sha256:10030bf67bebf9712be4aee13e4eb2bc3bd722e9da638f545180055dd9d691b8
  size 4920
run-3/checkpoint-72/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "distilbert-base-uncased",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "regression",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "vocab_size": 30522
+ }
run-3/checkpoint-72/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:628e81c9d1cd28e94fd2c0528b29c2450ff4003f56c032e01547e58616527956
+ size 267829484
run-3/checkpoint-72/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2efe5398abd461fcafd62df5d68501cdf7326c5d01c05bfb382b1f855d199173
+ size 535721146
run-3/checkpoint-72/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2dda7584660763f72c41c754dd6523df6f1c049b2a56e83473d0e06cc32bb35d
+ size 14308
run-3/checkpoint-72/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525313aa68006057abc2527656e139e39e304df0fae14edc96d58c8a10f35d43
+ size 1064
run-3/checkpoint-72/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
run-3/checkpoint-72/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-72/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
run-3/checkpoint-72/trainer_state.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "best_metric": 0.3488998094346536,
+   "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-72",
+   "epoch": 1.0,
+   "eval_steps": 500,
+   "global_step": 72,
+   "is_hyper_param_search": true,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_loss": 2.188309907913208,
+       "eval_pearson": 0.3488998094346536,
+       "eval_runtime": 0.8689,
+       "eval_samples_per_second": 1726.372,
+       "eval_spearmanr": 0.3841984779795825,
+       "eval_steps_per_second": 108.186,
+       "step": 72
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 72,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 500,
+   "total_flos": 0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": {
+     "learning_rate": 1.2281778565394951e-05,
+     "num_train_epochs": 1,
+     "per_device_train_batch_size": 8,
+     "seed": 24
+   }
+ }
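Each trainer_state.json also records which checkpoint scored best on the tracked metric (eval_pearson in these runs). A small sketch for inspecting one of these files locally, assuming the checkpoint directory from this commit has been downloaded; note that for the trainer states in this commit every log_history entry is an evaluation record, so the eval_* keys are always present:

import json

# Path as it appears in this commit; adjust to wherever the repo is checked out.
with open("run-3/checkpoint-72/trainer_state.json") as f:
    state = json.load(f)

print("best metric (eval_pearson):", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])
for entry in state["log_history"]:
    print(f'epoch {entry["epoch"]}: '
          f'pearson={entry["eval_pearson"]:.4f}, '
          f'spearman={entry["eval_spearmanr"]:.4f}')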
run-3/checkpoint-72/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94a97181d28e1b7189cab54251a7b430fd73528c8be0f04e2930aea21ed4e946
+ size 4920
run-3/checkpoint-72/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/Mar07_01-09-00_6dcd98e81284/events.out.tfevents.1709774024.6dcd98e81284.961.5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9cee97ac540578c3df21b9b8dc3092bf2cd45b9b1706e1bbc863071a79f4a9ca
+ size 5323
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:10030bf67bebf9712be4aee13e4eb2bc3bd722e9da638f545180055dd9d691b8
+ oid sha256:94a97181d28e1b7189cab54251a7b430fd73528c8be0f04e2930aea21ed4e946
  size 4920