Training in progress, epoch 1
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- model.safetensors +1 -1
- run-4/checkpoint-18/config.json +31 -0
- run-4/checkpoint-18/model.safetensors +3 -0
- run-4/checkpoint-18/optimizer.pt +3 -0
- run-4/checkpoint-18/rng_state.pth +3 -0
- run-4/checkpoint-18/scheduler.pt +3 -0
- run-4/checkpoint-18/special_tokens_map.json +7 -0
- run-4/checkpoint-18/tokenizer.json +0 -0
- run-4/checkpoint-18/tokenizer_config.json +55 -0
- run-4/checkpoint-18/trainer_state.json +46 -0
- run-4/checkpoint-18/training_args.bin +3 -0
- run-4/checkpoint-18/vocab.txt +0 -0
- run-4/checkpoint-27/config.json +31 -0
- run-4/checkpoint-27/model.safetensors +3 -0
- run-4/checkpoint-27/optimizer.pt +3 -0
- run-4/checkpoint-27/rng_state.pth +3 -0
- run-4/checkpoint-27/scheduler.pt +3 -0
- run-4/checkpoint-27/special_tokens_map.json +7 -0
- run-4/checkpoint-27/tokenizer.json +0 -0
- run-4/checkpoint-27/tokenizer_config.json +55 -0
- run-4/checkpoint-27/trainer_state.json +56 -0
- run-4/checkpoint-27/training_args.bin +3 -0
- run-4/checkpoint-27/vocab.txt +0 -0
- run-4/checkpoint-36/model.safetensors +1 -1
- run-4/checkpoint-36/optimizer.pt +1 -1
- run-4/checkpoint-36/rng_state.pth +1 -1
- run-4/checkpoint-36/scheduler.pt +1 -1
- run-4/checkpoint-36/trainer_state.json +44 -14
- run-4/checkpoint-36/training_args.bin +1 -1
- run-4/checkpoint-9/config.json +31 -0
- run-4/checkpoint-9/model.safetensors +3 -0
- run-4/checkpoint-9/optimizer.pt +3 -0
- run-4/checkpoint-9/rng_state.pth +3 -0
- run-4/checkpoint-9/scheduler.pt +3 -0
- run-4/checkpoint-9/special_tokens_map.json +7 -0
- run-4/checkpoint-9/tokenizer.json +0 -0
- run-4/checkpoint-9/tokenizer_config.json +55 -0
- run-4/checkpoint-9/trainer_state.json +36 -0
- run-4/checkpoint-9/training_args.bin +3 -0
- run-4/checkpoint-9/vocab.txt +0 -0
- run-6/checkpoint-9/config.json +31 -0
- run-6/checkpoint-9/model.safetensors +3 -0
- run-6/checkpoint-9/optimizer.pt +3 -0
- run-6/checkpoint-9/rng_state.pth +3 -0
- run-6/checkpoint-9/scheduler.pt +3 -0
- run-6/checkpoint-9/special_tokens_map.json +7 -0
- run-6/checkpoint-9/tokenizer.json +0 -0
- run-6/checkpoint-9/tokenizer_config.json +55 -0
- run-6/checkpoint-9/trainer_state.json +36 -0
- run-6/checkpoint-9/training_args.bin +3 -0
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae3934d95cf8eb918f260142b4c1c843186b97d06be5f5a9bf8a64000fc01868
|
3 |
size 267829484
|
run-4/checkpoint-18/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"max_position_embeddings": 512,
|
19 |
+
"model_type": "distilbert",
|
20 |
+
"n_heads": 12,
|
21 |
+
"n_layers": 6,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"problem_type": "regression",
|
24 |
+
"qa_dropout": 0.1,
|
25 |
+
"seq_classif_dropout": 0.2,
|
26 |
+
"sinusoidal_pos_embds": false,
|
27 |
+
"tie_weights_": true,
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.38.2",
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
run-4/checkpoint-18/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56c9436a49a572459a2a44ddbdc061067a3843403ed9f6eca1db149dbcdbd98d
|
3 |
+
size 267829484
|
run-4/checkpoint-18/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf8a4c79882ed42b0ba2938dc012222bb219784a5f37e9ad6b2769726ebfcbf0
|
3 |
+
size 535721146
|
run-4/checkpoint-18/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15a978540f0e297d2deb9452be6f34da155066843444206a3a770d9309265da5
|
3 |
+
size 14244
|
run-4/checkpoint-18/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc1f03d35d35aad84c8160e55d20e180f5c3328f48c3f74d729338bf74cb3c93
|
3 |
+
size 1064
|
run-4/checkpoint-18/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-4/checkpoint-18/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-18/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "DistilBertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
run-4/checkpoint-18/trainer_state.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.24101392328980611,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 18,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_loss": 6.916525363922119,
|
14 |
+
"eval_pearson": 0.24101392328980611,
|
15 |
+
"eval_runtime": 0.9549,
|
16 |
+
"eval_samples_per_second": 1570.783,
|
17 |
+
"eval_spearmanr": 0.23527578828258416,
|
18 |
+
"eval_steps_per_second": 98.436,
|
19 |
+
"step": 9
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"epoch": 2.0,
|
23 |
+
"eval_loss": 6.322813987731934,
|
24 |
+
"eval_pearson": 0.21685934779589452,
|
25 |
+
"eval_runtime": 1.3518,
|
26 |
+
"eval_samples_per_second": 1109.622,
|
27 |
+
"eval_spearmanr": 0.19981399518979143,
|
28 |
+
"eval_steps_per_second": 69.536,
|
29 |
+
"step": 18
|
30 |
+
}
|
31 |
+
],
|
32 |
+
"logging_steps": 500,
|
33 |
+
"max_steps": 36,
|
34 |
+
"num_input_tokens_seen": 0,
|
35 |
+
"num_train_epochs": 4,
|
36 |
+
"save_steps": 500,
|
37 |
+
"total_flos": 0,
|
38 |
+
"train_batch_size": 64,
|
39 |
+
"trial_name": null,
|
40 |
+
"trial_params": {
|
41 |
+
"learning_rate": 4.463445057905012e-06,
|
42 |
+
"num_train_epochs": 4,
|
43 |
+
"per_device_train_batch_size": 64,
|
44 |
+
"seed": 5
|
45 |
+
}
|
46 |
+
}
|
run-4/checkpoint-18/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
|
3 |
+
size 4920
|
run-4/checkpoint-18/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-27/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"max_position_embeddings": 512,
|
19 |
+
"model_type": "distilbert",
|
20 |
+
"n_heads": 12,
|
21 |
+
"n_layers": 6,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"problem_type": "regression",
|
24 |
+
"qa_dropout": 0.1,
|
25 |
+
"seq_classif_dropout": 0.2,
|
26 |
+
"sinusoidal_pos_embds": false,
|
27 |
+
"tie_weights_": true,
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.38.2",
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
run-4/checkpoint-27/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbbf7bc06426b92ee379ca03773c48d62e6c3896520c4fcad9faa5425201449f
|
3 |
+
size 267829484
|
run-4/checkpoint-27/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:677e9e5c0165b652fd63cc12b1bb4b56e06f7877b6f1948f4a2d0029591886ea
|
3 |
+
size 535721146
|
run-4/checkpoint-27/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74bf8511d98e1083038a1c44da2555854e7810b23ac8b465f2afba64cbf6a5f6
|
3 |
+
size 14244
|
run-4/checkpoint-27/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2811b91f91f70fdc876f392b523111587006bf59fe4aec234d20e739b0e6826c
|
3 |
+
size 1064
|
run-4/checkpoint-27/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-4/checkpoint-27/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-27/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "DistilBertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
run-4/checkpoint-27/trainer_state.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.24101392328980611,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 27,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_loss": 6.916525363922119,
|
14 |
+
"eval_pearson": 0.24101392328980611,
|
15 |
+
"eval_runtime": 0.9549,
|
16 |
+
"eval_samples_per_second": 1570.783,
|
17 |
+
"eval_spearmanr": 0.23527578828258416,
|
18 |
+
"eval_steps_per_second": 98.436,
|
19 |
+
"step": 9
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"epoch": 2.0,
|
23 |
+
"eval_loss": 6.322813987731934,
|
24 |
+
"eval_pearson": 0.21685934779589452,
|
25 |
+
"eval_runtime": 1.3518,
|
26 |
+
"eval_samples_per_second": 1109.622,
|
27 |
+
"eval_spearmanr": 0.19981399518979143,
|
28 |
+
"eval_steps_per_second": 69.536,
|
29 |
+
"step": 18
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 3.0,
|
33 |
+
"eval_loss": 5.925507068634033,
|
34 |
+
"eval_pearson": 0.19966300919689736,
|
35 |
+
"eval_runtime": 2.0553,
|
36 |
+
"eval_samples_per_second": 729.829,
|
37 |
+
"eval_spearmanr": 0.18294599603450873,
|
38 |
+
"eval_steps_per_second": 45.736,
|
39 |
+
"step": 27
|
40 |
+
}
|
41 |
+
],
|
42 |
+
"logging_steps": 500,
|
43 |
+
"max_steps": 36,
|
44 |
+
"num_input_tokens_seen": 0,
|
45 |
+
"num_train_epochs": 4,
|
46 |
+
"save_steps": 500,
|
47 |
+
"total_flos": 0,
|
48 |
+
"train_batch_size": 64,
|
49 |
+
"trial_name": null,
|
50 |
+
"trial_params": {
|
51 |
+
"learning_rate": 4.463445057905012e-06,
|
52 |
+
"num_train_epochs": 4,
|
53 |
+
"per_device_train_batch_size": 64,
|
54 |
+
"seed": 5
|
55 |
+
}
|
56 |
+
}
|
run-4/checkpoint-27/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
|
3 |
+
size 4920
|
run-4/checkpoint-27/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-36/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75bd7b149fb8cbcc4a98c1a500b77cf2498f7d115a33b7c3c58030ec49f7ab84
|
3 |
size 267829484
|
run-4/checkpoint-36/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535721146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b9ee39c83a3485f89033746bc673df88e46052d3beddf40596ab7d759b591dc
|
3 |
size 535721146
|
run-4/checkpoint-36/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fef671614c0d6f27cd73dcd86b2ff032aa12d8c88e7fd3678f3e0a264d90dbfa
|
3 |
size 14244
|
run-4/checkpoint-36/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5fd7f61c7117cf44e1b5dcb41fba06e442d903463f3acc9daaac3751d9061b0
|
3 |
size 1064
|
run-4/checkpoint-36/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 36,
|
7 |
"is_hyper_param_search": true,
|
@@ -10,27 +10,57 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_loss":
|
14 |
-
"eval_pearson": 0.
|
15 |
-
"eval_runtime": 0.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_spearmanr": 0.
|
18 |
-
"eval_steps_per_second":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"step": 36
|
20 |
}
|
21 |
],
|
22 |
"logging_steps": 500,
|
23 |
-
"max_steps":
|
24 |
"num_input_tokens_seen": 0,
|
25 |
"num_train_epochs": 4,
|
26 |
"save_steps": 500,
|
27 |
"total_flos": 0,
|
28 |
-
"train_batch_size":
|
29 |
"trial_name": null,
|
30 |
"trial_params": {
|
31 |
-
"learning_rate":
|
32 |
"num_train_epochs": 4,
|
33 |
-
"per_device_train_batch_size":
|
34 |
-
"seed":
|
35 |
}
|
36 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.24101392328980611,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
|
4 |
+
"epoch": 4.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 36,
|
7 |
"is_hyper_param_search": true,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_loss": 6.916525363922119,
|
14 |
+
"eval_pearson": 0.24101392328980611,
|
15 |
+
"eval_runtime": 0.9549,
|
16 |
+
"eval_samples_per_second": 1570.783,
|
17 |
+
"eval_spearmanr": 0.23527578828258416,
|
18 |
+
"eval_steps_per_second": 98.436,
|
19 |
+
"step": 9
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"epoch": 2.0,
|
23 |
+
"eval_loss": 6.322813987731934,
|
24 |
+
"eval_pearson": 0.21685934779589452,
|
25 |
+
"eval_runtime": 1.3518,
|
26 |
+
"eval_samples_per_second": 1109.622,
|
27 |
+
"eval_spearmanr": 0.19981399518979143,
|
28 |
+
"eval_steps_per_second": 69.536,
|
29 |
+
"step": 18
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 3.0,
|
33 |
+
"eval_loss": 5.925507068634033,
|
34 |
+
"eval_pearson": 0.19966300919689736,
|
35 |
+
"eval_runtime": 2.0553,
|
36 |
+
"eval_samples_per_second": 729.829,
|
37 |
+
"eval_spearmanr": 0.18294599603450873,
|
38 |
+
"eval_steps_per_second": 45.736,
|
39 |
+
"step": 27
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 4.0,
|
43 |
+
"eval_loss": 5.775755882263184,
|
44 |
+
"eval_pearson": 0.19587323739505505,
|
45 |
+
"eval_runtime": 1.1748,
|
46 |
+
"eval_samples_per_second": 1276.801,
|
47 |
+
"eval_spearmanr": 0.17919550590843902,
|
48 |
+
"eval_steps_per_second": 80.013,
|
49 |
"step": 36
|
50 |
}
|
51 |
],
|
52 |
"logging_steps": 500,
|
53 |
+
"max_steps": 36,
|
54 |
"num_input_tokens_seen": 0,
|
55 |
"num_train_epochs": 4,
|
56 |
"save_steps": 500,
|
57 |
"total_flos": 0,
|
58 |
+
"train_batch_size": 64,
|
59 |
"trial_name": null,
|
60 |
"trial_params": {
|
61 |
+
"learning_rate": 4.463445057905012e-06,
|
62 |
"num_train_epochs": 4,
|
63 |
+
"per_device_train_batch_size": 64,
|
64 |
+
"seed": 5
|
65 |
}
|
66 |
}
|
run-4/checkpoint-36/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
|
3 |
size 4920
|
run-4/checkpoint-9/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"max_position_embeddings": 512,
|
19 |
+
"model_type": "distilbert",
|
20 |
+
"n_heads": 12,
|
21 |
+
"n_layers": 6,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"problem_type": "regression",
|
24 |
+
"qa_dropout": 0.1,
|
25 |
+
"seq_classif_dropout": 0.2,
|
26 |
+
"sinusoidal_pos_embds": false,
|
27 |
+
"tie_weights_": true,
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.38.2",
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
run-4/checkpoint-9/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84bec32edce0c34ec348a06616c8c7aa85c9f1a33d5d27ae77ad3e090d412754
|
3 |
+
size 267829484
|
run-4/checkpoint-9/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96e9cbbbcb19cf07b549302db9b4b6d6b845219260b82b301ddd8e2caf706d65
|
3 |
+
size 535721146
|
run-4/checkpoint-9/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe90af1f89c0dce692300f940c4cba6a0ecfa2a03461f201ac0bbba7d84bc72
|
3 |
+
size 14244
|
run-4/checkpoint-9/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4693f3ca79582f6f0df58248da3ed2ca85049f9be4d77f8c6536dfda8476a824
|
3 |
+
size 1064
|
run-4/checkpoint-9/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-4/checkpoint-9/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-9/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "DistilBertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
run-4/checkpoint-9/trainer_state.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.24101392328980611,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-4/checkpoint-9",
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 9,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_loss": 6.916525363922119,
|
14 |
+
"eval_pearson": 0.24101392328980611,
|
15 |
+
"eval_runtime": 0.9549,
|
16 |
+
"eval_samples_per_second": 1570.783,
|
17 |
+
"eval_spearmanr": 0.23527578828258416,
|
18 |
+
"eval_steps_per_second": 98.436,
|
19 |
+
"step": 9
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"logging_steps": 500,
|
23 |
+
"max_steps": 36,
|
24 |
+
"num_input_tokens_seen": 0,
|
25 |
+
"num_train_epochs": 4,
|
26 |
+
"save_steps": 500,
|
27 |
+
"total_flos": 0,
|
28 |
+
"train_batch_size": 64,
|
29 |
+
"trial_name": null,
|
30 |
+
"trial_params": {
|
31 |
+
"learning_rate": 4.463445057905012e-06,
|
32 |
+
"num_train_epochs": 4,
|
33 |
+
"per_device_train_batch_size": 64,
|
34 |
+
"seed": 5
|
35 |
+
}
|
36 |
+
}
|
run-4/checkpoint-9/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
|
3 |
+
size 4920
|
run-4/checkpoint-9/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-6/checkpoint-9/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"max_position_embeddings": 512,
|
19 |
+
"model_type": "distilbert",
|
20 |
+
"n_heads": 12,
|
21 |
+
"n_layers": 6,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"problem_type": "regression",
|
24 |
+
"qa_dropout": 0.1,
|
25 |
+
"seq_classif_dropout": 0.2,
|
26 |
+
"sinusoidal_pos_embds": false,
|
27 |
+
"tie_weights_": true,
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.38.2",
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
run-6/checkpoint-9/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae3934d95cf8eb918f260142b4c1c843186b97d06be5f5a9bf8a64000fc01868
|
3 |
+
size 267829484
|
run-6/checkpoint-9/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3f02b2fb28bffd524925f8abd015e8cae9bf674d66e51b380bddbae017f8a25
|
3 |
+
size 535721146
|
run-6/checkpoint-9/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ba09405d7a94ee517c5ceffb8c23248b8d494f1eb31d40acbe62eb99ce9be12
|
3 |
+
size 14308
|
run-6/checkpoint-9/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63805fb43f2a2098f29266655d5e2d89eb8334418990671ad96bdb4fa05d753c
|
3 |
+
size 1064
|
run-6/checkpoint-9/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-6/checkpoint-9/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-6/checkpoint-9/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "DistilBertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
run-6/checkpoint-9/trainer_state.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.3882847703858072,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-6/checkpoint-9",
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 9,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_loss": 2.4611966609954834,
|
14 |
+
"eval_pearson": 0.3882847703858072,
|
15 |
+
"eval_runtime": 0.921,
|
16 |
+
"eval_samples_per_second": 1628.726,
|
17 |
+
"eval_spearmanr": 0.37140201965369346,
|
18 |
+
"eval_steps_per_second": 102.067,
|
19 |
+
"step": 9
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"logging_steps": 500,
|
23 |
+
"max_steps": 27,
|
24 |
+
"num_input_tokens_seen": 0,
|
25 |
+
"num_train_epochs": 3,
|
26 |
+
"save_steps": 500,
|
27 |
+
"total_flos": 0,
|
28 |
+
"train_batch_size": 64,
|
29 |
+
"trial_name": null,
|
30 |
+
"trial_params": {
|
31 |
+
"learning_rate": 9.878134307436757e-05,
|
32 |
+
"num_train_epochs": 3,
|
33 |
+
"per_device_train_batch_size": 64,
|
34 |
+
"seed": 8
|
35 |
+
}
|
36 |
+
}
|
run-6/checkpoint-9/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c368c3ba5426669339e270191c6c3ba4b431cf75363417c7b14d47f7020f6c43
|
3 |
+
size 4920
|