Training in progress, epoch 1
Browse files- model.safetensors +1 -1
- run-3/checkpoint-288/model.safetensors +1 -1
- run-3/checkpoint-288/optimizer.pt +1 -1
- run-3/checkpoint-288/rng_state.pth +1 -1
- run-3/checkpoint-288/scheduler.pt +1 -1
- run-3/checkpoint-288/trainer_state.json +15 -15
- run-3/checkpoint-288/training_args.bin +1 -1
- run-3/checkpoint-432/model.safetensors +1 -1
- run-3/checkpoint-432/optimizer.pt +1 -1
- run-3/checkpoint-432/rng_state.pth +1 -1
- run-3/checkpoint-432/scheduler.pt +1 -1
- run-3/checkpoint-432/trainer_state.json +21 -21
- run-3/checkpoint-432/training_args.bin +1 -1
- run-3/checkpoint-576/model.safetensors +1 -1
- run-3/checkpoint-576/optimizer.pt +1 -1
- run-3/checkpoint-576/rng_state.pth +1 -1
- run-3/checkpoint-576/scheduler.pt +1 -1
- run-3/checkpoint-576/trainer_state.json +31 -31
- run-3/checkpoint-576/training_args.bin +1 -1
- run-3/checkpoint-720/model.safetensors +1 -1
- run-3/checkpoint-720/optimizer.pt +1 -1
- run-3/checkpoint-720/rng_state.pth +1 -1
- run-3/checkpoint-720/scheduler.pt +1 -1
- run-3/checkpoint-720/trainer_state.json +38 -38
- run-3/checkpoint-720/training_args.bin +1 -1
- runs/Mar07_00-50-37_758c92e55f7e/events.out.tfevents.1709773116.758c92e55f7e.2638.6 +3 -0
- training_args.bin +1 -1
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84bec32edce0c34ec348a06616c8c7aa85c9f1a33d5d27ae77ad3e090d412754
|
3 |
size 267829484
|
run-3/checkpoint-288/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e70149ba23dd0d2f866d218b9336961ec98113bb1af5729569e5b571ffd29e9f
|
3 |
size 267829484
|
run-3/checkpoint-288/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535721146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89a83c71a35c4c8a47c9f9d461682720d58447a2d19c908bf8f73ad93239b467
|
3 |
size 535721146
|
run-3/checkpoint-288/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa0756249ef89554273490d3f25c592cf35d7de3c714c99d3ee3ec2bbb73a5d4
|
3 |
size 14244
|
run-3/checkpoint-288/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e813b361e3bb66e575ae412545ecddb9e1a2907d17551c0c7b2959b9d9137cd8
|
3 |
size 1064
|
run-3/checkpoint-288/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-288",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,22 +10,22 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_loss":
|
14 |
-
"eval_pearson": 0.
|
15 |
-
"eval_runtime": 0.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_spearmanr": 0.
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
-
"eval_loss": 0.
|
24 |
-
"eval_pearson": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_spearmanr": 0.
|
28 |
-
"eval_steps_per_second":
|
29 |
"step": 288
|
30 |
}
|
31 |
],
|
@@ -38,9 +38,9 @@
|
|
38 |
"train_batch_size": 4,
|
39 |
"trial_name": null,
|
40 |
"trial_params": {
|
41 |
-
"learning_rate":
|
42 |
"num_train_epochs": 5,
|
43 |
"per_device_train_batch_size": 4,
|
44 |
-
"seed":
|
45 |
}
|
46 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8294526923897652,
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-288",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_loss": 1.0159441232681274,
|
14 |
+
"eval_pearson": 0.7754149460856693,
|
15 |
+
"eval_runtime": 0.9276,
|
16 |
+
"eval_samples_per_second": 1617.073,
|
17 |
+
"eval_spearmanr": 0.7770625188634587,
|
18 |
+
"eval_steps_per_second": 101.337,
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
+
"eval_loss": 0.7027656435966492,
|
24 |
+
"eval_pearson": 0.8294526923897652,
|
25 |
+
"eval_runtime": 1.1203,
|
26 |
+
"eval_samples_per_second": 1338.921,
|
27 |
+
"eval_spearmanr": 0.8239919782128128,
|
28 |
+
"eval_steps_per_second": 83.906,
|
29 |
"step": 288
|
30 |
}
|
31 |
],
|
|
|
38 |
"train_batch_size": 4,
|
39 |
"trial_name": null,
|
40 |
"trial_params": {
|
41 |
+
"learning_rate": 3.444308458582572e-05,
|
42 |
"num_train_epochs": 5,
|
43 |
"per_device_train_batch_size": 4,
|
44 |
+
"seed": 38
|
45 |
}
|
46 |
}
|
run-3/checkpoint-288/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
|
3 |
size 4920
|
run-3/checkpoint-432/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4868147f9f64d8d4e4ba0994d128b9aa79ef2693115460ecba2d47291a2e064f
|
3 |
size 267829484
|
run-3/checkpoint-432/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535721146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:700b4b368b7b1f8c8ce908d043229c8e86e8657842ed24bba642b02baecb35c6
|
3 |
size 535721146
|
run-3/checkpoint-432/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:808e511081723cb6acf19ed232057cc3dd2d7b62e1a05317219f9befc0215f61
|
3 |
size 14244
|
run-3/checkpoint-432/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb739c138f564a5262bc1447b75225686d934caaecccce181d2b86227142035c
|
3 |
size 1064
|
run-3/checkpoint-432/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-432",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
@@ -10,32 +10,32 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_loss":
|
14 |
-
"eval_pearson": 0.
|
15 |
-
"eval_runtime": 0.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_spearmanr": 0.
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
-
"eval_loss": 0.
|
24 |
-
"eval_pearson": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_spearmanr": 0.
|
28 |
-
"eval_steps_per_second":
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
-
"eval_loss": 0.
|
34 |
-
"eval_pearson": 0.
|
35 |
-
"eval_runtime":
|
36 |
-
"eval_samples_per_second":
|
37 |
-
"eval_spearmanr": 0.
|
38 |
-
"eval_steps_per_second":
|
39 |
"step": 432
|
40 |
}
|
41 |
],
|
@@ -48,9 +48,9 @@
|
|
48 |
"train_batch_size": 4,
|
49 |
"trial_name": null,
|
50 |
"trial_params": {
|
51 |
-
"learning_rate":
|
52 |
"num_train_epochs": 5,
|
53 |
"per_device_train_batch_size": 4,
|
54 |
-
"seed":
|
55 |
}
|
56 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8371525320180763,
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-432",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_loss": 1.0159441232681274,
|
14 |
+
"eval_pearson": 0.7754149460856693,
|
15 |
+
"eval_runtime": 0.9276,
|
16 |
+
"eval_samples_per_second": 1617.073,
|
17 |
+
"eval_spearmanr": 0.7770625188634587,
|
18 |
+
"eval_steps_per_second": 101.337,
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
+
"eval_loss": 0.7027656435966492,
|
24 |
+
"eval_pearson": 0.8294526923897652,
|
25 |
+
"eval_runtime": 1.1203,
|
26 |
+
"eval_samples_per_second": 1338.921,
|
27 |
+
"eval_spearmanr": 0.8239919782128128,
|
28 |
+
"eval_steps_per_second": 83.906,
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
+
"eval_loss": 0.6792955994606018,
|
34 |
+
"eval_pearson": 0.8371525320180763,
|
35 |
+
"eval_runtime": 1.1241,
|
36 |
+
"eval_samples_per_second": 1334.415,
|
37 |
+
"eval_spearmanr": 0.8358915789303897,
|
38 |
+
"eval_steps_per_second": 83.623,
|
39 |
"step": 432
|
40 |
}
|
41 |
],
|
|
|
48 |
"train_batch_size": 4,
|
49 |
"trial_name": null,
|
50 |
"trial_params": {
|
51 |
+
"learning_rate": 3.444308458582572e-05,
|
52 |
"num_train_epochs": 5,
|
53 |
"per_device_train_batch_size": 4,
|
54 |
+
"seed": 38
|
55 |
}
|
56 |
}
|
run-3/checkpoint-432/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
|
3 |
size 4920
|
run-3/checkpoint-576/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0252f09672f42eb2def3a2d92dafa09af532b35ad3edace25eb853de6dc06ba
|
3 |
size 267829484
|
run-3/checkpoint-576/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535721146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64ecde12fc2a704b464816f349a6de99b4e9647b5ec943f520b69c97f3f472e6
|
3 |
size 535721146
|
run-3/checkpoint-576/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa277fec9a04a686204af70247a8b4e3568d8153e92f5bd3a9eb50c7d3e807a1
|
3 |
size 14244
|
run-3/checkpoint-576/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3bfa2eaedf155ed3818f2d7439057a71e6beb4a37ac73fd5d2e8e33136054d8
|
3 |
size 1064
|
run-3/checkpoint-576/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
|
4 |
"epoch": 4.0,
|
5 |
"eval_steps": 500,
|
@@ -10,49 +10,49 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_loss":
|
14 |
-
"eval_pearson": 0.
|
15 |
-
"eval_runtime": 0.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_spearmanr": 0.
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
-
"eval_loss": 0.
|
24 |
-
"eval_pearson": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_spearmanr": 0.
|
28 |
-
"eval_steps_per_second":
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
-
"eval_loss": 0.
|
34 |
-
"eval_pearson": 0.
|
35 |
-
"eval_runtime":
|
36 |
-
"eval_samples_per_second":
|
37 |
-
"eval_spearmanr": 0.
|
38 |
-
"eval_steps_per_second":
|
39 |
"step": 432
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.47,
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate": 1.
|
45 |
-
"loss": 0.
|
46 |
"step": 500
|
47 |
},
|
48 |
{
|
49 |
"epoch": 4.0,
|
50 |
-
"eval_loss": 0.
|
51 |
-
"eval_pearson": 0.
|
52 |
-
"eval_runtime":
|
53 |
-
"eval_samples_per_second":
|
54 |
-
"eval_spearmanr": 0.
|
55 |
-
"eval_steps_per_second":
|
56 |
"step": 576
|
57 |
}
|
58 |
],
|
@@ -61,13 +61,13 @@
|
|
61 |
"num_input_tokens_seen": 0,
|
62 |
"num_train_epochs": 5,
|
63 |
"save_steps": 500,
|
64 |
-
"total_flos":
|
65 |
"train_batch_size": 4,
|
66 |
"trial_name": null,
|
67 |
"trial_params": {
|
68 |
-
"learning_rate":
|
69 |
"num_train_epochs": 5,
|
70 |
"per_device_train_batch_size": 4,
|
71 |
-
"seed":
|
72 |
}
|
73 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8416256812127808,
|
3 |
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
|
4 |
"epoch": 4.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_loss": 1.0159441232681274,
|
14 |
+
"eval_pearson": 0.7754149460856693,
|
15 |
+
"eval_runtime": 0.9276,
|
16 |
+
"eval_samples_per_second": 1617.073,
|
17 |
+
"eval_spearmanr": 0.7770625188634587,
|
18 |
+
"eval_steps_per_second": 101.337,
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
+
"eval_loss": 0.7027656435966492,
|
24 |
+
"eval_pearson": 0.8294526923897652,
|
25 |
+
"eval_runtime": 1.1203,
|
26 |
+
"eval_samples_per_second": 1338.921,
|
27 |
+
"eval_spearmanr": 0.8239919782128128,
|
28 |
+
"eval_steps_per_second": 83.906,
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
+
"eval_loss": 0.6792955994606018,
|
34 |
+
"eval_pearson": 0.8371525320180763,
|
35 |
+
"eval_runtime": 1.1241,
|
36 |
+
"eval_samples_per_second": 1334.415,
|
37 |
+
"eval_spearmanr": 0.8358915789303897,
|
38 |
+
"eval_steps_per_second": 83.623,
|
39 |
"step": 432
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.47,
|
43 |
+
"grad_norm": 24.244155883789062,
|
44 |
+
"learning_rate": 1.052427584566897e-05,
|
45 |
+
"loss": 0.9809,
|
46 |
"step": 500
|
47 |
},
|
48 |
{
|
49 |
"epoch": 4.0,
|
50 |
+
"eval_loss": 0.6611331105232239,
|
51 |
+
"eval_pearson": 0.8416256812127808,
|
52 |
+
"eval_runtime": 1.1415,
|
53 |
+
"eval_samples_per_second": 1314.012,
|
54 |
+
"eval_spearmanr": 0.8371352595017586,
|
55 |
+
"eval_steps_per_second": 82.345,
|
56 |
"step": 576
|
57 |
}
|
58 |
],
|
|
|
61 |
"num_input_tokens_seen": 0,
|
62 |
"num_train_epochs": 5,
|
63 |
"save_steps": 500,
|
64 |
+
"total_flos": 22488785838402.0,
|
65 |
"train_batch_size": 4,
|
66 |
"trial_name": null,
|
67 |
"trial_params": {
|
68 |
+
"learning_rate": 3.444308458582572e-05,
|
69 |
"num_train_epochs": 5,
|
70 |
"per_device_train_batch_size": 4,
|
71 |
+
"seed": 38
|
72 |
}
|
73 |
}
|
run-3/checkpoint-576/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
|
3 |
size 4920
|
run-3/checkpoint-720/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267829484
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5bc1b531032dd209d8612697b6d66430437909170e4f5e9db1464a69cb7170b
|
3 |
size 267829484
|
run-3/checkpoint-720/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535721146
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85bd33f89e970d5eed5b1700ce0879d341256f399b5e49a28441dea48ec6ef7d
|
3 |
size 535721146
|
run-3/checkpoint-720/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c107ae0ea3f1f1b34f3b300cf1b0955ef489d70d5f2d9e4aa62d79e0bebdc5cf
|
3 |
size 14244
|
run-3/checkpoint-720/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e936e0aaf767fe7e9ef1cc507adb5e75f602c8f4ddb24d590baff19f29fe3662
|
3 |
size 1064
|
run-3/checkpoint-720/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-
|
4 |
"epoch": 5.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 720,
|
@@ -10,59 +10,59 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_loss":
|
14 |
-
"eval_pearson": 0.
|
15 |
-
"eval_runtime": 0.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_spearmanr": 0.
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
-
"eval_loss": 0.
|
24 |
-
"eval_pearson": 0.
|
25 |
-
"eval_runtime":
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_spearmanr": 0.
|
28 |
-
"eval_steps_per_second":
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
-
"eval_loss": 0.
|
34 |
-
"eval_pearson": 0.
|
35 |
-
"eval_runtime":
|
36 |
-
"eval_samples_per_second":
|
37 |
-
"eval_spearmanr": 0.
|
38 |
-
"eval_steps_per_second":
|
39 |
"step": 432
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.47,
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate": 1.
|
45 |
-
"loss": 0.
|
46 |
"step": 500
|
47 |
},
|
48 |
{
|
49 |
"epoch": 4.0,
|
50 |
-
"eval_loss": 0.
|
51 |
-
"eval_pearson": 0.
|
52 |
-
"eval_runtime":
|
53 |
-
"eval_samples_per_second":
|
54 |
-
"eval_spearmanr": 0.
|
55 |
-
"eval_steps_per_second":
|
56 |
"step": 576
|
57 |
},
|
58 |
{
|
59 |
"epoch": 5.0,
|
60 |
-
"eval_loss": 0.
|
61 |
-
"eval_pearson": 0.
|
62 |
-
"eval_runtime":
|
63 |
-
"eval_samples_per_second":
|
64 |
-
"eval_spearmanr": 0.
|
65 |
-
"eval_steps_per_second":
|
66 |
"step": 720
|
67 |
}
|
68 |
],
|
@@ -71,13 +71,13 @@
|
|
71 |
"num_input_tokens_seen": 0,
|
72 |
"num_train_epochs": 5,
|
73 |
"save_steps": 500,
|
74 |
-
"total_flos":
|
75 |
"train_batch_size": 4,
|
76 |
"trial_name": null,
|
77 |
"trial_params": {
|
78 |
-
"learning_rate":
|
79 |
"num_train_epochs": 5,
|
80 |
"per_device_train_batch_size": 4,
|
81 |
-
"seed":
|
82 |
}
|
83 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8422757387186379,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-720",
|
4 |
"epoch": 5.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 720,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_loss": 1.0159441232681274,
|
14 |
+
"eval_pearson": 0.7754149460856693,
|
15 |
+
"eval_runtime": 0.9276,
|
16 |
+
"eval_samples_per_second": 1617.073,
|
17 |
+
"eval_spearmanr": 0.7770625188634587,
|
18 |
+
"eval_steps_per_second": 101.337,
|
19 |
"step": 144
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
+
"eval_loss": 0.7027656435966492,
|
24 |
+
"eval_pearson": 0.8294526923897652,
|
25 |
+
"eval_runtime": 1.1203,
|
26 |
+
"eval_samples_per_second": 1338.921,
|
27 |
+
"eval_spearmanr": 0.8239919782128128,
|
28 |
+
"eval_steps_per_second": 83.906,
|
29 |
"step": 288
|
30 |
},
|
31 |
{
|
32 |
"epoch": 3.0,
|
33 |
+
"eval_loss": 0.6792955994606018,
|
34 |
+
"eval_pearson": 0.8371525320180763,
|
35 |
+
"eval_runtime": 1.1241,
|
36 |
+
"eval_samples_per_second": 1334.415,
|
37 |
+
"eval_spearmanr": 0.8358915789303897,
|
38 |
+
"eval_steps_per_second": 83.623,
|
39 |
"step": 432
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.47,
|
43 |
+
"grad_norm": 24.244155883789062,
|
44 |
+
"learning_rate": 1.052427584566897e-05,
|
45 |
+
"loss": 0.9809,
|
46 |
"step": 500
|
47 |
},
|
48 |
{
|
49 |
"epoch": 4.0,
|
50 |
+
"eval_loss": 0.6611331105232239,
|
51 |
+
"eval_pearson": 0.8416256812127808,
|
52 |
+
"eval_runtime": 1.1415,
|
53 |
+
"eval_samples_per_second": 1314.012,
|
54 |
+
"eval_spearmanr": 0.8371352595017586,
|
55 |
+
"eval_steps_per_second": 82.345,
|
56 |
"step": 576
|
57 |
},
|
58 |
{
|
59 |
"epoch": 5.0,
|
60 |
+
"eval_loss": 0.6691386699676514,
|
61 |
+
"eval_pearson": 0.8422757387186379,
|
62 |
+
"eval_runtime": 1.0993,
|
63 |
+
"eval_samples_per_second": 1364.543,
|
64 |
+
"eval_spearmanr": 0.8374301458257349,
|
65 |
+
"eval_steps_per_second": 85.511,
|
66 |
"step": 720
|
67 |
}
|
68 |
],
|
|
|
71 |
"num_input_tokens_seen": 0,
|
72 |
"num_train_epochs": 5,
|
73 |
"save_steps": 500,
|
74 |
+
"total_flos": 22488785838402.0,
|
75 |
"train_batch_size": 4,
|
76 |
"trial_name": null,
|
77 |
"trial_params": {
|
78 |
+
"learning_rate": 3.444308458582572e-05,
|
79 |
"num_train_epochs": 5,
|
80 |
"per_device_train_batch_size": 4,
|
81 |
+
"seed": 38
|
82 |
}
|
83 |
}
|
run-3/checkpoint-720/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
|
3 |
size 4920
|
runs/Mar07_00-50-37_758c92e55f7e/events.out.tfevents.1709773116.758c92e55f7e.2638.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17e4d54666ff128c9e9ff7873136a55131dae24f207cacad191725739f7f7b43
|
3 |
+
size 6426
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
|
3 |
size 4920
|