Training in progress, epoch 2
Browse files- model.safetensors +1 -1
- run-2/checkpoint-4276/config.json +1 -1
- run-2/checkpoint-4276/model.safetensors +1 -1
- run-2/checkpoint-4276/optimizer.pt +1 -1
- run-2/checkpoint-4276/rng_state.pth +2 -2
- run-2/checkpoint-4276/scheduler.pt +1 -1
- run-2/checkpoint-4276/trainer_state.json +56 -64
- run-2/checkpoint-4276/training_args.bin +2 -2
- runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 +2 -2
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267832560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:934a427b00be68da3ed8fde7b1bcdd15d4eee49375d3c550286d6c6b91d75482
|
3 |
size 267832560
|
run-2/checkpoint-4276/config.json
CHANGED
@@ -20,6 +20,6 @@
|
|
20 |
"sinusoidal_pos_embds": false,
|
21 |
"tie_weights_": true,
|
22 |
"torch_dtype": "float32",
|
23 |
-
"transformers_version": "4.
|
24 |
"vocab_size": 30522
|
25 |
}
|
|
|
20 |
"sinusoidal_pos_embds": false,
|
21 |
"tie_weights_": true,
|
22 |
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.38.2",
|
24 |
"vocab_size": 30522
|
25 |
}
|
run-2/checkpoint-4276/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267832560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:934a427b00be68da3ed8fde7b1bcdd15d4eee49375d3c550286d6c6b91d75482
|
3 |
size 267832560
|
run-2/checkpoint-4276/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535727290
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f41bf4a3c1d763cdaef986d5e50ce022d06df3a30a5eab7fc97e283653bcb0f
|
3 |
size 535727290
|
run-2/checkpoint-4276/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00e672a9362c6f5f488b05417701d476fdfce4dbe9c5c1eff5573f6a228e6183
|
3 |
+
size 14244
|
run-2/checkpoint-4276/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d883f9bbfa01924e0f8592716e3b9514c8da859b9f26198cdd40cf5b4791cd9
|
3 |
size 1064
|
run-2/checkpoint-4276/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 4276,
|
7 |
"is_hyper_param_search": true,
|
@@ -9,100 +9,92 @@
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"
|
14 |
-
"
|
|
|
15 |
"step": 500
|
16 |
},
|
17 |
{
|
18 |
-
"epoch": 0.
|
19 |
-
"
|
20 |
-
"
|
|
|
21 |
"step": 1000
|
22 |
},
|
23 |
{
|
24 |
-
"epoch":
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"eval_samples_per_second": 1295.012,
|
29 |
-
"eval_steps_per_second": 81.947,
|
30 |
-
"step": 1069
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"epoch": 1.4,
|
34 |
-
"learning_rate": 4.370333149644712e-06,
|
35 |
-
"loss": 0.4286,
|
36 |
"step": 1500
|
37 |
},
|
38 |
{
|
39 |
-
"epoch":
|
40 |
-
"
|
41 |
-
"
|
|
|
42 |
"step": 2000
|
43 |
},
|
44 |
{
|
45 |
-
"epoch":
|
46 |
-
"eval_loss": 0.
|
47 |
-
"eval_matthews_correlation": 0.
|
48 |
-
"eval_runtime": 0.
|
49 |
-
"eval_samples_per_second":
|
50 |
-
"eval_steps_per_second":
|
51 |
"step": 2138
|
52 |
},
|
53 |
{
|
54 |
-
"epoch":
|
55 |
-
"
|
56 |
-
"
|
|
|
57 |
"step": 2500
|
58 |
},
|
59 |
{
|
60 |
-
"epoch":
|
61 |
-
"
|
62 |
-
"
|
|
|
63 |
"step": 3000
|
64 |
},
|
65 |
{
|
66 |
-
"epoch":
|
67 |
-
"
|
68 |
-
"
|
69 |
-
"
|
70 |
-
"eval_samples_per_second": 1154.15,
|
71 |
-
"eval_steps_per_second": 73.033,
|
72 |
-
"step": 3207
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"epoch": 3.27,
|
76 |
-
"learning_rate": 1.2216781426960722e-06,
|
77 |
-
"loss": 0.2988,
|
78 |
"step": 3500
|
79 |
},
|
80 |
{
|
81 |
-
"epoch":
|
82 |
-
"
|
83 |
-
"
|
|
|
84 |
"step": 4000
|
85 |
},
|
86 |
{
|
87 |
-
"epoch":
|
88 |
-
"eval_loss": 0.
|
89 |
-
"eval_matthews_correlation": 0.
|
90 |
-
"eval_runtime":
|
91 |
-
"eval_samples_per_second":
|
92 |
-
"eval_steps_per_second":
|
93 |
"step": 4276
|
94 |
}
|
95 |
],
|
96 |
"logging_steps": 500,
|
97 |
-
"max_steps":
|
98 |
-
"
|
|
|
99 |
"save_steps": 500,
|
100 |
-
"total_flos":
|
|
|
101 |
"trial_name": null,
|
102 |
"trial_params": {
|
103 |
-
"learning_rate":
|
104 |
-
"num_train_epochs":
|
105 |
-
"per_device_train_batch_size":
|
106 |
-
"seed":
|
107 |
}
|
108 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4691032179514943,
|
3 |
+
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-4276",
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 4276,
|
7 |
"is_hyper_param_search": true,
|
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.23,
|
13 |
+
"grad_norm": 4.28505277633667,
|
14 |
+
"learning_rate": 3.0702893894484785e-06,
|
15 |
+
"loss": 0.6069,
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
+
"epoch": 0.47,
|
20 |
+
"grad_norm": 9.482794761657715,
|
21 |
+
"learning_rate": 2.9196373094951675e-06,
|
22 |
+
"loss": 0.5628,
|
23 |
"step": 1000
|
24 |
},
|
25 |
{
|
26 |
+
"epoch": 0.7,
|
27 |
+
"grad_norm": 22.521339416503906,
|
28 |
+
"learning_rate": 2.7689852295418565e-06,
|
29 |
+
"loss": 0.5565,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
"step": 1500
|
31 |
},
|
32 |
{
|
33 |
+
"epoch": 0.94,
|
34 |
+
"grad_norm": 26.7753849029541,
|
35 |
+
"learning_rate": 2.6183331495885454e-06,
|
36 |
+
"loss": 0.5184,
|
37 |
"step": 2000
|
38 |
},
|
39 |
{
|
40 |
+
"epoch": 1.0,
|
41 |
+
"eval_loss": 0.5730993747711182,
|
42 |
+
"eval_matthews_correlation": 0.3853198145814999,
|
43 |
+
"eval_runtime": 0.7612,
|
44 |
+
"eval_samples_per_second": 1370.225,
|
45 |
+
"eval_steps_per_second": 86.706,
|
46 |
"step": 2138
|
47 |
},
|
48 |
{
|
49 |
+
"epoch": 1.17,
|
50 |
+
"grad_norm": 17.77669334411621,
|
51 |
+
"learning_rate": 2.4676810696352344e-06,
|
52 |
+
"loss": 0.4619,
|
53 |
"step": 2500
|
54 |
},
|
55 |
{
|
56 |
+
"epoch": 1.4,
|
57 |
+
"grad_norm": 37.4239387512207,
|
58 |
+
"learning_rate": 2.3170289896819234e-06,
|
59 |
+
"loss": 0.5014,
|
60 |
"step": 3000
|
61 |
},
|
62 |
{
|
63 |
+
"epoch": 1.64,
|
64 |
+
"grad_norm": 46.75569534301758,
|
65 |
+
"learning_rate": 2.1663769097286124e-06,
|
66 |
+
"loss": 0.492,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
"step": 3500
|
68 |
},
|
69 |
{
|
70 |
+
"epoch": 1.87,
|
71 |
+
"grad_norm": 66.9134750366211,
|
72 |
+
"learning_rate": 2.0157248297753013e-06,
|
73 |
+
"loss": 0.4809,
|
74 |
"step": 4000
|
75 |
},
|
76 |
{
|
77 |
+
"epoch": 2.0,
|
78 |
+
"eval_loss": 0.6646500825881958,
|
79 |
+
"eval_matthews_correlation": 0.4691032179514943,
|
80 |
+
"eval_runtime": 0.8224,
|
81 |
+
"eval_samples_per_second": 1268.193,
|
82 |
+
"eval_steps_per_second": 80.25,
|
83 |
"step": 4276
|
84 |
}
|
85 |
],
|
86 |
"logging_steps": 500,
|
87 |
+
"max_steps": 10690,
|
88 |
+
"num_input_tokens_seen": 0,
|
89 |
+
"num_train_epochs": 5,
|
90 |
"save_steps": 500,
|
91 |
+
"total_flos": 65200091402940.0,
|
92 |
+
"train_batch_size": 4,
|
93 |
"trial_name": null,
|
94 |
"trial_params": {
|
95 |
+
"learning_rate": 3.2209414694017896e-06,
|
96 |
+
"num_train_epochs": 5,
|
97 |
+
"per_device_train_batch_size": 4,
|
98 |
+
"seed": 16
|
99 |
}
|
100 |
}
|
run-2/checkpoint-4276/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88d2c3d6804ca2d9d22cb74f328c5ae8ec320f8d12a0ef15ea5ae2037f02bd85
|
3 |
+
size 4984
|
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e275f4a807e39d73a49dad5d1ea7c25874258e28fe23616f020c0ad00492050
|
3 |
+
size 7158
|