Training in progress, step 36500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 151061672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37abee5478f9f35aa29c89d624e5085193b13e9235b29001ce31bf46535ebf26
|
3 |
size 151061672
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 297616186
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fda54d4a50875fe15eb5b4dc26e94e3e3fc09bc8fdfef3c113a4c25da7c3b2e0
|
3 |
size 297616186
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2edf09a864a623bade513bcc647f43081439cf12510590843404e7e232c4476a
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47ffe2dc2785190c476b20c92a5589a6418c0dc6fcfc14d8ccf5dc30cce41d32
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b717e1cf5316edd52bfea17acc3366c2d40f7c06837ff0be49b0b31d863734c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 4.
|
6 |
"eval_steps": 7000,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -252048,6 +252048,3506 @@
|
|
252048 |
"learning_rate": 1.5856497329864439e-06,
|
252049 |
"loss": 0.0061,
|
252050 |
"step": 36000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252051 |
}
|
252052 |
],
|
252053 |
"logging_steps": 1,
|
@@ -252067,7 +255567,7 @@
|
|
252067 |
"attributes": {}
|
252068 |
}
|
252069 |
},
|
252070 |
-
"total_flos": 1.
|
252071 |
"train_batch_size": 16,
|
252072 |
"trial_name": null,
|
252073 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 4.99111171885683,
|
6 |
"eval_steps": 7000,
|
7 |
+
"global_step": 36500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
252048 |
"learning_rate": 1.5856497329864439e-06,
|
252049 |
"loss": 0.0061,
|
252050 |
"step": 36000
|
252051 |
+
},
|
252052 |
+
{
|
252053 |
+
"epoch": 4.9228770682346505,
|
252054 |
+
"grad_norm": 2.1278064250946045,
|
252055 |
+
"learning_rate": 1.582911132411338e-06,
|
252056 |
+
"loss": 0.0209,
|
252057 |
+
"step": 36001
|
252058 |
+
},
|
252059 |
+
{
|
252060 |
+
"epoch": 4.923013811021469,
|
252061 |
+
"grad_norm": 0.8256312012672424,
|
252062 |
+
"learning_rate": 1.580172531836232e-06,
|
252063 |
+
"loss": 0.0049,
|
252064 |
+
"step": 36002
|
252065 |
+
},
|
252066 |
+
{
|
252067 |
+
"epoch": 4.923150553808287,
|
252068 |
+
"grad_norm": 1.027895450592041,
|
252069 |
+
"learning_rate": 1.5774339312611255e-06,
|
252070 |
+
"loss": 0.0033,
|
252071 |
+
"step": 36003
|
252072 |
+
},
|
252073 |
+
{
|
252074 |
+
"epoch": 4.923287296595104,
|
252075 |
+
"grad_norm": 0.8473876714706421,
|
252076 |
+
"learning_rate": 1.5746953306860195e-06,
|
252077 |
+
"loss": 0.0064,
|
252078 |
+
"step": 36004
|
252079 |
+
},
|
252080 |
+
{
|
252081 |
+
"epoch": 4.923424039381922,
|
252082 |
+
"grad_norm": 0.6120457649230957,
|
252083 |
+
"learning_rate": 1.5719567301109136e-06,
|
252084 |
+
"loss": 0.004,
|
252085 |
+
"step": 36005
|
252086 |
+
},
|
252087 |
+
{
|
252088 |
+
"epoch": 4.923560782168741,
|
252089 |
+
"grad_norm": 0.48810142278671265,
|
252090 |
+
"learning_rate": 1.5692181295358072e-06,
|
252091 |
+
"loss": 0.0032,
|
252092 |
+
"step": 36006
|
252093 |
+
},
|
252094 |
+
{
|
252095 |
+
"epoch": 4.923697524955559,
|
252096 |
+
"grad_norm": 1.827744483947754,
|
252097 |
+
"learning_rate": 1.5664795289607012e-06,
|
252098 |
+
"loss": 0.0155,
|
252099 |
+
"step": 36007
|
252100 |
+
},
|
252101 |
+
{
|
252102 |
+
"epoch": 4.923834267742377,
|
252103 |
+
"grad_norm": 0.3755244314670563,
|
252104 |
+
"learning_rate": 1.5637409283855952e-06,
|
252105 |
+
"loss": 0.0023,
|
252106 |
+
"step": 36008
|
252107 |
+
},
|
252108 |
+
{
|
252109 |
+
"epoch": 4.923971010529194,
|
252110 |
+
"grad_norm": 0.2780938148498535,
|
252111 |
+
"learning_rate": 1.561002327810489e-06,
|
252112 |
+
"loss": 0.0019,
|
252113 |
+
"step": 36009
|
252114 |
+
},
|
252115 |
+
{
|
252116 |
+
"epoch": 4.9241077533160125,
|
252117 |
+
"grad_norm": 1.1747560501098633,
|
252118 |
+
"learning_rate": 1.5582637272353828e-06,
|
252119 |
+
"loss": 0.007,
|
252120 |
+
"step": 36010
|
252121 |
+
},
|
252122 |
+
{
|
252123 |
+
"epoch": 4.924244496102831,
|
252124 |
+
"grad_norm": 1.105936884880066,
|
252125 |
+
"learning_rate": 1.5555251266602769e-06,
|
252126 |
+
"loss": 0.0104,
|
252127 |
+
"step": 36011
|
252128 |
+
},
|
252129 |
+
{
|
252130 |
+
"epoch": 4.924381238889649,
|
252131 |
+
"grad_norm": 0.5342653393745422,
|
252132 |
+
"learning_rate": 1.5527865260851707e-06,
|
252133 |
+
"loss": 0.0035,
|
252134 |
+
"step": 36012
|
252135 |
+
},
|
252136 |
+
{
|
252137 |
+
"epoch": 4.924517981676466,
|
252138 |
+
"grad_norm": 0.9065399169921875,
|
252139 |
+
"learning_rate": 1.5500479255100645e-06,
|
252140 |
+
"loss": 0.0152,
|
252141 |
+
"step": 36013
|
252142 |
+
},
|
252143 |
+
{
|
252144 |
+
"epoch": 4.924654724463284,
|
252145 |
+
"grad_norm": 0.8544608950614929,
|
252146 |
+
"learning_rate": 1.5473093249349583e-06,
|
252147 |
+
"loss": 0.0062,
|
252148 |
+
"step": 36014
|
252149 |
+
},
|
252150 |
+
{
|
252151 |
+
"epoch": 4.9247914672501025,
|
252152 |
+
"grad_norm": 1.5266153812408447,
|
252153 |
+
"learning_rate": 1.544570724359852e-06,
|
252154 |
+
"loss": 0.0085,
|
252155 |
+
"step": 36015
|
252156 |
+
},
|
252157 |
+
{
|
252158 |
+
"epoch": 4.924928210036921,
|
252159 |
+
"grad_norm": 0.23297074437141418,
|
252160 |
+
"learning_rate": 1.541832123784746e-06,
|
252161 |
+
"loss": 0.0019,
|
252162 |
+
"step": 36016
|
252163 |
+
},
|
252164 |
+
{
|
252165 |
+
"epoch": 4.925064952823739,
|
252166 |
+
"grad_norm": 1.0378493070602417,
|
252167 |
+
"learning_rate": 1.53909352320964e-06,
|
252168 |
+
"loss": 0.0076,
|
252169 |
+
"step": 36017
|
252170 |
+
},
|
252171 |
+
{
|
252172 |
+
"epoch": 4.925201695610556,
|
252173 |
+
"grad_norm": 0.34137728810310364,
|
252174 |
+
"learning_rate": 1.5363549226345337e-06,
|
252175 |
+
"loss": 0.0023,
|
252176 |
+
"step": 36018
|
252177 |
+
},
|
252178 |
+
{
|
252179 |
+
"epoch": 4.925338438397374,
|
252180 |
+
"grad_norm": 0.8581326007843018,
|
252181 |
+
"learning_rate": 1.5336163220594278e-06,
|
252182 |
+
"loss": 0.0047,
|
252183 |
+
"step": 36019
|
252184 |
+
},
|
252185 |
+
{
|
252186 |
+
"epoch": 4.925475181184193,
|
252187 |
+
"grad_norm": 0.567088782787323,
|
252188 |
+
"learning_rate": 1.5308777214843216e-06,
|
252189 |
+
"loss": 0.0038,
|
252190 |
+
"step": 36020
|
252191 |
+
},
|
252192 |
+
{
|
252193 |
+
"epoch": 4.925611923971011,
|
252194 |
+
"grad_norm": 0.533941388130188,
|
252195 |
+
"learning_rate": 1.5281391209092154e-06,
|
252196 |
+
"loss": 0.0029,
|
252197 |
+
"step": 36021
|
252198 |
+
},
|
252199 |
+
{
|
252200 |
+
"epoch": 4.925748666757828,
|
252201 |
+
"grad_norm": 1.1052790880203247,
|
252202 |
+
"learning_rate": 1.5254005203341094e-06,
|
252203 |
+
"loss": 0.0098,
|
252204 |
+
"step": 36022
|
252205 |
+
},
|
252206 |
+
{
|
252207 |
+
"epoch": 4.925885409544646,
|
252208 |
+
"grad_norm": 0.8087671995162964,
|
252209 |
+
"learning_rate": 1.5226619197590032e-06,
|
252210 |
+
"loss": 0.0054,
|
252211 |
+
"step": 36023
|
252212 |
+
},
|
252213 |
+
{
|
252214 |
+
"epoch": 4.9260221523314645,
|
252215 |
+
"grad_norm": 0.8573110699653625,
|
252216 |
+
"learning_rate": 1.519923319183897e-06,
|
252217 |
+
"loss": 0.006,
|
252218 |
+
"step": 36024
|
252219 |
+
},
|
252220 |
+
{
|
252221 |
+
"epoch": 4.926158895118283,
|
252222 |
+
"grad_norm": 0.2526284158229828,
|
252223 |
+
"learning_rate": 1.517184718608791e-06,
|
252224 |
+
"loss": 0.0016,
|
252225 |
+
"step": 36025
|
252226 |
+
},
|
252227 |
+
{
|
252228 |
+
"epoch": 4.926295637905101,
|
252229 |
+
"grad_norm": 1.1121177673339844,
|
252230 |
+
"learning_rate": 1.5144461180336849e-06,
|
252231 |
+
"loss": 0.009,
|
252232 |
+
"step": 36026
|
252233 |
+
},
|
252234 |
+
{
|
252235 |
+
"epoch": 4.926432380691918,
|
252236 |
+
"grad_norm": 0.7677934169769287,
|
252237 |
+
"learning_rate": 1.5117075174585789e-06,
|
252238 |
+
"loss": 0.0059,
|
252239 |
+
"step": 36027
|
252240 |
+
},
|
252241 |
+
{
|
252242 |
+
"epoch": 4.926569123478736,
|
252243 |
+
"grad_norm": 0.23495355248451233,
|
252244 |
+
"learning_rate": 1.5089689168834727e-06,
|
252245 |
+
"loss": 0.0021,
|
252246 |
+
"step": 36028
|
252247 |
+
},
|
252248 |
+
{
|
252249 |
+
"epoch": 4.9267058662655545,
|
252250 |
+
"grad_norm": 1.2202261686325073,
|
252251 |
+
"learning_rate": 1.5062303163083665e-06,
|
252252 |
+
"loss": 0.0122,
|
252253 |
+
"step": 36029
|
252254 |
+
},
|
252255 |
+
{
|
252256 |
+
"epoch": 4.926842609052373,
|
252257 |
+
"grad_norm": 0.5867053866386414,
|
252258 |
+
"learning_rate": 1.5034917157332603e-06,
|
252259 |
+
"loss": 0.0038,
|
252260 |
+
"step": 36030
|
252261 |
+
},
|
252262 |
+
{
|
252263 |
+
"epoch": 4.92697935183919,
|
252264 |
+
"grad_norm": 1.1660468578338623,
|
252265 |
+
"learning_rate": 1.5007531151581544e-06,
|
252266 |
+
"loss": 0.0076,
|
252267 |
+
"step": 36031
|
252268 |
+
},
|
252269 |
+
{
|
252270 |
+
"epoch": 4.927116094626008,
|
252271 |
+
"grad_norm": 0.8292264342308044,
|
252272 |
+
"learning_rate": 1.4980145145830482e-06,
|
252273 |
+
"loss": 0.0059,
|
252274 |
+
"step": 36032
|
252275 |
+
},
|
252276 |
+
{
|
252277 |
+
"epoch": 4.927252837412826,
|
252278 |
+
"grad_norm": 0.987408459186554,
|
252279 |
+
"learning_rate": 1.495275914007942e-06,
|
252280 |
+
"loss": 0.0085,
|
252281 |
+
"step": 36033
|
252282 |
+
},
|
252283 |
+
{
|
252284 |
+
"epoch": 4.927389580199645,
|
252285 |
+
"grad_norm": 1.8758208751678467,
|
252286 |
+
"learning_rate": 1.4925373134328358e-06,
|
252287 |
+
"loss": 0.0146,
|
252288 |
+
"step": 36034
|
252289 |
+
},
|
252290 |
+
{
|
252291 |
+
"epoch": 4.927526322986463,
|
252292 |
+
"grad_norm": 0.5282612442970276,
|
252293 |
+
"learning_rate": 1.4897987128577298e-06,
|
252294 |
+
"loss": 0.0039,
|
252295 |
+
"step": 36035
|
252296 |
+
},
|
252297 |
+
{
|
252298 |
+
"epoch": 4.927663065773281,
|
252299 |
+
"grad_norm": 0.44931164383888245,
|
252300 |
+
"learning_rate": 1.4870601122826236e-06,
|
252301 |
+
"loss": 0.0037,
|
252302 |
+
"step": 36036
|
252303 |
+
},
|
252304 |
+
{
|
252305 |
+
"epoch": 4.927799808560098,
|
252306 |
+
"grad_norm": 3.296675682067871,
|
252307 |
+
"learning_rate": 1.4843215117075174e-06,
|
252308 |
+
"loss": 0.0384,
|
252309 |
+
"step": 36037
|
252310 |
+
},
|
252311 |
+
{
|
252312 |
+
"epoch": 4.927936551346916,
|
252313 |
+
"grad_norm": 1.655234694480896,
|
252314 |
+
"learning_rate": 1.4815829111324115e-06,
|
252315 |
+
"loss": 0.0081,
|
252316 |
+
"step": 36038
|
252317 |
+
},
|
252318 |
+
{
|
252319 |
+
"epoch": 4.928073294133735,
|
252320 |
+
"grad_norm": 1.7422348260879517,
|
252321 |
+
"learning_rate": 1.4788443105573053e-06,
|
252322 |
+
"loss": 0.0098,
|
252323 |
+
"step": 36039
|
252324 |
+
},
|
252325 |
+
{
|
252326 |
+
"epoch": 4.928210036920553,
|
252327 |
+
"grad_norm": 1.2778112888336182,
|
252328 |
+
"learning_rate": 1.476105709982199e-06,
|
252329 |
+
"loss": 0.0081,
|
252330 |
+
"step": 36040
|
252331 |
+
},
|
252332 |
+
{
|
252333 |
+
"epoch": 4.92834677970737,
|
252334 |
+
"grad_norm": 1.4430122375488281,
|
252335 |
+
"learning_rate": 1.473367109407093e-06,
|
252336 |
+
"loss": 0.0142,
|
252337 |
+
"step": 36041
|
252338 |
+
},
|
252339 |
+
{
|
252340 |
+
"epoch": 4.928483522494188,
|
252341 |
+
"grad_norm": 0.561543345451355,
|
252342 |
+
"learning_rate": 1.470628508831987e-06,
|
252343 |
+
"loss": 0.0044,
|
252344 |
+
"step": 36042
|
252345 |
+
},
|
252346 |
+
{
|
252347 |
+
"epoch": 4.9286202652810065,
|
252348 |
+
"grad_norm": 1.047837495803833,
|
252349 |
+
"learning_rate": 1.467889908256881e-06,
|
252350 |
+
"loss": 0.005,
|
252351 |
+
"step": 36043
|
252352 |
+
},
|
252353 |
+
{
|
252354 |
+
"epoch": 4.928757008067825,
|
252355 |
+
"grad_norm": 0.5944101810455322,
|
252356 |
+
"learning_rate": 1.4651513076817747e-06,
|
252357 |
+
"loss": 0.0037,
|
252358 |
+
"step": 36044
|
252359 |
+
},
|
252360 |
+
{
|
252361 |
+
"epoch": 4.928893750854643,
|
252362 |
+
"grad_norm": 1.154360294342041,
|
252363 |
+
"learning_rate": 1.4624127071066686e-06,
|
252364 |
+
"loss": 0.0076,
|
252365 |
+
"step": 36045
|
252366 |
+
},
|
252367 |
+
{
|
252368 |
+
"epoch": 4.92903049364146,
|
252369 |
+
"grad_norm": 1.5193301439285278,
|
252370 |
+
"learning_rate": 1.4596741065315626e-06,
|
252371 |
+
"loss": 0.016,
|
252372 |
+
"step": 36046
|
252373 |
+
},
|
252374 |
+
{
|
252375 |
+
"epoch": 4.929167236428278,
|
252376 |
+
"grad_norm": 0.3227151036262512,
|
252377 |
+
"learning_rate": 1.4569355059564564e-06,
|
252378 |
+
"loss": 0.0026,
|
252379 |
+
"step": 36047
|
252380 |
+
},
|
252381 |
+
{
|
252382 |
+
"epoch": 4.9293039792150966,
|
252383 |
+
"grad_norm": 1.1807106733322144,
|
252384 |
+
"learning_rate": 1.4541969053813502e-06,
|
252385 |
+
"loss": 0.0048,
|
252386 |
+
"step": 36048
|
252387 |
+
},
|
252388 |
+
{
|
252389 |
+
"epoch": 4.929440722001915,
|
252390 |
+
"grad_norm": 2.785722255706787,
|
252391 |
+
"learning_rate": 1.451458304806244e-06,
|
252392 |
+
"loss": 0.0127,
|
252393 |
+
"step": 36049
|
252394 |
+
},
|
252395 |
+
{
|
252396 |
+
"epoch": 4.929577464788732,
|
252397 |
+
"grad_norm": 0.43490323424339294,
|
252398 |
+
"learning_rate": 1.448719704231138e-06,
|
252399 |
+
"loss": 0.0036,
|
252400 |
+
"step": 36050
|
252401 |
+
},
|
252402 |
+
{
|
252403 |
+
"epoch": 4.92971420757555,
|
252404 |
+
"grad_norm": 1.3853864669799805,
|
252405 |
+
"learning_rate": 1.4459811036560318e-06,
|
252406 |
+
"loss": 0.0111,
|
252407 |
+
"step": 36051
|
252408 |
+
},
|
252409 |
+
{
|
252410 |
+
"epoch": 4.929850950362368,
|
252411 |
+
"grad_norm": 1.4280378818511963,
|
252412 |
+
"learning_rate": 1.4432425030809257e-06,
|
252413 |
+
"loss": 0.0112,
|
252414 |
+
"step": 36052
|
252415 |
+
},
|
252416 |
+
{
|
252417 |
+
"epoch": 4.929987693149187,
|
252418 |
+
"grad_norm": 1.1049832105636597,
|
252419 |
+
"learning_rate": 1.4405039025058195e-06,
|
252420 |
+
"loss": 0.0064,
|
252421 |
+
"step": 36053
|
252422 |
+
},
|
252423 |
+
{
|
252424 |
+
"epoch": 4.930124435936005,
|
252425 |
+
"grad_norm": 2.7903225421905518,
|
252426 |
+
"learning_rate": 1.4377653019307135e-06,
|
252427 |
+
"loss": 0.0443,
|
252428 |
+
"step": 36054
|
252429 |
+
},
|
252430 |
+
{
|
252431 |
+
"epoch": 4.930261178722822,
|
252432 |
+
"grad_norm": 1.0399909019470215,
|
252433 |
+
"learning_rate": 1.4350267013556073e-06,
|
252434 |
+
"loss": 0.0084,
|
252435 |
+
"step": 36055
|
252436 |
+
},
|
252437 |
+
{
|
252438 |
+
"epoch": 4.93039792150964,
|
252439 |
+
"grad_norm": 0.7063827514648438,
|
252440 |
+
"learning_rate": 1.4322881007805011e-06,
|
252441 |
+
"loss": 0.0026,
|
252442 |
+
"step": 36056
|
252443 |
+
},
|
252444 |
+
{
|
252445 |
+
"epoch": 4.9305346642964585,
|
252446 |
+
"grad_norm": 0.8873728513717651,
|
252447 |
+
"learning_rate": 1.4295495002053951e-06,
|
252448 |
+
"loss": 0.0041,
|
252449 |
+
"step": 36057
|
252450 |
+
},
|
252451 |
+
{
|
252452 |
+
"epoch": 4.930671407083277,
|
252453 |
+
"grad_norm": 0.8514203429222107,
|
252454 |
+
"learning_rate": 1.426810899630289e-06,
|
252455 |
+
"loss": 0.0042,
|
252456 |
+
"step": 36058
|
252457 |
+
},
|
252458 |
+
{
|
252459 |
+
"epoch": 4.930808149870094,
|
252460 |
+
"grad_norm": 1.7229218482971191,
|
252461 |
+
"learning_rate": 1.424072299055183e-06,
|
252462 |
+
"loss": 0.0122,
|
252463 |
+
"step": 36059
|
252464 |
+
},
|
252465 |
+
{
|
252466 |
+
"epoch": 4.930944892656912,
|
252467 |
+
"grad_norm": 1.4280128479003906,
|
252468 |
+
"learning_rate": 1.4213336984800768e-06,
|
252469 |
+
"loss": 0.0094,
|
252470 |
+
"step": 36060
|
252471 |
+
},
|
252472 |
+
{
|
252473 |
+
"epoch": 4.93108163544373,
|
252474 |
+
"grad_norm": 0.3208808898925781,
|
252475 |
+
"learning_rate": 1.4185950979049706e-06,
|
252476 |
+
"loss": 0.0023,
|
252477 |
+
"step": 36061
|
252478 |
+
},
|
252479 |
+
{
|
252480 |
+
"epoch": 4.9312183782305485,
|
252481 |
+
"grad_norm": 2.5721123218536377,
|
252482 |
+
"learning_rate": 1.4158564973298646e-06,
|
252483 |
+
"loss": 0.014,
|
252484 |
+
"step": 36062
|
252485 |
+
},
|
252486 |
+
{
|
252487 |
+
"epoch": 4.931355121017367,
|
252488 |
+
"grad_norm": 2.1366584300994873,
|
252489 |
+
"learning_rate": 1.4131178967547584e-06,
|
252490 |
+
"loss": 0.0129,
|
252491 |
+
"step": 36063
|
252492 |
+
},
|
252493 |
+
{
|
252494 |
+
"epoch": 4.931491863804184,
|
252495 |
+
"grad_norm": 1.0783027410507202,
|
252496 |
+
"learning_rate": 1.4103792961796522e-06,
|
252497 |
+
"loss": 0.0092,
|
252498 |
+
"step": 36064
|
252499 |
+
},
|
252500 |
+
{
|
252501 |
+
"epoch": 4.931628606591002,
|
252502 |
+
"grad_norm": 0.5576790571212769,
|
252503 |
+
"learning_rate": 1.4076406956045463e-06,
|
252504 |
+
"loss": 0.0035,
|
252505 |
+
"step": 36065
|
252506 |
+
},
|
252507 |
+
{
|
252508 |
+
"epoch": 4.93176534937782,
|
252509 |
+
"grad_norm": 0.6204835176467896,
|
252510 |
+
"learning_rate": 1.40490209502944e-06,
|
252511 |
+
"loss": 0.0034,
|
252512 |
+
"step": 36066
|
252513 |
+
},
|
252514 |
+
{
|
252515 |
+
"epoch": 4.931902092164639,
|
252516 |
+
"grad_norm": 1.1946561336517334,
|
252517 |
+
"learning_rate": 1.4021634944543339e-06,
|
252518 |
+
"loss": 0.0067,
|
252519 |
+
"step": 36067
|
252520 |
+
},
|
252521 |
+
{
|
252522 |
+
"epoch": 4.932038834951456,
|
252523 |
+
"grad_norm": 0.7430503368377686,
|
252524 |
+
"learning_rate": 1.3994248938792277e-06,
|
252525 |
+
"loss": 0.0044,
|
252526 |
+
"step": 36068
|
252527 |
+
},
|
252528 |
+
{
|
252529 |
+
"epoch": 4.932175577738274,
|
252530 |
+
"grad_norm": 0.751792311668396,
|
252531 |
+
"learning_rate": 1.3966862933041217e-06,
|
252532 |
+
"loss": 0.005,
|
252533 |
+
"step": 36069
|
252534 |
+
},
|
252535 |
+
{
|
252536 |
+
"epoch": 4.932312320525092,
|
252537 |
+
"grad_norm": 0.5992054343223572,
|
252538 |
+
"learning_rate": 1.3939476927290155e-06,
|
252539 |
+
"loss": 0.0038,
|
252540 |
+
"step": 36070
|
252541 |
+
},
|
252542 |
+
{
|
252543 |
+
"epoch": 4.9324490633119105,
|
252544 |
+
"grad_norm": 0.3429768979549408,
|
252545 |
+
"learning_rate": 1.3912090921539093e-06,
|
252546 |
+
"loss": 0.0024,
|
252547 |
+
"step": 36071
|
252548 |
+
},
|
252549 |
+
{
|
252550 |
+
"epoch": 4.932585806098729,
|
252551 |
+
"grad_norm": 1.5080138444900513,
|
252552 |
+
"learning_rate": 1.3884704915788032e-06,
|
252553 |
+
"loss": 0.008,
|
252554 |
+
"step": 36072
|
252555 |
+
},
|
252556 |
+
{
|
252557 |
+
"epoch": 4.932722548885546,
|
252558 |
+
"grad_norm": 1.4930524826049805,
|
252559 |
+
"learning_rate": 1.3857318910036972e-06,
|
252560 |
+
"loss": 0.0154,
|
252561 |
+
"step": 36073
|
252562 |
+
},
|
252563 |
+
{
|
252564 |
+
"epoch": 4.932859291672364,
|
252565 |
+
"grad_norm": 2.23296856880188,
|
252566 |
+
"learning_rate": 1.382993290428591e-06,
|
252567 |
+
"loss": 0.0107,
|
252568 |
+
"step": 36074
|
252569 |
+
},
|
252570 |
+
{
|
252571 |
+
"epoch": 4.932996034459182,
|
252572 |
+
"grad_norm": 0.6318745613098145,
|
252573 |
+
"learning_rate": 1.3802546898534848e-06,
|
252574 |
+
"loss": 0.0038,
|
252575 |
+
"step": 36075
|
252576 |
+
},
|
252577 |
+
{
|
252578 |
+
"epoch": 4.9331327772460005,
|
252579 |
+
"grad_norm": 0.528644859790802,
|
252580 |
+
"learning_rate": 1.3775160892783788e-06,
|
252581 |
+
"loss": 0.0034,
|
252582 |
+
"step": 36076
|
252583 |
+
},
|
252584 |
+
{
|
252585 |
+
"epoch": 4.933269520032818,
|
252586 |
+
"grad_norm": 1.7099889516830444,
|
252587 |
+
"learning_rate": 1.3747774887032726e-06,
|
252588 |
+
"loss": 0.0163,
|
252589 |
+
"step": 36077
|
252590 |
+
},
|
252591 |
+
{
|
252592 |
+
"epoch": 4.933406262819636,
|
252593 |
+
"grad_norm": 0.9289507269859314,
|
252594 |
+
"learning_rate": 1.3720388881281667e-06,
|
252595 |
+
"loss": 0.006,
|
252596 |
+
"step": 36078
|
252597 |
+
},
|
252598 |
+
{
|
252599 |
+
"epoch": 4.933543005606454,
|
252600 |
+
"grad_norm": 2.4060261249542236,
|
252601 |
+
"learning_rate": 1.3693002875530605e-06,
|
252602 |
+
"loss": 0.0341,
|
252603 |
+
"step": 36079
|
252604 |
+
},
|
252605 |
+
{
|
252606 |
+
"epoch": 4.933679748393272,
|
252607 |
+
"grad_norm": 0.9098917841911316,
|
252608 |
+
"learning_rate": 1.3665616869779543e-06,
|
252609 |
+
"loss": 0.0063,
|
252610 |
+
"step": 36080
|
252611 |
+
},
|
252612 |
+
{
|
252613 |
+
"epoch": 4.933816491180091,
|
252614 |
+
"grad_norm": 1.1836994886398315,
|
252615 |
+
"learning_rate": 1.3638230864028483e-06,
|
252616 |
+
"loss": 0.0096,
|
252617 |
+
"step": 36081
|
252618 |
+
},
|
252619 |
+
{
|
252620 |
+
"epoch": 4.933953233966908,
|
252621 |
+
"grad_norm": 1.6330406665802002,
|
252622 |
+
"learning_rate": 1.3610844858277421e-06,
|
252623 |
+
"loss": 0.0067,
|
252624 |
+
"step": 36082
|
252625 |
+
},
|
252626 |
+
{
|
252627 |
+
"epoch": 4.934089976753726,
|
252628 |
+
"grad_norm": 0.5137028098106384,
|
252629 |
+
"learning_rate": 1.358345885252636e-06,
|
252630 |
+
"loss": 0.0041,
|
252631 |
+
"step": 36083
|
252632 |
+
},
|
252633 |
+
{
|
252634 |
+
"epoch": 4.934226719540544,
|
252635 |
+
"grad_norm": 0.2890148460865021,
|
252636 |
+
"learning_rate": 1.35560728467753e-06,
|
252637 |
+
"loss": 0.0024,
|
252638 |
+
"step": 36084
|
252639 |
+
},
|
252640 |
+
{
|
252641 |
+
"epoch": 4.9343634623273624,
|
252642 |
+
"grad_norm": 0.3262675702571869,
|
252643 |
+
"learning_rate": 1.3528686841024238e-06,
|
252644 |
+
"loss": 0.0026,
|
252645 |
+
"step": 36085
|
252646 |
+
},
|
252647 |
+
{
|
252648 |
+
"epoch": 4.93450020511418,
|
252649 |
+
"grad_norm": 0.970128059387207,
|
252650 |
+
"learning_rate": 1.3501300835273176e-06,
|
252651 |
+
"loss": 0.0056,
|
252652 |
+
"step": 36086
|
252653 |
+
},
|
252654 |
+
{
|
252655 |
+
"epoch": 4.934636947900998,
|
252656 |
+
"grad_norm": 0.6113590598106384,
|
252657 |
+
"learning_rate": 1.3473914829522114e-06,
|
252658 |
+
"loss": 0.0047,
|
252659 |
+
"step": 36087
|
252660 |
+
},
|
252661 |
+
{
|
252662 |
+
"epoch": 4.934773690687816,
|
252663 |
+
"grad_norm": 1.1611425876617432,
|
252664 |
+
"learning_rate": 1.3446528823771054e-06,
|
252665 |
+
"loss": 0.0056,
|
252666 |
+
"step": 36088
|
252667 |
+
},
|
252668 |
+
{
|
252669 |
+
"epoch": 4.934910433474634,
|
252670 |
+
"grad_norm": 1.583804965019226,
|
252671 |
+
"learning_rate": 1.3419142818019992e-06,
|
252672 |
+
"loss": 0.0153,
|
252673 |
+
"step": 36089
|
252674 |
+
},
|
252675 |
+
{
|
252676 |
+
"epoch": 4.9350471762614525,
|
252677 |
+
"grad_norm": 1.1482213735580444,
|
252678 |
+
"learning_rate": 1.339175681226893e-06,
|
252679 |
+
"loss": 0.006,
|
252680 |
+
"step": 36090
|
252681 |
+
},
|
252682 |
+
{
|
252683 |
+
"epoch": 4.935183919048271,
|
252684 |
+
"grad_norm": 1.2166626453399658,
|
252685 |
+
"learning_rate": 1.3364370806517868e-06,
|
252686 |
+
"loss": 0.0076,
|
252687 |
+
"step": 36091
|
252688 |
+
},
|
252689 |
+
{
|
252690 |
+
"epoch": 4.935320661835088,
|
252691 |
+
"grad_norm": 1.0527071952819824,
|
252692 |
+
"learning_rate": 1.3336984800766809e-06,
|
252693 |
+
"loss": 0.0063,
|
252694 |
+
"step": 36092
|
252695 |
+
},
|
252696 |
+
{
|
252697 |
+
"epoch": 4.935457404621906,
|
252698 |
+
"grad_norm": 0.45512905716896057,
|
252699 |
+
"learning_rate": 1.3309598795015747e-06,
|
252700 |
+
"loss": 0.0033,
|
252701 |
+
"step": 36093
|
252702 |
+
},
|
252703 |
+
{
|
252704 |
+
"epoch": 4.935594147408724,
|
252705 |
+
"grad_norm": 0.37925460934638977,
|
252706 |
+
"learning_rate": 1.3282212789264687e-06,
|
252707 |
+
"loss": 0.0025,
|
252708 |
+
"step": 36094
|
252709 |
+
},
|
252710 |
+
{
|
252711 |
+
"epoch": 4.935730890195543,
|
252712 |
+
"grad_norm": 0.4799692928791046,
|
252713 |
+
"learning_rate": 1.3254826783513625e-06,
|
252714 |
+
"loss": 0.0035,
|
252715 |
+
"step": 36095
|
252716 |
+
},
|
252717 |
+
{
|
252718 |
+
"epoch": 4.93586763298236,
|
252719 |
+
"grad_norm": 1.4849737882614136,
|
252720 |
+
"learning_rate": 1.3227440777762563e-06,
|
252721 |
+
"loss": 0.0071,
|
252722 |
+
"step": 36096
|
252723 |
+
},
|
252724 |
+
{
|
252725 |
+
"epoch": 4.936004375769178,
|
252726 |
+
"grad_norm": 0.46090370416641235,
|
252727 |
+
"learning_rate": 1.3200054772011503e-06,
|
252728 |
+
"loss": 0.0032,
|
252729 |
+
"step": 36097
|
252730 |
+
},
|
252731 |
+
{
|
252732 |
+
"epoch": 4.936141118555996,
|
252733 |
+
"grad_norm": 0.4467523694038391,
|
252734 |
+
"learning_rate": 1.3172668766260442e-06,
|
252735 |
+
"loss": 0.0033,
|
252736 |
+
"step": 36098
|
252737 |
+
},
|
252738 |
+
{
|
252739 |
+
"epoch": 4.936277861342814,
|
252740 |
+
"grad_norm": 0.327291339635849,
|
252741 |
+
"learning_rate": 1.314528276050938e-06,
|
252742 |
+
"loss": 0.0025,
|
252743 |
+
"step": 36099
|
252744 |
+
},
|
252745 |
+
{
|
252746 |
+
"epoch": 4.936414604129633,
|
252747 |
+
"grad_norm": 1.9502886533737183,
|
252748 |
+
"learning_rate": 1.311789675475832e-06,
|
252749 |
+
"loss": 0.0179,
|
252750 |
+
"step": 36100
|
252751 |
+
},
|
252752 |
+
{
|
252753 |
+
"epoch": 4.93655134691645,
|
252754 |
+
"grad_norm": 1.1486557722091675,
|
252755 |
+
"learning_rate": 1.3090510749007258e-06,
|
252756 |
+
"loss": 0.0068,
|
252757 |
+
"step": 36101
|
252758 |
+
},
|
252759 |
+
{
|
252760 |
+
"epoch": 4.936688089703268,
|
252761 |
+
"grad_norm": 1.153026819229126,
|
252762 |
+
"learning_rate": 1.3063124743256198e-06,
|
252763 |
+
"loss": 0.005,
|
252764 |
+
"step": 36102
|
252765 |
+
},
|
252766 |
+
{
|
252767 |
+
"epoch": 4.936824832490086,
|
252768 |
+
"grad_norm": 1.803147792816162,
|
252769 |
+
"learning_rate": 1.3035738737505136e-06,
|
252770 |
+
"loss": 0.0077,
|
252771 |
+
"step": 36103
|
252772 |
+
},
|
252773 |
+
{
|
252774 |
+
"epoch": 4.9369615752769045,
|
252775 |
+
"grad_norm": 0.41003742814064026,
|
252776 |
+
"learning_rate": 1.3008352731754074e-06,
|
252777 |
+
"loss": 0.0022,
|
252778 |
+
"step": 36104
|
252779 |
+
},
|
252780 |
+
{
|
252781 |
+
"epoch": 4.937098318063722,
|
252782 |
+
"grad_norm": 0.4293813407421112,
|
252783 |
+
"learning_rate": 1.2980966726003013e-06,
|
252784 |
+
"loss": 0.0028,
|
252785 |
+
"step": 36105
|
252786 |
+
},
|
252787 |
+
{
|
252788 |
+
"epoch": 4.93723506085054,
|
252789 |
+
"grad_norm": 0.6052153706550598,
|
252790 |
+
"learning_rate": 1.295358072025195e-06,
|
252791 |
+
"loss": 0.0036,
|
252792 |
+
"step": 36106
|
252793 |
+
},
|
252794 |
+
{
|
252795 |
+
"epoch": 4.937371803637358,
|
252796 |
+
"grad_norm": 1.356513500213623,
|
252797 |
+
"learning_rate": 1.2926194714500889e-06,
|
252798 |
+
"loss": 0.007,
|
252799 |
+
"step": 36107
|
252800 |
+
},
|
252801 |
+
{
|
252802 |
+
"epoch": 4.937508546424176,
|
252803 |
+
"grad_norm": 1.718141794204712,
|
252804 |
+
"learning_rate": 1.289880870874983e-06,
|
252805 |
+
"loss": 0.0124,
|
252806 |
+
"step": 36108
|
252807 |
+
},
|
252808 |
+
{
|
252809 |
+
"epoch": 4.9376452892109945,
|
252810 |
+
"grad_norm": 1.8543354272842407,
|
252811 |
+
"learning_rate": 1.2871422702998767e-06,
|
252812 |
+
"loss": 0.0348,
|
252813 |
+
"step": 36109
|
252814 |
+
},
|
252815 |
+
{
|
252816 |
+
"epoch": 4.937782031997812,
|
252817 |
+
"grad_norm": 0.7800920009613037,
|
252818 |
+
"learning_rate": 1.2844036697247707e-06,
|
252819 |
+
"loss": 0.006,
|
252820 |
+
"step": 36110
|
252821 |
+
},
|
252822 |
+
{
|
252823 |
+
"epoch": 4.93791877478463,
|
252824 |
+
"grad_norm": 0.8616371750831604,
|
252825 |
+
"learning_rate": 1.2816650691496645e-06,
|
252826 |
+
"loss": 0.0055,
|
252827 |
+
"step": 36111
|
252828 |
+
},
|
252829 |
+
{
|
252830 |
+
"epoch": 4.938055517571448,
|
252831 |
+
"grad_norm": 0.34152454137802124,
|
252832 |
+
"learning_rate": 1.2789264685745584e-06,
|
252833 |
+
"loss": 0.0026,
|
252834 |
+
"step": 36112
|
252835 |
+
},
|
252836 |
+
{
|
252837 |
+
"epoch": 4.938192260358266,
|
252838 |
+
"grad_norm": 0.19512180984020233,
|
252839 |
+
"learning_rate": 1.2761878679994524e-06,
|
252840 |
+
"loss": 0.0014,
|
252841 |
+
"step": 36113
|
252842 |
+
},
|
252843 |
+
{
|
252844 |
+
"epoch": 4.938329003145084,
|
252845 |
+
"grad_norm": 0.3059760630130768,
|
252846 |
+
"learning_rate": 1.2734492674243462e-06,
|
252847 |
+
"loss": 0.0018,
|
252848 |
+
"step": 36114
|
252849 |
+
},
|
252850 |
+
{
|
252851 |
+
"epoch": 4.938465745931902,
|
252852 |
+
"grad_norm": 0.8782281875610352,
|
252853 |
+
"learning_rate": 1.27071066684924e-06,
|
252854 |
+
"loss": 0.0057,
|
252855 |
+
"step": 36115
|
252856 |
+
},
|
252857 |
+
{
|
252858 |
+
"epoch": 4.93860248871872,
|
252859 |
+
"grad_norm": 0.9511374235153198,
|
252860 |
+
"learning_rate": 1.267972066274134e-06,
|
252861 |
+
"loss": 0.0058,
|
252862 |
+
"step": 36116
|
252863 |
+
},
|
252864 |
+
{
|
252865 |
+
"epoch": 4.938739231505538,
|
252866 |
+
"grad_norm": 0.8876736760139465,
|
252867 |
+
"learning_rate": 1.2652334656990278e-06,
|
252868 |
+
"loss": 0.007,
|
252869 |
+
"step": 36117
|
252870 |
+
},
|
252871 |
+
{
|
252872 |
+
"epoch": 4.9388759742923565,
|
252873 |
+
"grad_norm": 1.9388644695281982,
|
252874 |
+
"learning_rate": 1.2624948651239219e-06,
|
252875 |
+
"loss": 0.0117,
|
252876 |
+
"step": 36118
|
252877 |
+
},
|
252878 |
+
{
|
252879 |
+
"epoch": 4.939012717079174,
|
252880 |
+
"grad_norm": 1.3188329935073853,
|
252881 |
+
"learning_rate": 1.2597562645488157e-06,
|
252882 |
+
"loss": 0.0079,
|
252883 |
+
"step": 36119
|
252884 |
+
},
|
252885 |
+
{
|
252886 |
+
"epoch": 4.939149459865992,
|
252887 |
+
"grad_norm": 1.0971708297729492,
|
252888 |
+
"learning_rate": 1.2570176639737095e-06,
|
252889 |
+
"loss": 0.0076,
|
252890 |
+
"step": 36120
|
252891 |
+
},
|
252892 |
+
{
|
252893 |
+
"epoch": 4.93928620265281,
|
252894 |
+
"grad_norm": 1.1161755323410034,
|
252895 |
+
"learning_rate": 1.2542790633986035e-06,
|
252896 |
+
"loss": 0.0067,
|
252897 |
+
"step": 36121
|
252898 |
+
},
|
252899 |
+
{
|
252900 |
+
"epoch": 4.939422945439628,
|
252901 |
+
"grad_norm": 1.1405102014541626,
|
252902 |
+
"learning_rate": 1.2515404628234973e-06,
|
252903 |
+
"loss": 0.0037,
|
252904 |
+
"step": 36122
|
252905 |
+
},
|
252906 |
+
{
|
252907 |
+
"epoch": 4.939559688226446,
|
252908 |
+
"grad_norm": 0.5782989263534546,
|
252909 |
+
"learning_rate": 1.2488018622483911e-06,
|
252910 |
+
"loss": 0.0041,
|
252911 |
+
"step": 36123
|
252912 |
+
},
|
252913 |
+
{
|
252914 |
+
"epoch": 4.939696431013264,
|
252915 |
+
"grad_norm": 0.7016936540603638,
|
252916 |
+
"learning_rate": 1.246063261673285e-06,
|
252917 |
+
"loss": 0.0063,
|
252918 |
+
"step": 36124
|
252919 |
+
},
|
252920 |
+
{
|
252921 |
+
"epoch": 4.939833173800082,
|
252922 |
+
"grad_norm": 1.3623028993606567,
|
252923 |
+
"learning_rate": 1.2433246610981788e-06,
|
252924 |
+
"loss": 0.0087,
|
252925 |
+
"step": 36125
|
252926 |
+
},
|
252927 |
+
{
|
252928 |
+
"epoch": 4.9399699165869,
|
252929 |
+
"grad_norm": 0.578167200088501,
|
252930 |
+
"learning_rate": 1.2405860605230728e-06,
|
252931 |
+
"loss": 0.0027,
|
252932 |
+
"step": 36126
|
252933 |
+
},
|
252934 |
+
{
|
252935 |
+
"epoch": 4.940106659373718,
|
252936 |
+
"grad_norm": 2.803514242172241,
|
252937 |
+
"learning_rate": 1.2378474599479666e-06,
|
252938 |
+
"loss": 0.0269,
|
252939 |
+
"step": 36127
|
252940 |
+
},
|
252941 |
+
{
|
252942 |
+
"epoch": 4.940243402160536,
|
252943 |
+
"grad_norm": 1.4078305959701538,
|
252944 |
+
"learning_rate": 1.2351088593728604e-06,
|
252945 |
+
"loss": 0.0075,
|
252946 |
+
"step": 36128
|
252947 |
+
},
|
252948 |
+
{
|
252949 |
+
"epoch": 4.940380144947354,
|
252950 |
+
"grad_norm": 0.24666063487529755,
|
252951 |
+
"learning_rate": 1.2323702587977544e-06,
|
252952 |
+
"loss": 0.0019,
|
252953 |
+
"step": 36129
|
252954 |
+
},
|
252955 |
+
{
|
252956 |
+
"epoch": 4.940516887734172,
|
252957 |
+
"grad_norm": 0.494110107421875,
|
252958 |
+
"learning_rate": 1.2296316582226482e-06,
|
252959 |
+
"loss": 0.0034,
|
252960 |
+
"step": 36130
|
252961 |
+
},
|
252962 |
+
{
|
252963 |
+
"epoch": 4.94065363052099,
|
252964 |
+
"grad_norm": 0.767799437046051,
|
252965 |
+
"learning_rate": 1.226893057647542e-06,
|
252966 |
+
"loss": 0.0054,
|
252967 |
+
"step": 36131
|
252968 |
+
},
|
252969 |
+
{
|
252970 |
+
"epoch": 4.940790373307808,
|
252971 |
+
"grad_norm": 1.2233718633651733,
|
252972 |
+
"learning_rate": 1.224154457072436e-06,
|
252973 |
+
"loss": 0.0063,
|
252974 |
+
"step": 36132
|
252975 |
+
},
|
252976 |
+
{
|
252977 |
+
"epoch": 4.940927116094626,
|
252978 |
+
"grad_norm": 1.0252876281738281,
|
252979 |
+
"learning_rate": 1.2214158564973299e-06,
|
252980 |
+
"loss": 0.0066,
|
252981 |
+
"step": 36133
|
252982 |
+
},
|
252983 |
+
{
|
252984 |
+
"epoch": 4.941063858881444,
|
252985 |
+
"grad_norm": 1.476625919342041,
|
252986 |
+
"learning_rate": 1.218677255922224e-06,
|
252987 |
+
"loss": 0.0092,
|
252988 |
+
"step": 36134
|
252989 |
+
},
|
252990 |
+
{
|
252991 |
+
"epoch": 4.941200601668262,
|
252992 |
+
"grad_norm": 1.61685049533844,
|
252993 |
+
"learning_rate": 1.2159386553471177e-06,
|
252994 |
+
"loss": 0.011,
|
252995 |
+
"step": 36135
|
252996 |
+
},
|
252997 |
+
{
|
252998 |
+
"epoch": 4.94133734445508,
|
252999 |
+
"grad_norm": 1.8993734121322632,
|
253000 |
+
"learning_rate": 1.2132000547720115e-06,
|
253001 |
+
"loss": 0.015,
|
253002 |
+
"step": 36136
|
253003 |
+
},
|
253004 |
+
{
|
253005 |
+
"epoch": 4.941474087241898,
|
253006 |
+
"grad_norm": 1.086776852607727,
|
253007 |
+
"learning_rate": 1.2104614541969055e-06,
|
253008 |
+
"loss": 0.0092,
|
253009 |
+
"step": 36137
|
253010 |
+
},
|
253011 |
+
{
|
253012 |
+
"epoch": 4.941610830028716,
|
253013 |
+
"grad_norm": 1.9535847902297974,
|
253014 |
+
"learning_rate": 1.2077228536217994e-06,
|
253015 |
+
"loss": 0.0188,
|
253016 |
+
"step": 36138
|
253017 |
+
},
|
253018 |
+
{
|
253019 |
+
"epoch": 4.941747572815534,
|
253020 |
+
"grad_norm": 1.2405314445495605,
|
253021 |
+
"learning_rate": 1.2049842530466932e-06,
|
253022 |
+
"loss": 0.0071,
|
253023 |
+
"step": 36139
|
253024 |
+
},
|
253025 |
+
{
|
253026 |
+
"epoch": 4.941884315602352,
|
253027 |
+
"grad_norm": 1.0428907871246338,
|
253028 |
+
"learning_rate": 1.2022456524715872e-06,
|
253029 |
+
"loss": 0.0058,
|
253030 |
+
"step": 36140
|
253031 |
+
},
|
253032 |
+
{
|
253033 |
+
"epoch": 4.9420210583891695,
|
253034 |
+
"grad_norm": 1.5566105842590332,
|
253035 |
+
"learning_rate": 1.199507051896481e-06,
|
253036 |
+
"loss": 0.008,
|
253037 |
+
"step": 36141
|
253038 |
+
},
|
253039 |
+
{
|
253040 |
+
"epoch": 4.942157801175988,
|
253041 |
+
"grad_norm": 1.4527117013931274,
|
253042 |
+
"learning_rate": 1.1967684513213748e-06,
|
253043 |
+
"loss": 0.0084,
|
253044 |
+
"step": 36142
|
253045 |
+
},
|
253046 |
+
{
|
253047 |
+
"epoch": 4.942294543962806,
|
253048 |
+
"grad_norm": 0.9299144148826599,
|
253049 |
+
"learning_rate": 1.1940298507462686e-06,
|
253050 |
+
"loss": 0.0077,
|
253051 |
+
"step": 36143
|
253052 |
+
},
|
253053 |
+
{
|
253054 |
+
"epoch": 4.942431286749624,
|
253055 |
+
"grad_norm": 1.5292224884033203,
|
253056 |
+
"learning_rate": 1.1912912501711624e-06,
|
253057 |
+
"loss": 0.0084,
|
253058 |
+
"step": 36144
|
253059 |
+
},
|
253060 |
+
{
|
253061 |
+
"epoch": 4.942568029536442,
|
253062 |
+
"grad_norm": 0.7213050723075867,
|
253063 |
+
"learning_rate": 1.1885526495960565e-06,
|
253064 |
+
"loss": 0.0056,
|
253065 |
+
"step": 36145
|
253066 |
+
},
|
253067 |
+
{
|
253068 |
+
"epoch": 4.94270477232326,
|
253069 |
+
"grad_norm": 1.017021656036377,
|
253070 |
+
"learning_rate": 1.1858140490209503e-06,
|
253071 |
+
"loss": 0.005,
|
253072 |
+
"step": 36146
|
253073 |
+
},
|
253074 |
+
{
|
253075 |
+
"epoch": 4.942841515110078,
|
253076 |
+
"grad_norm": 1.7737218141555786,
|
253077 |
+
"learning_rate": 1.183075448445844e-06,
|
253078 |
+
"loss": 0.0133,
|
253079 |
+
"step": 36147
|
253080 |
+
},
|
253081 |
+
{
|
253082 |
+
"epoch": 4.942978257896896,
|
253083 |
+
"grad_norm": 2.1853582859039307,
|
253084 |
+
"learning_rate": 1.1803368478707381e-06,
|
253085 |
+
"loss": 0.0144,
|
253086 |
+
"step": 36148
|
253087 |
+
},
|
253088 |
+
{
|
253089 |
+
"epoch": 4.943115000683714,
|
253090 |
+
"grad_norm": 3.165393829345703,
|
253091 |
+
"learning_rate": 1.177598247295632e-06,
|
253092 |
+
"loss": 0.0176,
|
253093 |
+
"step": 36149
|
253094 |
+
},
|
253095 |
+
{
|
253096 |
+
"epoch": 4.943251743470532,
|
253097 |
+
"grad_norm": 0.28671663999557495,
|
253098 |
+
"learning_rate": 1.174859646720526e-06,
|
253099 |
+
"loss": 0.0024,
|
253100 |
+
"step": 36150
|
253101 |
+
},
|
253102 |
+
{
|
253103 |
+
"epoch": 4.94338848625735,
|
253104 |
+
"grad_norm": 0.9110774397850037,
|
253105 |
+
"learning_rate": 1.1721210461454198e-06,
|
253106 |
+
"loss": 0.0029,
|
253107 |
+
"step": 36151
|
253108 |
+
},
|
253109 |
+
{
|
253110 |
+
"epoch": 4.943525229044168,
|
253111 |
+
"grad_norm": 3.578433036804199,
|
253112 |
+
"learning_rate": 1.1693824455703136e-06,
|
253113 |
+
"loss": 0.04,
|
253114 |
+
"step": 36152
|
253115 |
+
},
|
253116 |
+
{
|
253117 |
+
"epoch": 4.943661971830986,
|
253118 |
+
"grad_norm": 0.5597577095031738,
|
253119 |
+
"learning_rate": 1.1666438449952076e-06,
|
253120 |
+
"loss": 0.0037,
|
253121 |
+
"step": 36153
|
253122 |
+
},
|
253123 |
+
{
|
253124 |
+
"epoch": 4.943798714617804,
|
253125 |
+
"grad_norm": 0.40373650193214417,
|
253126 |
+
"learning_rate": 1.1639052444201014e-06,
|
253127 |
+
"loss": 0.0019,
|
253128 |
+
"step": 36154
|
253129 |
+
},
|
253130 |
+
{
|
253131 |
+
"epoch": 4.943935457404622,
|
253132 |
+
"grad_norm": 1.272361397743225,
|
253133 |
+
"learning_rate": 1.1611666438449952e-06,
|
253134 |
+
"loss": 0.0073,
|
253135 |
+
"step": 36155
|
253136 |
+
},
|
253137 |
+
{
|
253138 |
+
"epoch": 4.94407220019144,
|
253139 |
+
"grad_norm": 0.7587971687316895,
|
253140 |
+
"learning_rate": 1.1584280432698892e-06,
|
253141 |
+
"loss": 0.0052,
|
253142 |
+
"step": 36156
|
253143 |
+
},
|
253144 |
+
{
|
253145 |
+
"epoch": 4.944208942978258,
|
253146 |
+
"grad_norm": 0.27546823024749756,
|
253147 |
+
"learning_rate": 1.155689442694783e-06,
|
253148 |
+
"loss": 0.0022,
|
253149 |
+
"step": 36157
|
253150 |
+
},
|
253151 |
+
{
|
253152 |
+
"epoch": 4.944345685765076,
|
253153 |
+
"grad_norm": 0.25228631496429443,
|
253154 |
+
"learning_rate": 1.152950842119677e-06,
|
253155 |
+
"loss": 0.0019,
|
253156 |
+
"step": 36158
|
253157 |
+
},
|
253158 |
+
{
|
253159 |
+
"epoch": 4.944482428551894,
|
253160 |
+
"grad_norm": 0.1939551830291748,
|
253161 |
+
"learning_rate": 1.1502122415445709e-06,
|
253162 |
+
"loss": 0.0016,
|
253163 |
+
"step": 36159
|
253164 |
+
},
|
253165 |
+
{
|
253166 |
+
"epoch": 4.9446191713387115,
|
253167 |
+
"grad_norm": 1.4617421627044678,
|
253168 |
+
"learning_rate": 1.1474736409694647e-06,
|
253169 |
+
"loss": 0.0049,
|
253170 |
+
"step": 36160
|
253171 |
+
},
|
253172 |
+
{
|
253173 |
+
"epoch": 4.94475591412553,
|
253174 |
+
"grad_norm": 0.6310351490974426,
|
253175 |
+
"learning_rate": 1.1447350403943585e-06,
|
253176 |
+
"loss": 0.0029,
|
253177 |
+
"step": 36161
|
253178 |
+
},
|
253179 |
+
{
|
253180 |
+
"epoch": 4.944892656912348,
|
253181 |
+
"grad_norm": 1.8543330430984497,
|
253182 |
+
"learning_rate": 1.1419964398192523e-06,
|
253183 |
+
"loss": 0.0146,
|
253184 |
+
"step": 36162
|
253185 |
+
},
|
253186 |
+
{
|
253187 |
+
"epoch": 4.945029399699166,
|
253188 |
+
"grad_norm": 0.4343065917491913,
|
253189 |
+
"learning_rate": 1.1392578392441461e-06,
|
253190 |
+
"loss": 0.003,
|
253191 |
+
"step": 36163
|
253192 |
+
},
|
253193 |
+
{
|
253194 |
+
"epoch": 4.945166142485984,
|
253195 |
+
"grad_norm": 1.0777355432510376,
|
253196 |
+
"learning_rate": 1.1365192386690401e-06,
|
253197 |
+
"loss": 0.0068,
|
253198 |
+
"step": 36164
|
253199 |
+
},
|
253200 |
+
{
|
253201 |
+
"epoch": 4.945302885272802,
|
253202 |
+
"grad_norm": 0.5695866942405701,
|
253203 |
+
"learning_rate": 1.133780638093934e-06,
|
253204 |
+
"loss": 0.0046,
|
253205 |
+
"step": 36165
|
253206 |
+
},
|
253207 |
+
{
|
253208 |
+
"epoch": 4.94543962805962,
|
253209 |
+
"grad_norm": 0.433201402425766,
|
253210 |
+
"learning_rate": 1.131042037518828e-06,
|
253211 |
+
"loss": 0.0035,
|
253212 |
+
"step": 36166
|
253213 |
+
},
|
253214 |
+
{
|
253215 |
+
"epoch": 4.945576370846438,
|
253216 |
+
"grad_norm": 1.6096434593200684,
|
253217 |
+
"learning_rate": 1.1283034369437218e-06,
|
253218 |
+
"loss": 0.0147,
|
253219 |
+
"step": 36167
|
253220 |
+
},
|
253221 |
+
{
|
253222 |
+
"epoch": 4.945713113633256,
|
253223 |
+
"grad_norm": 1.5094058513641357,
|
253224 |
+
"learning_rate": 1.1255648363686156e-06,
|
253225 |
+
"loss": 0.0415,
|
253226 |
+
"step": 36168
|
253227 |
+
},
|
253228 |
+
{
|
253229 |
+
"epoch": 4.9458498564200735,
|
253230 |
+
"grad_norm": 1.4107251167297363,
|
253231 |
+
"learning_rate": 1.1228262357935096e-06,
|
253232 |
+
"loss": 0.0099,
|
253233 |
+
"step": 36169
|
253234 |
+
},
|
253235 |
+
{
|
253236 |
+
"epoch": 4.945986599206892,
|
253237 |
+
"grad_norm": 0.6913461685180664,
|
253238 |
+
"learning_rate": 1.1200876352184034e-06,
|
253239 |
+
"loss": 0.0038,
|
253240 |
+
"step": 36170
|
253241 |
+
},
|
253242 |
+
{
|
253243 |
+
"epoch": 4.94612334199371,
|
253244 |
+
"grad_norm": 0.4153614044189453,
|
253245 |
+
"learning_rate": 1.1173490346432973e-06,
|
253246 |
+
"loss": 0.0035,
|
253247 |
+
"step": 36171
|
253248 |
+
},
|
253249 |
+
{
|
253250 |
+
"epoch": 4.946260084780528,
|
253251 |
+
"grad_norm": 1.4469714164733887,
|
253252 |
+
"learning_rate": 1.1146104340681913e-06,
|
253253 |
+
"loss": 0.0106,
|
253254 |
+
"step": 36172
|
253255 |
+
},
|
253256 |
+
{
|
253257 |
+
"epoch": 4.946396827567346,
|
253258 |
+
"grad_norm": 1.860593557357788,
|
253259 |
+
"learning_rate": 1.111871833493085e-06,
|
253260 |
+
"loss": 0.017,
|
253261 |
+
"step": 36173
|
253262 |
+
},
|
253263 |
+
{
|
253264 |
+
"epoch": 4.9465335703541635,
|
253265 |
+
"grad_norm": 1.0710150003433228,
|
253266 |
+
"learning_rate": 1.1091332329179791e-06,
|
253267 |
+
"loss": 0.0081,
|
253268 |
+
"step": 36174
|
253269 |
+
},
|
253270 |
+
{
|
253271 |
+
"epoch": 4.946670313140982,
|
253272 |
+
"grad_norm": 1.6947203874588013,
|
253273 |
+
"learning_rate": 1.106394632342873e-06,
|
253274 |
+
"loss": 0.0086,
|
253275 |
+
"step": 36175
|
253276 |
+
},
|
253277 |
+
{
|
253278 |
+
"epoch": 4.9468070559278,
|
253279 |
+
"grad_norm": 0.6095795035362244,
|
253280 |
+
"learning_rate": 1.1036560317677667e-06,
|
253281 |
+
"loss": 0.0035,
|
253282 |
+
"step": 36176
|
253283 |
+
},
|
253284 |
+
{
|
253285 |
+
"epoch": 4.946943798714618,
|
253286 |
+
"grad_norm": 1.200270652770996,
|
253287 |
+
"learning_rate": 1.1009174311926608e-06,
|
253288 |
+
"loss": 0.0078,
|
253289 |
+
"step": 36177
|
253290 |
+
},
|
253291 |
+
{
|
253292 |
+
"epoch": 4.947080541501435,
|
253293 |
+
"grad_norm": 2.369452476501465,
|
253294 |
+
"learning_rate": 1.0981788306175546e-06,
|
253295 |
+
"loss": 0.0091,
|
253296 |
+
"step": 36178
|
253297 |
+
},
|
253298 |
+
{
|
253299 |
+
"epoch": 4.947217284288254,
|
253300 |
+
"grad_norm": 0.9962687492370605,
|
253301 |
+
"learning_rate": 1.0954402300424484e-06,
|
253302 |
+
"loss": 0.0047,
|
253303 |
+
"step": 36179
|
253304 |
+
},
|
253305 |
+
{
|
253306 |
+
"epoch": 4.947354027075072,
|
253307 |
+
"grad_norm": 0.176607146859169,
|
253308 |
+
"learning_rate": 1.0927016294673422e-06,
|
253309 |
+
"loss": 0.0018,
|
253310 |
+
"step": 36180
|
253311 |
+
},
|
253312 |
+
{
|
253313 |
+
"epoch": 4.94749076986189,
|
253314 |
+
"grad_norm": 1.382371187210083,
|
253315 |
+
"learning_rate": 1.089963028892236e-06,
|
253316 |
+
"loss": 0.0099,
|
253317 |
+
"step": 36181
|
253318 |
+
},
|
253319 |
+
{
|
253320 |
+
"epoch": 4.947627512648708,
|
253321 |
+
"grad_norm": 0.6020751595497131,
|
253322 |
+
"learning_rate": 1.08722442831713e-06,
|
253323 |
+
"loss": 0.0051,
|
253324 |
+
"step": 36182
|
253325 |
+
},
|
253326 |
+
{
|
253327 |
+
"epoch": 4.9477642554355254,
|
253328 |
+
"grad_norm": 0.8201928734779358,
|
253329 |
+
"learning_rate": 1.0844858277420238e-06,
|
253330 |
+
"loss": 0.0055,
|
253331 |
+
"step": 36183
|
253332 |
+
},
|
253333 |
+
{
|
253334 |
+
"epoch": 4.947900998222344,
|
253335 |
+
"grad_norm": 0.9658450484275818,
|
253336 |
+
"learning_rate": 1.0817472271669176e-06,
|
253337 |
+
"loss": 0.0038,
|
253338 |
+
"step": 36184
|
253339 |
+
},
|
253340 |
+
{
|
253341 |
+
"epoch": 4.948037741009162,
|
253342 |
+
"grad_norm": 1.2474859952926636,
|
253343 |
+
"learning_rate": 1.0790086265918117e-06,
|
253344 |
+
"loss": 0.0053,
|
253345 |
+
"step": 36185
|
253346 |
+
},
|
253347 |
+
{
|
253348 |
+
"epoch": 4.94817448379598,
|
253349 |
+
"grad_norm": 1.2580018043518066,
|
253350 |
+
"learning_rate": 1.0762700260167055e-06,
|
253351 |
+
"loss": 0.0094,
|
253352 |
+
"step": 36186
|
253353 |
+
},
|
253354 |
+
{
|
253355 |
+
"epoch": 4.948311226582797,
|
253356 |
+
"grad_norm": 1.6777409315109253,
|
253357 |
+
"learning_rate": 1.0735314254415993e-06,
|
253358 |
+
"loss": 0.0144,
|
253359 |
+
"step": 36187
|
253360 |
+
},
|
253361 |
+
{
|
253362 |
+
"epoch": 4.9484479693696155,
|
253363 |
+
"grad_norm": 0.3638398349285126,
|
253364 |
+
"learning_rate": 1.0707928248664933e-06,
|
253365 |
+
"loss": 0.0026,
|
253366 |
+
"step": 36188
|
253367 |
+
},
|
253368 |
+
{
|
253369 |
+
"epoch": 4.948584712156434,
|
253370 |
+
"grad_norm": 0.20980893075466156,
|
253371 |
+
"learning_rate": 1.0680542242913871e-06,
|
253372 |
+
"loss": 0.002,
|
253373 |
+
"step": 36189
|
253374 |
+
},
|
253375 |
+
{
|
253376 |
+
"epoch": 4.948721454943252,
|
253377 |
+
"grad_norm": 1.8688292503356934,
|
253378 |
+
"learning_rate": 1.0653156237162811e-06,
|
253379 |
+
"loss": 0.0211,
|
253380 |
+
"step": 36190
|
253381 |
+
},
|
253382 |
+
{
|
253383 |
+
"epoch": 4.94885819773007,
|
253384 |
+
"grad_norm": 2.552511692047119,
|
253385 |
+
"learning_rate": 1.062577023141175e-06,
|
253386 |
+
"loss": 0.0181,
|
253387 |
+
"step": 36191
|
253388 |
+
},
|
253389 |
+
{
|
253390 |
+
"epoch": 4.948994940516887,
|
253391 |
+
"grad_norm": 1.2783429622650146,
|
253392 |
+
"learning_rate": 1.0598384225660688e-06,
|
253393 |
+
"loss": 0.0089,
|
253394 |
+
"step": 36192
|
253395 |
+
},
|
253396 |
+
{
|
253397 |
+
"epoch": 4.949131683303706,
|
253398 |
+
"grad_norm": 1.2584046125411987,
|
253399 |
+
"learning_rate": 1.0570998219909628e-06,
|
253400 |
+
"loss": 0.0087,
|
253401 |
+
"step": 36193
|
253402 |
+
},
|
253403 |
+
{
|
253404 |
+
"epoch": 4.949268426090524,
|
253405 |
+
"grad_norm": 1.4774506092071533,
|
253406 |
+
"learning_rate": 1.0543612214158566e-06,
|
253407 |
+
"loss": 0.0098,
|
253408 |
+
"step": 36194
|
253409 |
+
},
|
253410 |
+
{
|
253411 |
+
"epoch": 4.949405168877342,
|
253412 |
+
"grad_norm": 2.094944953918457,
|
253413 |
+
"learning_rate": 1.0516226208407504e-06,
|
253414 |
+
"loss": 0.0168,
|
253415 |
+
"step": 36195
|
253416 |
+
},
|
253417 |
+
{
|
253418 |
+
"epoch": 4.949541911664159,
|
253419 |
+
"grad_norm": 0.517128586769104,
|
253420 |
+
"learning_rate": 1.0488840202656444e-06,
|
253421 |
+
"loss": 0.0027,
|
253422 |
+
"step": 36196
|
253423 |
+
},
|
253424 |
+
{
|
253425 |
+
"epoch": 4.949678654450977,
|
253426 |
+
"grad_norm": 0.5653784275054932,
|
253427 |
+
"learning_rate": 1.0461454196905382e-06,
|
253428 |
+
"loss": 0.0025,
|
253429 |
+
"step": 36197
|
253430 |
+
},
|
253431 |
+
{
|
253432 |
+
"epoch": 4.949815397237796,
|
253433 |
+
"grad_norm": 0.19387076795101166,
|
253434 |
+
"learning_rate": 1.043406819115432e-06,
|
253435 |
+
"loss": 0.0015,
|
253436 |
+
"step": 36198
|
253437 |
+
},
|
253438 |
+
{
|
253439 |
+
"epoch": 4.949952140024614,
|
253440 |
+
"grad_norm": 1.4040969610214233,
|
253441 |
+
"learning_rate": 1.0406682185403259e-06,
|
253442 |
+
"loss": 0.0119,
|
253443 |
+
"step": 36199
|
253444 |
+
},
|
253445 |
+
{
|
253446 |
+
"epoch": 4.950088882811432,
|
253447 |
+
"grad_norm": 1.779250144958496,
|
253448 |
+
"learning_rate": 1.0379296179652197e-06,
|
253449 |
+
"loss": 0.0236,
|
253450 |
+
"step": 36200
|
253451 |
+
},
|
253452 |
+
{
|
253453 |
+
"epoch": 4.95022562559825,
|
253454 |
+
"grad_norm": 0.8612940907478333,
|
253455 |
+
"learning_rate": 1.0351910173901137e-06,
|
253456 |
+
"loss": 0.0042,
|
253457 |
+
"step": 36201
|
253458 |
+
},
|
253459 |
+
{
|
253460 |
+
"epoch": 4.9503623683850675,
|
253461 |
+
"grad_norm": 1.4387714862823486,
|
253462 |
+
"learning_rate": 1.0324524168150075e-06,
|
253463 |
+
"loss": 0.0074,
|
253464 |
+
"step": 36202
|
253465 |
+
},
|
253466 |
+
{
|
253467 |
+
"epoch": 4.950499111171886,
|
253468 |
+
"grad_norm": 1.2749367952346802,
|
253469 |
+
"learning_rate": 1.0297138162399013e-06,
|
253470 |
+
"loss": 0.0083,
|
253471 |
+
"step": 36203
|
253472 |
+
},
|
253473 |
+
{
|
253474 |
+
"epoch": 4.950635853958704,
|
253475 |
+
"grad_norm": 0.26780301332473755,
|
253476 |
+
"learning_rate": 1.0269752156647954e-06,
|
253477 |
+
"loss": 0.0019,
|
253478 |
+
"step": 36204
|
253479 |
+
},
|
253480 |
+
{
|
253481 |
+
"epoch": 4.950772596745522,
|
253482 |
+
"grad_norm": 1.8081685304641724,
|
253483 |
+
"learning_rate": 1.0242366150896892e-06,
|
253484 |
+
"loss": 0.0196,
|
253485 |
+
"step": 36205
|
253486 |
+
},
|
253487 |
+
{
|
253488 |
+
"epoch": 4.950909339532339,
|
253489 |
+
"grad_norm": 1.7771567106246948,
|
253490 |
+
"learning_rate": 1.021498014514583e-06,
|
253491 |
+
"loss": 0.0055,
|
253492 |
+
"step": 36206
|
253493 |
+
},
|
253494 |
+
{
|
253495 |
+
"epoch": 4.9510460823191575,
|
253496 |
+
"grad_norm": 0.41912898421287537,
|
253497 |
+
"learning_rate": 1.018759413939477e-06,
|
253498 |
+
"loss": 0.0032,
|
253499 |
+
"step": 36207
|
253500 |
+
},
|
253501 |
+
{
|
253502 |
+
"epoch": 4.951182825105976,
|
253503 |
+
"grad_norm": 1.4742532968521118,
|
253504 |
+
"learning_rate": 1.0160208133643708e-06,
|
253505 |
+
"loss": 0.0174,
|
253506 |
+
"step": 36208
|
253507 |
+
},
|
253508 |
+
{
|
253509 |
+
"epoch": 4.951319567892794,
|
253510 |
+
"grad_norm": 1.734173059463501,
|
253511 |
+
"learning_rate": 1.0132822127892648e-06,
|
253512 |
+
"loss": 0.0189,
|
253513 |
+
"step": 36209
|
253514 |
+
},
|
253515 |
+
{
|
253516 |
+
"epoch": 4.951456310679612,
|
253517 |
+
"grad_norm": 0.9341569542884827,
|
253518 |
+
"learning_rate": 1.0105436122141586e-06,
|
253519 |
+
"loss": 0.0063,
|
253520 |
+
"step": 36210
|
253521 |
+
},
|
253522 |
+
{
|
253523 |
+
"epoch": 4.951593053466429,
|
253524 |
+
"grad_norm": 1.0700740814208984,
|
253525 |
+
"learning_rate": 1.0078050116390525e-06,
|
253526 |
+
"loss": 0.0066,
|
253527 |
+
"step": 36211
|
253528 |
+
},
|
253529 |
+
{
|
253530 |
+
"epoch": 4.951729796253248,
|
253531 |
+
"grad_norm": 0.8219623565673828,
|
253532 |
+
"learning_rate": 1.0050664110639465e-06,
|
253533 |
+
"loss": 0.0042,
|
253534 |
+
"step": 36212
|
253535 |
+
},
|
253536 |
+
{
|
253537 |
+
"epoch": 4.951866539040066,
|
253538 |
+
"grad_norm": 0.5274037718772888,
|
253539 |
+
"learning_rate": 1.0023278104888403e-06,
|
253540 |
+
"loss": 0.0028,
|
253541 |
+
"step": 36213
|
253542 |
+
},
|
253543 |
+
{
|
253544 |
+
"epoch": 4.952003281826884,
|
253545 |
+
"grad_norm": 0.34048992395401,
|
253546 |
+
"learning_rate": 9.99589209913734e-07,
|
253547 |
+
"loss": 0.0027,
|
253548 |
+
"step": 36214
|
253549 |
+
},
|
253550 |
+
{
|
253551 |
+
"epoch": 4.952140024613701,
|
253552 |
+
"grad_norm": 2.3717310428619385,
|
253553 |
+
"learning_rate": 9.968506093386281e-07,
|
253554 |
+
"loss": 0.0208,
|
253555 |
+
"step": 36215
|
253556 |
+
},
|
253557 |
+
{
|
253558 |
+
"epoch": 4.9522767674005195,
|
253559 |
+
"grad_norm": 1.1878130435943604,
|
253560 |
+
"learning_rate": 9.94112008763522e-07,
|
253561 |
+
"loss": 0.0074,
|
253562 |
+
"step": 36216
|
253563 |
+
},
|
253564 |
+
{
|
253565 |
+
"epoch": 4.952413510187338,
|
253566 |
+
"grad_norm": 0.6002843976020813,
|
253567 |
+
"learning_rate": 9.913734081884157e-07,
|
253568 |
+
"loss": 0.0051,
|
253569 |
+
"step": 36217
|
253570 |
+
},
|
253571 |
+
{
|
253572 |
+
"epoch": 4.952550252974156,
|
253573 |
+
"grad_norm": 1.0540211200714111,
|
253574 |
+
"learning_rate": 9.886348076133096e-07,
|
253575 |
+
"loss": 0.0106,
|
253576 |
+
"step": 36218
|
253577 |
+
},
|
253578 |
+
{
|
253579 |
+
"epoch": 4.952686995760974,
|
253580 |
+
"grad_norm": 1.3463940620422363,
|
253581 |
+
"learning_rate": 9.858962070382034e-07,
|
253582 |
+
"loss": 0.0066,
|
253583 |
+
"step": 36219
|
253584 |
+
},
|
253585 |
+
{
|
253586 |
+
"epoch": 4.952823738547791,
|
253587 |
+
"grad_norm": 1.1114718914031982,
|
253588 |
+
"learning_rate": 9.831576064630974e-07,
|
253589 |
+
"loss": 0.0059,
|
253590 |
+
"step": 36220
|
253591 |
+
},
|
253592 |
+
{
|
253593 |
+
"epoch": 4.9529604813346095,
|
253594 |
+
"grad_norm": 0.7646312713623047,
|
253595 |
+
"learning_rate": 9.804190058879912e-07,
|
253596 |
+
"loss": 0.0048,
|
253597 |
+
"step": 36221
|
253598 |
+
},
|
253599 |
+
{
|
253600 |
+
"epoch": 4.953097224121428,
|
253601 |
+
"grad_norm": 0.5947393178939819,
|
253602 |
+
"learning_rate": 9.77680405312885e-07,
|
253603 |
+
"loss": 0.0035,
|
253604 |
+
"step": 36222
|
253605 |
+
},
|
253606 |
+
{
|
253607 |
+
"epoch": 4.953233966908246,
|
253608 |
+
"grad_norm": 1.3472728729248047,
|
253609 |
+
"learning_rate": 9.74941804737779e-07,
|
253610 |
+
"loss": 0.0067,
|
253611 |
+
"step": 36223
|
253612 |
+
},
|
253613 |
+
{
|
253614 |
+
"epoch": 4.953370709695063,
|
253615 |
+
"grad_norm": 0.23445451259613037,
|
253616 |
+
"learning_rate": 9.722032041626728e-07,
|
253617 |
+
"loss": 0.0023,
|
253618 |
+
"step": 36224
|
253619 |
+
},
|
253620 |
+
{
|
253621 |
+
"epoch": 4.953507452481881,
|
253622 |
+
"grad_norm": 0.8013550639152527,
|
253623 |
+
"learning_rate": 9.694646035875669e-07,
|
253624 |
+
"loss": 0.0049,
|
253625 |
+
"step": 36225
|
253626 |
+
},
|
253627 |
+
{
|
253628 |
+
"epoch": 4.9536441952687,
|
253629 |
+
"grad_norm": 0.41143661737442017,
|
253630 |
+
"learning_rate": 9.667260030124607e-07,
|
253631 |
+
"loss": 0.0024,
|
253632 |
+
"step": 36226
|
253633 |
+
},
|
253634 |
+
{
|
253635 |
+
"epoch": 4.953780938055518,
|
253636 |
+
"grad_norm": 1.1132488250732422,
|
253637 |
+
"learning_rate": 9.639874024373545e-07,
|
253638 |
+
"loss": 0.0057,
|
253639 |
+
"step": 36227
|
253640 |
+
},
|
253641 |
+
{
|
253642 |
+
"epoch": 4.953917680842336,
|
253643 |
+
"grad_norm": 2.2572362422943115,
|
253644 |
+
"learning_rate": 9.612488018622485e-07,
|
253645 |
+
"loss": 0.0135,
|
253646 |
+
"step": 36228
|
253647 |
+
},
|
253648 |
+
{
|
253649 |
+
"epoch": 4.954054423629153,
|
253650 |
+
"grad_norm": 1.970612645149231,
|
253651 |
+
"learning_rate": 9.585102012871423e-07,
|
253652 |
+
"loss": 0.0152,
|
253653 |
+
"step": 36229
|
253654 |
+
},
|
253655 |
+
{
|
253656 |
+
"epoch": 4.9541911664159715,
|
253657 |
+
"grad_norm": 0.6752140522003174,
|
253658 |
+
"learning_rate": 9.557716007120361e-07,
|
253659 |
+
"loss": 0.0041,
|
253660 |
+
"step": 36230
|
253661 |
+
},
|
253662 |
+
{
|
253663 |
+
"epoch": 4.95432790920279,
|
253664 |
+
"grad_norm": 1.830278992652893,
|
253665 |
+
"learning_rate": 9.530330001369301e-07,
|
253666 |
+
"loss": 0.0087,
|
253667 |
+
"step": 36231
|
253668 |
+
},
|
253669 |
+
{
|
253670 |
+
"epoch": 4.954464651989608,
|
253671 |
+
"grad_norm": 2.2445948123931885,
|
253672 |
+
"learning_rate": 9.502943995618239e-07,
|
253673 |
+
"loss": 0.0184,
|
253674 |
+
"step": 36232
|
253675 |
+
},
|
253676 |
+
{
|
253677 |
+
"epoch": 4.954601394776425,
|
253678 |
+
"grad_norm": 1.1071373224258423,
|
253679 |
+
"learning_rate": 9.475557989867179e-07,
|
253680 |
+
"loss": 0.0083,
|
253681 |
+
"step": 36233
|
253682 |
+
},
|
253683 |
+
{
|
253684 |
+
"epoch": 4.954738137563243,
|
253685 |
+
"grad_norm": 1.1520310640335083,
|
253686 |
+
"learning_rate": 9.448171984116117e-07,
|
253687 |
+
"loss": 0.0055,
|
253688 |
+
"step": 36234
|
253689 |
+
},
|
253690 |
+
{
|
253691 |
+
"epoch": 4.9548748803500615,
|
253692 |
+
"grad_norm": 1.311435341835022,
|
253693 |
+
"learning_rate": 9.420785978365055e-07,
|
253694 |
+
"loss": 0.0091,
|
253695 |
+
"step": 36235
|
253696 |
+
},
|
253697 |
+
{
|
253698 |
+
"epoch": 4.95501162313688,
|
253699 |
+
"grad_norm": 1.1327369213104248,
|
253700 |
+
"learning_rate": 9.393399972613995e-07,
|
253701 |
+
"loss": 0.0068,
|
253702 |
+
"step": 36236
|
253703 |
+
},
|
253704 |
+
{
|
253705 |
+
"epoch": 4.955148365923698,
|
253706 |
+
"grad_norm": 0.9175917506217957,
|
253707 |
+
"learning_rate": 9.366013966862933e-07,
|
253708 |
+
"loss": 0.004,
|
253709 |
+
"step": 36237
|
253710 |
+
},
|
253711 |
+
{
|
253712 |
+
"epoch": 4.955285108710515,
|
253713 |
+
"grad_norm": 0.5956702828407288,
|
253714 |
+
"learning_rate": 9.338627961111872e-07,
|
253715 |
+
"loss": 0.0032,
|
253716 |
+
"step": 36238
|
253717 |
+
},
|
253718 |
+
{
|
253719 |
+
"epoch": 4.955421851497333,
|
253720 |
+
"grad_norm": 2.1436238288879395,
|
253721 |
+
"learning_rate": 9.311241955360812e-07,
|
253722 |
+
"loss": 0.0285,
|
253723 |
+
"step": 36239
|
253724 |
+
},
|
253725 |
+
{
|
253726 |
+
"epoch": 4.955558594284152,
|
253727 |
+
"grad_norm": 1.074121117591858,
|
253728 |
+
"learning_rate": 9.28385594960975e-07,
|
253729 |
+
"loss": 0.0062,
|
253730 |
+
"step": 36240
|
253731 |
+
},
|
253732 |
+
{
|
253733 |
+
"epoch": 4.95569533707097,
|
253734 |
+
"grad_norm": 0.2673737108707428,
|
253735 |
+
"learning_rate": 9.256469943858689e-07,
|
253736 |
+
"loss": 0.0018,
|
253737 |
+
"step": 36241
|
253738 |
+
},
|
253739 |
+
{
|
253740 |
+
"epoch": 4.955832079857787,
|
253741 |
+
"grad_norm": 2.348092794418335,
|
253742 |
+
"learning_rate": 9.229083938107627e-07,
|
253743 |
+
"loss": 0.0205,
|
253744 |
+
"step": 36242
|
253745 |
+
},
|
253746 |
+
{
|
253747 |
+
"epoch": 4.955968822644605,
|
253748 |
+
"grad_norm": 0.7228333950042725,
|
253749 |
+
"learning_rate": 9.201697932356565e-07,
|
253750 |
+
"loss": 0.0048,
|
253751 |
+
"step": 36243
|
253752 |
+
},
|
253753 |
+
{
|
253754 |
+
"epoch": 4.956105565431423,
|
253755 |
+
"grad_norm": 1.0640839338302612,
|
253756 |
+
"learning_rate": 9.174311926605506e-07,
|
253757 |
+
"loss": 0.0062,
|
253758 |
+
"step": 36244
|
253759 |
+
},
|
253760 |
+
{
|
253761 |
+
"epoch": 4.956242308218242,
|
253762 |
+
"grad_norm": 0.2423754334449768,
|
253763 |
+
"learning_rate": 9.146925920854444e-07,
|
253764 |
+
"loss": 0.0023,
|
253765 |
+
"step": 36245
|
253766 |
+
},
|
253767 |
+
{
|
253768 |
+
"epoch": 4.95637905100506,
|
253769 |
+
"grad_norm": 0.6095932722091675,
|
253770 |
+
"learning_rate": 9.119539915103382e-07,
|
253771 |
+
"loss": 0.0031,
|
253772 |
+
"step": 36246
|
253773 |
+
},
|
253774 |
+
{
|
253775 |
+
"epoch": 4.956515793791877,
|
253776 |
+
"grad_norm": 1.1391270160675049,
|
253777 |
+
"learning_rate": 9.092153909352322e-07,
|
253778 |
+
"loss": 0.0232,
|
253779 |
+
"step": 36247
|
253780 |
+
},
|
253781 |
+
{
|
253782 |
+
"epoch": 4.956652536578695,
|
253783 |
+
"grad_norm": 0.668916642665863,
|
253784 |
+
"learning_rate": 9.06476790360126e-07,
|
253785 |
+
"loss": 0.0037,
|
253786 |
+
"step": 36248
|
253787 |
+
},
|
253788 |
+
{
|
253789 |
+
"epoch": 4.9567892793655135,
|
253790 |
+
"grad_norm": 1.7962534427642822,
|
253791 |
+
"learning_rate": 9.037381897850199e-07,
|
253792 |
+
"loss": 0.0114,
|
253793 |
+
"step": 36249
|
253794 |
+
},
|
253795 |
+
{
|
253796 |
+
"epoch": 4.956926022152332,
|
253797 |
+
"grad_norm": 0.9312837719917297,
|
253798 |
+
"learning_rate": 9.009995892099137e-07,
|
253799 |
+
"loss": 0.007,
|
253800 |
+
"step": 36250
|
253801 |
+
},
|
253802 |
+
{
|
253803 |
+
"epoch": 4.957062764939149,
|
253804 |
+
"grad_norm": 1.4244067668914795,
|
253805 |
+
"learning_rate": 8.982609886348076e-07,
|
253806 |
+
"loss": 0.0129,
|
253807 |
+
"step": 36251
|
253808 |
+
},
|
253809 |
+
{
|
253810 |
+
"epoch": 4.957199507725967,
|
253811 |
+
"grad_norm": 1.2367985248565674,
|
253812 |
+
"learning_rate": 8.955223880597016e-07,
|
253813 |
+
"loss": 0.0087,
|
253814 |
+
"step": 36252
|
253815 |
+
},
|
253816 |
+
{
|
253817 |
+
"epoch": 4.957336250512785,
|
253818 |
+
"grad_norm": 0.5678768157958984,
|
253819 |
+
"learning_rate": 8.927837874845954e-07,
|
253820 |
+
"loss": 0.0039,
|
253821 |
+
"step": 36253
|
253822 |
+
},
|
253823 |
+
{
|
253824 |
+
"epoch": 4.957472993299604,
|
253825 |
+
"grad_norm": 1.308166265487671,
|
253826 |
+
"learning_rate": 8.900451869094892e-07,
|
253827 |
+
"loss": 0.0091,
|
253828 |
+
"step": 36254
|
253829 |
+
},
|
253830 |
+
{
|
253831 |
+
"epoch": 4.957609736086422,
|
253832 |
+
"grad_norm": 1.2538342475891113,
|
253833 |
+
"learning_rate": 8.873065863343832e-07,
|
253834 |
+
"loss": 0.0091,
|
253835 |
+
"step": 36255
|
253836 |
+
},
|
253837 |
+
{
|
253838 |
+
"epoch": 4.957746478873239,
|
253839 |
+
"grad_norm": 0.8632281422615051,
|
253840 |
+
"learning_rate": 8.84567985759277e-07,
|
253841 |
+
"loss": 0.006,
|
253842 |
+
"step": 36256
|
253843 |
+
},
|
253844 |
+
{
|
253845 |
+
"epoch": 4.957883221660057,
|
253846 |
+
"grad_norm": 0.4599374532699585,
|
253847 |
+
"learning_rate": 8.818293851841711e-07,
|
253848 |
+
"loss": 0.0027,
|
253849 |
+
"step": 36257
|
253850 |
+
},
|
253851 |
+
{
|
253852 |
+
"epoch": 4.958019964446875,
|
253853 |
+
"grad_norm": 0.3497755527496338,
|
253854 |
+
"learning_rate": 8.790907846090649e-07,
|
253855 |
+
"loss": 0.002,
|
253856 |
+
"step": 36258
|
253857 |
+
},
|
253858 |
+
{
|
253859 |
+
"epoch": 4.958156707233694,
|
253860 |
+
"grad_norm": 1.289759874343872,
|
253861 |
+
"learning_rate": 8.763521840339587e-07,
|
253862 |
+
"loss": 0.0092,
|
253863 |
+
"step": 36259
|
253864 |
+
},
|
253865 |
+
{
|
253866 |
+
"epoch": 4.958293450020512,
|
253867 |
+
"grad_norm": 0.47318127751350403,
|
253868 |
+
"learning_rate": 8.736135834588526e-07,
|
253869 |
+
"loss": 0.0037,
|
253870 |
+
"step": 36260
|
253871 |
+
},
|
253872 |
+
{
|
253873 |
+
"epoch": 4.958430192807329,
|
253874 |
+
"grad_norm": 1.1473171710968018,
|
253875 |
+
"learning_rate": 8.708749828837464e-07,
|
253876 |
+
"loss": 0.0147,
|
253877 |
+
"step": 36261
|
253878 |
+
},
|
253879 |
+
{
|
253880 |
+
"epoch": 4.958566935594147,
|
253881 |
+
"grad_norm": 0.9846006035804749,
|
253882 |
+
"learning_rate": 8.681363823086402e-07,
|
253883 |
+
"loss": 0.0047,
|
253884 |
+
"step": 36262
|
253885 |
+
},
|
253886 |
+
{
|
253887 |
+
"epoch": 4.9587036783809655,
|
253888 |
+
"grad_norm": 0.7684176564216614,
|
253889 |
+
"learning_rate": 8.653977817335342e-07,
|
253890 |
+
"loss": 0.0046,
|
253891 |
+
"step": 36263
|
253892 |
+
},
|
253893 |
+
{
|
253894 |
+
"epoch": 4.958840421167784,
|
253895 |
+
"grad_norm": 0.44203710556030273,
|
253896 |
+
"learning_rate": 8.626591811584281e-07,
|
253897 |
+
"loss": 0.0028,
|
253898 |
+
"step": 36264
|
253899 |
+
},
|
253900 |
+
{
|
253901 |
+
"epoch": 4.958977163954602,
|
253902 |
+
"grad_norm": 1.0821181535720825,
|
253903 |
+
"learning_rate": 8.599205805833221e-07,
|
253904 |
+
"loss": 0.0066,
|
253905 |
+
"step": 36265
|
253906 |
+
},
|
253907 |
+
{
|
253908 |
+
"epoch": 4.959113906741419,
|
253909 |
+
"grad_norm": 2.810882329940796,
|
253910 |
+
"learning_rate": 8.571819800082159e-07,
|
253911 |
+
"loss": 0.023,
|
253912 |
+
"step": 36266
|
253913 |
+
},
|
253914 |
+
{
|
253915 |
+
"epoch": 4.959250649528237,
|
253916 |
+
"grad_norm": 0.5956805348396301,
|
253917 |
+
"learning_rate": 8.544433794331097e-07,
|
253918 |
+
"loss": 0.0036,
|
253919 |
+
"step": 36267
|
253920 |
+
},
|
253921 |
+
{
|
253922 |
+
"epoch": 4.9593873923150555,
|
253923 |
+
"grad_norm": 0.9995694160461426,
|
253924 |
+
"learning_rate": 8.517047788580036e-07,
|
253925 |
+
"loss": 0.0052,
|
253926 |
+
"step": 36268
|
253927 |
+
},
|
253928 |
+
{
|
253929 |
+
"epoch": 4.959524135101874,
|
253930 |
+
"grad_norm": 1.01091730594635,
|
253931 |
+
"learning_rate": 8.489661782828974e-07,
|
253932 |
+
"loss": 0.0068,
|
253933 |
+
"step": 36269
|
253934 |
+
},
|
253935 |
+
{
|
253936 |
+
"epoch": 4.959660877888691,
|
253937 |
+
"grad_norm": 0.8615798354148865,
|
253938 |
+
"learning_rate": 8.462275777077912e-07,
|
253939 |
+
"loss": 0.0058,
|
253940 |
+
"step": 36270
|
253941 |
+
},
|
253942 |
+
{
|
253943 |
+
"epoch": 4.959797620675509,
|
253944 |
+
"grad_norm": 2.6280083656311035,
|
253945 |
+
"learning_rate": 8.434889771326853e-07,
|
253946 |
+
"loss": 0.0317,
|
253947 |
+
"step": 36271
|
253948 |
+
},
|
253949 |
+
{
|
253950 |
+
"epoch": 4.959934363462327,
|
253951 |
+
"grad_norm": 0.3481854200363159,
|
253952 |
+
"learning_rate": 8.407503765575791e-07,
|
253953 |
+
"loss": 0.0021,
|
253954 |
+
"step": 36272
|
253955 |
+
},
|
253956 |
+
{
|
253957 |
+
"epoch": 4.960071106249146,
|
253958 |
+
"grad_norm": 1.5129525661468506,
|
253959 |
+
"learning_rate": 8.380117759824731e-07,
|
253960 |
+
"loss": 0.0069,
|
253961 |
+
"step": 36273
|
253962 |
+
},
|
253963 |
+
{
|
253964 |
+
"epoch": 4.960207849035964,
|
253965 |
+
"grad_norm": 0.35493358969688416,
|
253966 |
+
"learning_rate": 8.352731754073669e-07,
|
253967 |
+
"loss": 0.0023,
|
253968 |
+
"step": 36274
|
253969 |
+
},
|
253970 |
+
{
|
253971 |
+
"epoch": 4.960344591822781,
|
253972 |
+
"grad_norm": 0.4850999116897583,
|
253973 |
+
"learning_rate": 8.325345748322607e-07,
|
253974 |
+
"loss": 0.004,
|
253975 |
+
"step": 36275
|
253976 |
+
},
|
253977 |
+
{
|
253978 |
+
"epoch": 4.960481334609599,
|
253979 |
+
"grad_norm": 1.13119375705719,
|
253980 |
+
"learning_rate": 8.297959742571547e-07,
|
253981 |
+
"loss": 0.0057,
|
253982 |
+
"step": 36276
|
253983 |
+
},
|
253984 |
+
{
|
253985 |
+
"epoch": 4.9606180773964175,
|
253986 |
+
"grad_norm": 1.2310129404067993,
|
253987 |
+
"learning_rate": 8.270573736820486e-07,
|
253988 |
+
"loss": 0.0096,
|
253989 |
+
"step": 36277
|
253990 |
+
},
|
253991 |
+
{
|
253992 |
+
"epoch": 4.960754820183236,
|
253993 |
+
"grad_norm": 0.4807657301425934,
|
253994 |
+
"learning_rate": 8.243187731069424e-07,
|
253995 |
+
"loss": 0.0025,
|
253996 |
+
"step": 36278
|
253997 |
+
},
|
253998 |
+
{
|
253999 |
+
"epoch": 4.960891562970053,
|
254000 |
+
"grad_norm": 0.6868378520011902,
|
254001 |
+
"learning_rate": 8.215801725318363e-07,
|
254002 |
+
"loss": 0.0046,
|
254003 |
+
"step": 36279
|
254004 |
+
},
|
254005 |
+
{
|
254006 |
+
"epoch": 4.961028305756871,
|
254007 |
+
"grad_norm": 2.0783612728118896,
|
254008 |
+
"learning_rate": 8.188415719567301e-07,
|
254009 |
+
"loss": 0.0108,
|
254010 |
+
"step": 36280
|
254011 |
+
},
|
254012 |
+
{
|
254013 |
+
"epoch": 4.961165048543689,
|
254014 |
+
"grad_norm": 0.5675146579742432,
|
254015 |
+
"learning_rate": 8.161029713816241e-07,
|
254016 |
+
"loss": 0.0042,
|
254017 |
+
"step": 36281
|
254018 |
+
},
|
254019 |
+
{
|
254020 |
+
"epoch": 4.9613017913305075,
|
254021 |
+
"grad_norm": 3.569162607192993,
|
254022 |
+
"learning_rate": 8.133643708065179e-07,
|
254023 |
+
"loss": 0.0368,
|
254024 |
+
"step": 36282
|
254025 |
+
},
|
254026 |
+
{
|
254027 |
+
"epoch": 4.961438534117326,
|
254028 |
+
"grad_norm": 1.668241262435913,
|
254029 |
+
"learning_rate": 8.106257702314117e-07,
|
254030 |
+
"loss": 0.0205,
|
254031 |
+
"step": 36283
|
254032 |
+
},
|
254033 |
+
{
|
254034 |
+
"epoch": 4.961575276904143,
|
254035 |
+
"grad_norm": 1.616251826286316,
|
254036 |
+
"learning_rate": 8.078871696563058e-07,
|
254037 |
+
"loss": 0.0061,
|
254038 |
+
"step": 36284
|
254039 |
+
},
|
254040 |
+
{
|
254041 |
+
"epoch": 4.961712019690961,
|
254042 |
+
"grad_norm": 2.459717035293579,
|
254043 |
+
"learning_rate": 8.051485690811996e-07,
|
254044 |
+
"loss": 0.0126,
|
254045 |
+
"step": 36285
|
254046 |
+
},
|
254047 |
+
{
|
254048 |
+
"epoch": 4.961848762477779,
|
254049 |
+
"grad_norm": 0.6150050163269043,
|
254050 |
+
"learning_rate": 8.024099685060934e-07,
|
254051 |
+
"loss": 0.0035,
|
254052 |
+
"step": 36286
|
254053 |
+
},
|
254054 |
+
{
|
254055 |
+
"epoch": 4.961985505264598,
|
254056 |
+
"grad_norm": 1.09522545337677,
|
254057 |
+
"learning_rate": 7.996713679309873e-07,
|
254058 |
+
"loss": 0.0081,
|
254059 |
+
"step": 36287
|
254060 |
+
},
|
254061 |
+
{
|
254062 |
+
"epoch": 4.962122248051415,
|
254063 |
+
"grad_norm": 1.8971471786499023,
|
254064 |
+
"learning_rate": 7.969327673558811e-07,
|
254065 |
+
"loss": 0.0209,
|
254066 |
+
"step": 36288
|
254067 |
+
},
|
254068 |
+
{
|
254069 |
+
"epoch": 4.962258990838233,
|
254070 |
+
"grad_norm": 1.5086989402770996,
|
254071 |
+
"learning_rate": 7.941941667807751e-07,
|
254072 |
+
"loss": 0.0145,
|
254073 |
+
"step": 36289
|
254074 |
+
},
|
254075 |
+
{
|
254076 |
+
"epoch": 4.962395733625051,
|
254077 |
+
"grad_norm": 1.753340721130371,
|
254078 |
+
"learning_rate": 7.91455566205669e-07,
|
254079 |
+
"loss": 0.0113,
|
254080 |
+
"step": 36290
|
254081 |
+
},
|
254082 |
+
{
|
254083 |
+
"epoch": 4.9625324764118695,
|
254084 |
+
"grad_norm": 1.2799098491668701,
|
254085 |
+
"learning_rate": 7.887169656305628e-07,
|
254086 |
+
"loss": 0.0067,
|
254087 |
+
"step": 36291
|
254088 |
+
},
|
254089 |
+
{
|
254090 |
+
"epoch": 4.962669219198688,
|
254091 |
+
"grad_norm": 0.34612464904785156,
|
254092 |
+
"learning_rate": 7.859783650554568e-07,
|
254093 |
+
"loss": 0.0023,
|
254094 |
+
"step": 36292
|
254095 |
+
},
|
254096 |
+
{
|
254097 |
+
"epoch": 4.962805961985505,
|
254098 |
+
"grad_norm": 1.0636358261108398,
|
254099 |
+
"learning_rate": 7.832397644803506e-07,
|
254100 |
+
"loss": 0.0051,
|
254101 |
+
"step": 36293
|
254102 |
+
},
|
254103 |
+
{
|
254104 |
+
"epoch": 4.962942704772323,
|
254105 |
+
"grad_norm": 0.776213526725769,
|
254106 |
+
"learning_rate": 7.805011639052445e-07,
|
254107 |
+
"loss": 0.005,
|
254108 |
+
"step": 36294
|
254109 |
+
},
|
254110 |
+
{
|
254111 |
+
"epoch": 4.963079447559141,
|
254112 |
+
"grad_norm": 0.30032598972320557,
|
254113 |
+
"learning_rate": 7.777625633301384e-07,
|
254114 |
+
"loss": 0.0023,
|
254115 |
+
"step": 36295
|
254116 |
+
},
|
254117 |
+
{
|
254118 |
+
"epoch": 4.9632161903459595,
|
254119 |
+
"grad_norm": 0.8580377697944641,
|
254120 |
+
"learning_rate": 7.750239627550322e-07,
|
254121 |
+
"loss": 0.0052,
|
254122 |
+
"step": 36296
|
254123 |
+
},
|
254124 |
+
{
|
254125 |
+
"epoch": 4.963352933132777,
|
254126 |
+
"grad_norm": 0.30256152153015137,
|
254127 |
+
"learning_rate": 7.72285362179926e-07,
|
254128 |
+
"loss": 0.0023,
|
254129 |
+
"step": 36297
|
254130 |
+
},
|
254131 |
+
{
|
254132 |
+
"epoch": 4.963489675919595,
|
254133 |
+
"grad_norm": 0.18553410470485687,
|
254134 |
+
"learning_rate": 7.6954676160482e-07,
|
254135 |
+
"loss": 0.0015,
|
254136 |
+
"step": 36298
|
254137 |
+
},
|
254138 |
+
{
|
254139 |
+
"epoch": 4.963626418706413,
|
254140 |
+
"grad_norm": 1.510427474975586,
|
254141 |
+
"learning_rate": 7.668081610297139e-07,
|
254142 |
+
"loss": 0.0094,
|
254143 |
+
"step": 36299
|
254144 |
+
},
|
254145 |
+
{
|
254146 |
+
"epoch": 4.963763161493231,
|
254147 |
+
"grad_norm": 0.5121397376060486,
|
254148 |
+
"learning_rate": 7.640695604546077e-07,
|
254149 |
+
"loss": 0.0031,
|
254150 |
+
"step": 36300
|
254151 |
+
},
|
254152 |
+
{
|
254153 |
+
"epoch": 4.96389990428005,
|
254154 |
+
"grad_norm": 1.8147364854812622,
|
254155 |
+
"learning_rate": 7.613309598795016e-07,
|
254156 |
+
"loss": 0.0262,
|
254157 |
+
"step": 36301
|
254158 |
+
},
|
254159 |
+
{
|
254160 |
+
"epoch": 4.964036647066867,
|
254161 |
+
"grad_norm": 0.42693233489990234,
|
254162 |
+
"learning_rate": 7.585923593043955e-07,
|
254163 |
+
"loss": 0.003,
|
254164 |
+
"step": 36302
|
254165 |
+
},
|
254166 |
+
{
|
254167 |
+
"epoch": 4.964173389853685,
|
254168 |
+
"grad_norm": 0.5560454726219177,
|
254169 |
+
"learning_rate": 7.558537587292894e-07,
|
254170 |
+
"loss": 0.0036,
|
254171 |
+
"step": 36303
|
254172 |
+
},
|
254173 |
+
{
|
254174 |
+
"epoch": 4.964310132640503,
|
254175 |
+
"grad_norm": 1.2067631483078003,
|
254176 |
+
"learning_rate": 7.531151581541833e-07,
|
254177 |
+
"loss": 0.0067,
|
254178 |
+
"step": 36304
|
254179 |
+
},
|
254180 |
+
{
|
254181 |
+
"epoch": 4.964446875427321,
|
254182 |
+
"grad_norm": 0.6074029207229614,
|
254183 |
+
"learning_rate": 7.503765575790772e-07,
|
254184 |
+
"loss": 0.0041,
|
254185 |
+
"step": 36305
|
254186 |
+
},
|
254187 |
+
{
|
254188 |
+
"epoch": 4.964583618214139,
|
254189 |
+
"grad_norm": 1.2939320802688599,
|
254190 |
+
"learning_rate": 7.47637957003971e-07,
|
254191 |
+
"loss": 0.0092,
|
254192 |
+
"step": 36306
|
254193 |
+
},
|
254194 |
+
{
|
254195 |
+
"epoch": 4.964720361000957,
|
254196 |
+
"grad_norm": 1.179916501045227,
|
254197 |
+
"learning_rate": 7.448993564288649e-07,
|
254198 |
+
"loss": 0.0106,
|
254199 |
+
"step": 36307
|
254200 |
+
},
|
254201 |
+
{
|
254202 |
+
"epoch": 4.964857103787775,
|
254203 |
+
"grad_norm": 0.2681238651275635,
|
254204 |
+
"learning_rate": 7.421607558537587e-07,
|
254205 |
+
"loss": 0.0019,
|
254206 |
+
"step": 36308
|
254207 |
+
},
|
254208 |
+
{
|
254209 |
+
"epoch": 4.964993846574593,
|
254210 |
+
"grad_norm": 1.2272545099258423,
|
254211 |
+
"learning_rate": 7.394221552786526e-07,
|
254212 |
+
"loss": 0.0086,
|
254213 |
+
"step": 36309
|
254214 |
+
},
|
254215 |
+
{
|
254216 |
+
"epoch": 4.9651305893614115,
|
254217 |
+
"grad_norm": 8.822099685668945,
|
254218 |
+
"learning_rate": 7.366835547035465e-07,
|
254219 |
+
"loss": 0.0809,
|
254220 |
+
"step": 36310
|
254221 |
+
},
|
254222 |
+
{
|
254223 |
+
"epoch": 4.965267332148229,
|
254224 |
+
"grad_norm": 0.9617989659309387,
|
254225 |
+
"learning_rate": 7.339449541284405e-07,
|
254226 |
+
"loss": 0.0065,
|
254227 |
+
"step": 36311
|
254228 |
+
},
|
254229 |
+
{
|
254230 |
+
"epoch": 4.965404074935047,
|
254231 |
+
"grad_norm": 0.4196038246154785,
|
254232 |
+
"learning_rate": 7.312063535533343e-07,
|
254233 |
+
"loss": 0.0023,
|
254234 |
+
"step": 36312
|
254235 |
+
},
|
254236 |
+
{
|
254237 |
+
"epoch": 4.965540817721865,
|
254238 |
+
"grad_norm": 1.741355061531067,
|
254239 |
+
"learning_rate": 7.284677529782282e-07,
|
254240 |
+
"loss": 0.0183,
|
254241 |
+
"step": 36313
|
254242 |
+
},
|
254243 |
+
{
|
254244 |
+
"epoch": 4.965677560508683,
|
254245 |
+
"grad_norm": 1.5667476654052734,
|
254246 |
+
"learning_rate": 7.25729152403122e-07,
|
254247 |
+
"loss": 0.0119,
|
254248 |
+
"step": 36314
|
254249 |
+
},
|
254250 |
+
{
|
254251 |
+
"epoch": 4.965814303295501,
|
254252 |
+
"grad_norm": 0.9788122177124023,
|
254253 |
+
"learning_rate": 7.229905518280159e-07,
|
254254 |
+
"loss": 0.0075,
|
254255 |
+
"step": 36315
|
254256 |
+
},
|
254257 |
+
{
|
254258 |
+
"epoch": 4.965951046082319,
|
254259 |
+
"grad_norm": 1.9734033346176147,
|
254260 |
+
"learning_rate": 7.202519512529097e-07,
|
254261 |
+
"loss": 0.0134,
|
254262 |
+
"step": 36316
|
254263 |
+
},
|
254264 |
+
{
|
254265 |
+
"epoch": 4.966087788869137,
|
254266 |
+
"grad_norm": 1.2615394592285156,
|
254267 |
+
"learning_rate": 7.175133506778037e-07,
|
254268 |
+
"loss": 0.0048,
|
254269 |
+
"step": 36317
|
254270 |
+
},
|
254271 |
+
{
|
254272 |
+
"epoch": 4.966224531655955,
|
254273 |
+
"grad_norm": 0.6502422094345093,
|
254274 |
+
"learning_rate": 7.147747501026976e-07,
|
254275 |
+
"loss": 0.0037,
|
254276 |
+
"step": 36318
|
254277 |
+
},
|
254278 |
+
{
|
254279 |
+
"epoch": 4.966361274442773,
|
254280 |
+
"grad_norm": 1.4703865051269531,
|
254281 |
+
"learning_rate": 7.120361495275915e-07,
|
254282 |
+
"loss": 0.0111,
|
254283 |
+
"step": 36319
|
254284 |
+
},
|
254285 |
+
{
|
254286 |
+
"epoch": 4.966498017229592,
|
254287 |
+
"grad_norm": 0.9054369330406189,
|
254288 |
+
"learning_rate": 7.092975489524853e-07,
|
254289 |
+
"loss": 0.0054,
|
254290 |
+
"step": 36320
|
254291 |
+
},
|
254292 |
+
{
|
254293 |
+
"epoch": 4.966634760016409,
|
254294 |
+
"grad_norm": 1.6246464252471924,
|
254295 |
+
"learning_rate": 7.065589483773792e-07,
|
254296 |
+
"loss": 0.0112,
|
254297 |
+
"step": 36321
|
254298 |
+
},
|
254299 |
+
{
|
254300 |
+
"epoch": 4.966771502803227,
|
254301 |
+
"grad_norm": 0.9567500948905945,
|
254302 |
+
"learning_rate": 7.038203478022731e-07,
|
254303 |
+
"loss": 0.0033,
|
254304 |
+
"step": 36322
|
254305 |
+
},
|
254306 |
+
{
|
254307 |
+
"epoch": 4.966908245590045,
|
254308 |
+
"grad_norm": 2.039433240890503,
|
254309 |
+
"learning_rate": 7.010817472271669e-07,
|
254310 |
+
"loss": 0.0094,
|
254311 |
+
"step": 36323
|
254312 |
+
},
|
254313 |
+
{
|
254314 |
+
"epoch": 4.9670449883768635,
|
254315 |
+
"grad_norm": 1.6395562887191772,
|
254316 |
+
"learning_rate": 6.983431466520609e-07,
|
254317 |
+
"loss": 0.0163,
|
254318 |
+
"step": 36324
|
254319 |
+
},
|
254320 |
+
{
|
254321 |
+
"epoch": 4.967181731163681,
|
254322 |
+
"grad_norm": 0.5998454689979553,
|
254323 |
+
"learning_rate": 6.956045460769547e-07,
|
254324 |
+
"loss": 0.0041,
|
254325 |
+
"step": 36325
|
254326 |
+
},
|
254327 |
+
{
|
254328 |
+
"epoch": 4.967318473950499,
|
254329 |
+
"grad_norm": 0.44041162729263306,
|
254330 |
+
"learning_rate": 6.928659455018486e-07,
|
254331 |
+
"loss": 0.0035,
|
254332 |
+
"step": 36326
|
254333 |
+
},
|
254334 |
+
{
|
254335 |
+
"epoch": 4.967455216737317,
|
254336 |
+
"grad_norm": 0.9579271674156189,
|
254337 |
+
"learning_rate": 6.901273449267424e-07,
|
254338 |
+
"loss": 0.0061,
|
254339 |
+
"step": 36327
|
254340 |
+
},
|
254341 |
+
{
|
254342 |
+
"epoch": 4.967591959524135,
|
254343 |
+
"grad_norm": 1.7388497591018677,
|
254344 |
+
"learning_rate": 6.873887443516363e-07,
|
254345 |
+
"loss": 0.0176,
|
254346 |
+
"step": 36328
|
254347 |
+
},
|
254348 |
+
{
|
254349 |
+
"epoch": 4.9677287023109535,
|
254350 |
+
"grad_norm": 1.919324517250061,
|
254351 |
+
"learning_rate": 6.846501437765302e-07,
|
254352 |
+
"loss": 0.0173,
|
254353 |
+
"step": 36329
|
254354 |
+
},
|
254355 |
+
{
|
254356 |
+
"epoch": 4.967865445097771,
|
254357 |
+
"grad_norm": 1.157983660697937,
|
254358 |
+
"learning_rate": 6.819115432014242e-07,
|
254359 |
+
"loss": 0.0079,
|
254360 |
+
"step": 36330
|
254361 |
+
},
|
254362 |
+
{
|
254363 |
+
"epoch": 4.968002187884589,
|
254364 |
+
"grad_norm": 0.37030211091041565,
|
254365 |
+
"learning_rate": 6.79172942626318e-07,
|
254366 |
+
"loss": 0.0023,
|
254367 |
+
"step": 36331
|
254368 |
+
},
|
254369 |
+
{
|
254370 |
+
"epoch": 4.968138930671407,
|
254371 |
+
"grad_norm": 1.0942816734313965,
|
254372 |
+
"learning_rate": 6.764343420512119e-07,
|
254373 |
+
"loss": 0.0072,
|
254374 |
+
"step": 36332
|
254375 |
+
},
|
254376 |
+
{
|
254377 |
+
"epoch": 4.968275673458225,
|
254378 |
+
"grad_norm": 0.39465776085853577,
|
254379 |
+
"learning_rate": 6.736957414761057e-07,
|
254380 |
+
"loss": 0.0034,
|
254381 |
+
"step": 36333
|
254382 |
+
},
|
254383 |
+
{
|
254384 |
+
"epoch": 4.968412416245043,
|
254385 |
+
"grad_norm": 0.6832284331321716,
|
254386 |
+
"learning_rate": 6.709571409009996e-07,
|
254387 |
+
"loss": 0.005,
|
254388 |
+
"step": 36334
|
254389 |
+
},
|
254390 |
+
{
|
254391 |
+
"epoch": 4.968549159031861,
|
254392 |
+
"grad_norm": 1.4067507982254028,
|
254393 |
+
"learning_rate": 6.682185403258934e-07,
|
254394 |
+
"loss": 0.0102,
|
254395 |
+
"step": 36335
|
254396 |
+
},
|
254397 |
+
{
|
254398 |
+
"epoch": 4.968685901818679,
|
254399 |
+
"grad_norm": 1.6809030771255493,
|
254400 |
+
"learning_rate": 6.654799397507873e-07,
|
254401 |
+
"loss": 0.0079,
|
254402 |
+
"step": 36336
|
254403 |
+
},
|
254404 |
+
{
|
254405 |
+
"epoch": 4.968822644605497,
|
254406 |
+
"grad_norm": 0.5156656503677368,
|
254407 |
+
"learning_rate": 6.627413391756813e-07,
|
254408 |
+
"loss": 0.0037,
|
254409 |
+
"step": 36337
|
254410 |
+
},
|
254411 |
+
{
|
254412 |
+
"epoch": 4.9689593873923155,
|
254413 |
+
"grad_norm": 1.3650546073913574,
|
254414 |
+
"learning_rate": 6.600027386005752e-07,
|
254415 |
+
"loss": 0.0084,
|
254416 |
+
"step": 36338
|
254417 |
+
},
|
254418 |
+
{
|
254419 |
+
"epoch": 4.969096130179133,
|
254420 |
+
"grad_norm": 1.0154880285263062,
|
254421 |
+
"learning_rate": 6.57264138025469e-07,
|
254422 |
+
"loss": 0.0065,
|
254423 |
+
"step": 36339
|
254424 |
+
},
|
254425 |
+
{
|
254426 |
+
"epoch": 4.969232872965951,
|
254427 |
+
"grad_norm": 0.354332834482193,
|
254428 |
+
"learning_rate": 6.545255374503629e-07,
|
254429 |
+
"loss": 0.0023,
|
254430 |
+
"step": 36340
|
254431 |
+
},
|
254432 |
+
{
|
254433 |
+
"epoch": 4.969369615752769,
|
254434 |
+
"grad_norm": 1.535766839981079,
|
254435 |
+
"learning_rate": 6.517869368752568e-07,
|
254436 |
+
"loss": 0.0113,
|
254437 |
+
"step": 36341
|
254438 |
+
},
|
254439 |
+
{
|
254440 |
+
"epoch": 4.969506358539587,
|
254441 |
+
"grad_norm": 1.2972038984298706,
|
254442 |
+
"learning_rate": 6.490483363001506e-07,
|
254443 |
+
"loss": 0.0122,
|
254444 |
+
"step": 36342
|
254445 |
+
},
|
254446 |
+
{
|
254447 |
+
"epoch": 4.969643101326405,
|
254448 |
+
"grad_norm": 0.656410276889801,
|
254449 |
+
"learning_rate": 6.463097357250444e-07,
|
254450 |
+
"loss": 0.0037,
|
254451 |
+
"step": 36343
|
254452 |
+
},
|
254453 |
+
{
|
254454 |
+
"epoch": 4.969779844113223,
|
254455 |
+
"grad_norm": 1.6895966529846191,
|
254456 |
+
"learning_rate": 6.435711351499384e-07,
|
254457 |
+
"loss": 0.0108,
|
254458 |
+
"step": 36344
|
254459 |
+
},
|
254460 |
+
{
|
254461 |
+
"epoch": 4.969916586900041,
|
254462 |
+
"grad_norm": 1.5515249967575073,
|
254463 |
+
"learning_rate": 6.408325345748323e-07,
|
254464 |
+
"loss": 0.0178,
|
254465 |
+
"step": 36345
|
254466 |
+
},
|
254467 |
+
{
|
254468 |
+
"epoch": 4.970053329686859,
|
254469 |
+
"grad_norm": 2.562568187713623,
|
254470 |
+
"learning_rate": 6.380939339997262e-07,
|
254471 |
+
"loss": 0.0177,
|
254472 |
+
"step": 36346
|
254473 |
+
},
|
254474 |
+
{
|
254475 |
+
"epoch": 4.970190072473677,
|
254476 |
+
"grad_norm": 0.42520925402641296,
|
254477 |
+
"learning_rate": 6.3535533342462e-07,
|
254478 |
+
"loss": 0.0028,
|
254479 |
+
"step": 36347
|
254480 |
+
},
|
254481 |
+
{
|
254482 |
+
"epoch": 4.970326815260495,
|
254483 |
+
"grad_norm": 1.1355477571487427,
|
254484 |
+
"learning_rate": 6.326167328495139e-07,
|
254485 |
+
"loss": 0.0072,
|
254486 |
+
"step": 36348
|
254487 |
+
},
|
254488 |
+
{
|
254489 |
+
"epoch": 4.970463558047313,
|
254490 |
+
"grad_norm": 1.2798725366592407,
|
254491 |
+
"learning_rate": 6.298781322744078e-07,
|
254492 |
+
"loss": 0.0067,
|
254493 |
+
"step": 36349
|
254494 |
+
},
|
254495 |
+
{
|
254496 |
+
"epoch": 4.970600300834131,
|
254497 |
+
"grad_norm": 0.20809626579284668,
|
254498 |
+
"learning_rate": 6.271395316993018e-07,
|
254499 |
+
"loss": 0.0019,
|
254500 |
+
"step": 36350
|
254501 |
+
},
|
254502 |
+
{
|
254503 |
+
"epoch": 4.970737043620949,
|
254504 |
+
"grad_norm": 3.786583662033081,
|
254505 |
+
"learning_rate": 6.244009311241956e-07,
|
254506 |
+
"loss": 0.0055,
|
254507 |
+
"step": 36351
|
254508 |
+
},
|
254509 |
+
{
|
254510 |
+
"epoch": 4.970873786407767,
|
254511 |
+
"grad_norm": 0.5132194757461548,
|
254512 |
+
"learning_rate": 6.216623305490894e-07,
|
254513 |
+
"loss": 0.0037,
|
254514 |
+
"step": 36352
|
254515 |
+
},
|
254516 |
+
{
|
254517 |
+
"epoch": 4.971010529194585,
|
254518 |
+
"grad_norm": 1.8282694816589355,
|
254519 |
+
"learning_rate": 6.189237299739833e-07,
|
254520 |
+
"loss": 0.021,
|
254521 |
+
"step": 36353
|
254522 |
+
},
|
254523 |
+
{
|
254524 |
+
"epoch": 4.971147271981403,
|
254525 |
+
"grad_norm": 0.8856275677680969,
|
254526 |
+
"learning_rate": 6.161851293988772e-07,
|
254527 |
+
"loss": 0.0065,
|
254528 |
+
"step": 36354
|
254529 |
+
},
|
254530 |
+
{
|
254531 |
+
"epoch": 4.971284014768221,
|
254532 |
+
"grad_norm": 0.5708004832267761,
|
254533 |
+
"learning_rate": 6.13446528823771e-07,
|
254534 |
+
"loss": 0.0039,
|
254535 |
+
"step": 36355
|
254536 |
+
},
|
254537 |
+
{
|
254538 |
+
"epoch": 4.971420757555039,
|
254539 |
+
"grad_norm": 0.6354495882987976,
|
254540 |
+
"learning_rate": 6.107079282486649e-07,
|
254541 |
+
"loss": 0.0038,
|
254542 |
+
"step": 36356
|
254543 |
+
},
|
254544 |
+
{
|
254545 |
+
"epoch": 4.971557500341857,
|
254546 |
+
"grad_norm": 2.339801788330078,
|
254547 |
+
"learning_rate": 6.079693276735589e-07,
|
254548 |
+
"loss": 0.0248,
|
254549 |
+
"step": 36357
|
254550 |
+
},
|
254551 |
+
{
|
254552 |
+
"epoch": 4.971694243128675,
|
254553 |
+
"grad_norm": 0.42952823638916016,
|
254554 |
+
"learning_rate": 6.052307270984528e-07,
|
254555 |
+
"loss": 0.0021,
|
254556 |
+
"step": 36358
|
254557 |
+
},
|
254558 |
+
{
|
254559 |
+
"epoch": 4.971830985915493,
|
254560 |
+
"grad_norm": 1.789933204650879,
|
254561 |
+
"learning_rate": 6.024921265233466e-07,
|
254562 |
+
"loss": 0.0122,
|
254563 |
+
"step": 36359
|
254564 |
+
},
|
254565 |
+
{
|
254566 |
+
"epoch": 4.971967728702311,
|
254567 |
+
"grad_norm": 0.8511978387832642,
|
254568 |
+
"learning_rate": 5.997535259482405e-07,
|
254569 |
+
"loss": 0.0051,
|
254570 |
+
"step": 36360
|
254571 |
+
},
|
254572 |
+
{
|
254573 |
+
"epoch": 4.9721044714891285,
|
254574 |
+
"grad_norm": 1.7900525331497192,
|
254575 |
+
"learning_rate": 5.970149253731343e-07,
|
254576 |
+
"loss": 0.0101,
|
254577 |
+
"step": 36361
|
254578 |
+
},
|
254579 |
+
{
|
254580 |
+
"epoch": 4.972241214275947,
|
254581 |
+
"grad_norm": 1.2664616107940674,
|
254582 |
+
"learning_rate": 5.942763247980282e-07,
|
254583 |
+
"loss": 0.0092,
|
254584 |
+
"step": 36362
|
254585 |
+
},
|
254586 |
+
{
|
254587 |
+
"epoch": 4.972377957062765,
|
254588 |
+
"grad_norm": 1.7419706583023071,
|
254589 |
+
"learning_rate": 5.91537724222922e-07,
|
254590 |
+
"loss": 0.026,
|
254591 |
+
"step": 36363
|
254592 |
+
},
|
254593 |
+
{
|
254594 |
+
"epoch": 4.972514699849583,
|
254595 |
+
"grad_norm": 0.9249622225761414,
|
254596 |
+
"learning_rate": 5.88799123647816e-07,
|
254597 |
+
"loss": 0.0046,
|
254598 |
+
"step": 36364
|
254599 |
+
},
|
254600 |
+
{
|
254601 |
+
"epoch": 4.972651442636401,
|
254602 |
+
"grad_norm": 0.9466738104820251,
|
254603 |
+
"learning_rate": 5.860605230727099e-07,
|
254604 |
+
"loss": 0.0065,
|
254605 |
+
"step": 36365
|
254606 |
+
},
|
254607 |
+
{
|
254608 |
+
"epoch": 4.9727881854232185,
|
254609 |
+
"grad_norm": 2.1827280521392822,
|
254610 |
+
"learning_rate": 5.833219224976038e-07,
|
254611 |
+
"loss": 0.0223,
|
254612 |
+
"step": 36366
|
254613 |
+
},
|
254614 |
+
{
|
254615 |
+
"epoch": 4.972924928210037,
|
254616 |
+
"grad_norm": 1.4412447214126587,
|
254617 |
+
"learning_rate": 5.805833219224976e-07,
|
254618 |
+
"loss": 0.0116,
|
254619 |
+
"step": 36367
|
254620 |
+
},
|
254621 |
+
{
|
254622 |
+
"epoch": 4.973061670996855,
|
254623 |
+
"grad_norm": 0.9633800387382507,
|
254624 |
+
"learning_rate": 5.778447213473915e-07,
|
254625 |
+
"loss": 0.008,
|
254626 |
+
"step": 36368
|
254627 |
+
},
|
254628 |
+
{
|
254629 |
+
"epoch": 4.973198413783673,
|
254630 |
+
"grad_norm": 0.48838338255882263,
|
254631 |
+
"learning_rate": 5.751061207722854e-07,
|
254632 |
+
"loss": 0.0035,
|
254633 |
+
"step": 36369
|
254634 |
+
},
|
254635 |
+
{
|
254636 |
+
"epoch": 4.97333515657049,
|
254637 |
+
"grad_norm": 0.33468934893608093,
|
254638 |
+
"learning_rate": 5.723675201971793e-07,
|
254639 |
+
"loss": 0.0022,
|
254640 |
+
"step": 36370
|
254641 |
+
},
|
254642 |
+
{
|
254643 |
+
"epoch": 4.973471899357309,
|
254644 |
+
"grad_norm": 0.9498015642166138,
|
254645 |
+
"learning_rate": 5.696289196220731e-07,
|
254646 |
+
"loss": 0.0057,
|
254647 |
+
"step": 36371
|
254648 |
+
},
|
254649 |
+
{
|
254650 |
+
"epoch": 4.973608642144127,
|
254651 |
+
"grad_norm": 0.7090117931365967,
|
254652 |
+
"learning_rate": 5.66890319046967e-07,
|
254653 |
+
"loss": 0.0047,
|
254654 |
+
"step": 36372
|
254655 |
+
},
|
254656 |
+
{
|
254657 |
+
"epoch": 4.973745384930945,
|
254658 |
+
"grad_norm": 1.1588622331619263,
|
254659 |
+
"learning_rate": 5.641517184718609e-07,
|
254660 |
+
"loss": 0.0101,
|
254661 |
+
"step": 36373
|
254662 |
+
},
|
254663 |
+
{
|
254664 |
+
"epoch": 4.973882127717763,
|
254665 |
+
"grad_norm": 1.2889727354049683,
|
254666 |
+
"learning_rate": 5.614131178967548e-07,
|
254667 |
+
"loss": 0.0094,
|
254668 |
+
"step": 36374
|
254669 |
+
},
|
254670 |
+
{
|
254671 |
+
"epoch": 4.974018870504581,
|
254672 |
+
"grad_norm": 1.1730540990829468,
|
254673 |
+
"learning_rate": 5.586745173216486e-07,
|
254674 |
+
"loss": 0.007,
|
254675 |
+
"step": 36375
|
254676 |
+
},
|
254677 |
+
{
|
254678 |
+
"epoch": 4.974155613291399,
|
254679 |
+
"grad_norm": 0.4053683578968048,
|
254680 |
+
"learning_rate": 5.559359167465425e-07,
|
254681 |
+
"loss": 0.0027,
|
254682 |
+
"step": 36376
|
254683 |
+
},
|
254684 |
+
{
|
254685 |
+
"epoch": 4.974292356078217,
|
254686 |
+
"grad_norm": 2.4154279232025146,
|
254687 |
+
"learning_rate": 5.531973161714365e-07,
|
254688 |
+
"loss": 0.0118,
|
254689 |
+
"step": 36377
|
254690 |
+
},
|
254691 |
+
{
|
254692 |
+
"epoch": 4.974429098865035,
|
254693 |
+
"grad_norm": 1.058783769607544,
|
254694 |
+
"learning_rate": 5.504587155963304e-07,
|
254695 |
+
"loss": 0.0082,
|
254696 |
+
"step": 36378
|
254697 |
+
},
|
254698 |
+
{
|
254699 |
+
"epoch": 4.974565841651853,
|
254700 |
+
"grad_norm": 0.6236831545829773,
|
254701 |
+
"learning_rate": 5.477201150212242e-07,
|
254702 |
+
"loss": 0.0051,
|
254703 |
+
"step": 36379
|
254704 |
+
},
|
254705 |
+
{
|
254706 |
+
"epoch": 4.9747025844386705,
|
254707 |
+
"grad_norm": 0.49539268016815186,
|
254708 |
+
"learning_rate": 5.44981514446118e-07,
|
254709 |
+
"loss": 0.0029,
|
254710 |
+
"step": 36380
|
254711 |
+
},
|
254712 |
+
{
|
254713 |
+
"epoch": 4.974839327225489,
|
254714 |
+
"grad_norm": 0.21157889068126678,
|
254715 |
+
"learning_rate": 5.422429138710119e-07,
|
254716 |
+
"loss": 0.0017,
|
254717 |
+
"step": 36381
|
254718 |
+
},
|
254719 |
+
{
|
254720 |
+
"epoch": 4.974976070012307,
|
254721 |
+
"grad_norm": 1.1301451921463013,
|
254722 |
+
"learning_rate": 5.395043132959058e-07,
|
254723 |
+
"loss": 0.0074,
|
254724 |
+
"step": 36382
|
254725 |
+
},
|
254726 |
+
{
|
254727 |
+
"epoch": 4.975112812799125,
|
254728 |
+
"grad_norm": 0.3338627219200134,
|
254729 |
+
"learning_rate": 5.367657127207996e-07,
|
254730 |
+
"loss": 0.0024,
|
254731 |
+
"step": 36383
|
254732 |
+
},
|
254733 |
+
{
|
254734 |
+
"epoch": 4.975249555585943,
|
254735 |
+
"grad_norm": 0.5278462171554565,
|
254736 |
+
"learning_rate": 5.340271121456936e-07,
|
254737 |
+
"loss": 0.0033,
|
254738 |
+
"step": 36384
|
254739 |
+
},
|
254740 |
+
{
|
254741 |
+
"epoch": 4.975386298372761,
|
254742 |
+
"grad_norm": 1.3437061309814453,
|
254743 |
+
"learning_rate": 5.312885115705875e-07,
|
254744 |
+
"loss": 0.0065,
|
254745 |
+
"step": 36385
|
254746 |
+
},
|
254747 |
+
{
|
254748 |
+
"epoch": 4.975523041159579,
|
254749 |
+
"grad_norm": 1.0089112520217896,
|
254750 |
+
"learning_rate": 5.285499109954814e-07,
|
254751 |
+
"loss": 0.0076,
|
254752 |
+
"step": 36386
|
254753 |
+
},
|
254754 |
+
{
|
254755 |
+
"epoch": 4.975659783946397,
|
254756 |
+
"grad_norm": 0.4511217474937439,
|
254757 |
+
"learning_rate": 5.258113104203752e-07,
|
254758 |
+
"loss": 0.0038,
|
254759 |
+
"step": 36387
|
254760 |
+
},
|
254761 |
+
{
|
254762 |
+
"epoch": 4.975796526733215,
|
254763 |
+
"grad_norm": 1.544424295425415,
|
254764 |
+
"learning_rate": 5.230727098452691e-07,
|
254765 |
+
"loss": 0.0128,
|
254766 |
+
"step": 36388
|
254767 |
+
},
|
254768 |
+
{
|
254769 |
+
"epoch": 4.9759332695200325,
|
254770 |
+
"grad_norm": 0.20929007232189178,
|
254771 |
+
"learning_rate": 5.203341092701629e-07,
|
254772 |
+
"loss": 0.002,
|
254773 |
+
"step": 36389
|
254774 |
+
},
|
254775 |
+
{
|
254776 |
+
"epoch": 4.976070012306851,
|
254777 |
+
"grad_norm": 0.22444817423820496,
|
254778 |
+
"learning_rate": 5.175955086950569e-07,
|
254779 |
+
"loss": 0.0018,
|
254780 |
+
"step": 36390
|
254781 |
+
},
|
254782 |
+
{
|
254783 |
+
"epoch": 4.976206755093669,
|
254784 |
+
"grad_norm": 1.1658614873886108,
|
254785 |
+
"learning_rate": 5.148569081199507e-07,
|
254786 |
+
"loss": 0.0118,
|
254787 |
+
"step": 36391
|
254788 |
+
},
|
254789 |
+
{
|
254790 |
+
"epoch": 4.976343497880487,
|
254791 |
+
"grad_norm": 1.1837693452835083,
|
254792 |
+
"learning_rate": 5.121183075448446e-07,
|
254793 |
+
"loss": 0.0119,
|
254794 |
+
"step": 36392
|
254795 |
+
},
|
254796 |
+
{
|
254797 |
+
"epoch": 4.976480240667305,
|
254798 |
+
"grad_norm": 1.6772544384002686,
|
254799 |
+
"learning_rate": 5.093797069697385e-07,
|
254800 |
+
"loss": 0.0102,
|
254801 |
+
"step": 36393
|
254802 |
+
},
|
254803 |
+
{
|
254804 |
+
"epoch": 4.9766169834541225,
|
254805 |
+
"grad_norm": 1.0344278812408447,
|
254806 |
+
"learning_rate": 5.066411063946324e-07,
|
254807 |
+
"loss": 0.0066,
|
254808 |
+
"step": 36394
|
254809 |
+
},
|
254810 |
+
{
|
254811 |
+
"epoch": 4.976753726240941,
|
254812 |
+
"grad_norm": 0.721714198589325,
|
254813 |
+
"learning_rate": 5.039025058195262e-07,
|
254814 |
+
"loss": 0.0043,
|
254815 |
+
"step": 36395
|
254816 |
+
},
|
254817 |
+
{
|
254818 |
+
"epoch": 4.976890469027759,
|
254819 |
+
"grad_norm": 0.8767109513282776,
|
254820 |
+
"learning_rate": 5.011639052444201e-07,
|
254821 |
+
"loss": 0.0053,
|
254822 |
+
"step": 36396
|
254823 |
+
},
|
254824 |
+
{
|
254825 |
+
"epoch": 4.977027211814577,
|
254826 |
+
"grad_norm": 1.6311348676681519,
|
254827 |
+
"learning_rate": 4.984253046693141e-07,
|
254828 |
+
"loss": 0.0126,
|
254829 |
+
"step": 36397
|
254830 |
+
},
|
254831 |
+
{
|
254832 |
+
"epoch": 4.977163954601394,
|
254833 |
+
"grad_norm": 1.0749458074569702,
|
254834 |
+
"learning_rate": 4.956867040942079e-07,
|
254835 |
+
"loss": 0.0093,
|
254836 |
+
"step": 36398
|
254837 |
+
},
|
254838 |
+
{
|
254839 |
+
"epoch": 4.977300697388213,
|
254840 |
+
"grad_norm": 0.4695919454097748,
|
254841 |
+
"learning_rate": 4.929481035191017e-07,
|
254842 |
+
"loss": 0.0019,
|
254843 |
+
"step": 36399
|
254844 |
+
},
|
254845 |
+
{
|
254846 |
+
"epoch": 4.977437440175031,
|
254847 |
+
"grad_norm": 0.4844362437725067,
|
254848 |
+
"learning_rate": 4.902095029439956e-07,
|
254849 |
+
"loss": 0.003,
|
254850 |
+
"step": 36400
|
254851 |
+
},
|
254852 |
+
{
|
254853 |
+
"epoch": 4.977574182961849,
|
254854 |
+
"grad_norm": 1.223527431488037,
|
254855 |
+
"learning_rate": 4.874709023688895e-07,
|
254856 |
+
"loss": 0.0071,
|
254857 |
+
"step": 36401
|
254858 |
+
},
|
254859 |
+
{
|
254860 |
+
"epoch": 4.977710925748667,
|
254861 |
+
"grad_norm": 0.8384305238723755,
|
254862 |
+
"learning_rate": 4.847323017937834e-07,
|
254863 |
+
"loss": 0.0043,
|
254864 |
+
"step": 36402
|
254865 |
+
},
|
254866 |
+
{
|
254867 |
+
"epoch": 4.977847668535484,
|
254868 |
+
"grad_norm": 0.45676931738853455,
|
254869 |
+
"learning_rate": 4.819937012186772e-07,
|
254870 |
+
"loss": 0.0027,
|
254871 |
+
"step": 36403
|
254872 |
+
},
|
254873 |
+
{
|
254874 |
+
"epoch": 4.977984411322303,
|
254875 |
+
"grad_norm": 0.6682257652282715,
|
254876 |
+
"learning_rate": 4.792551006435712e-07,
|
254877 |
+
"loss": 0.0057,
|
254878 |
+
"step": 36404
|
254879 |
+
},
|
254880 |
+
{
|
254881 |
+
"epoch": 4.978121154109121,
|
254882 |
+
"grad_norm": 0.4746088981628418,
|
254883 |
+
"learning_rate": 4.7651650006846503e-07,
|
254884 |
+
"loss": 0.0024,
|
254885 |
+
"step": 36405
|
254886 |
+
},
|
254887 |
+
{
|
254888 |
+
"epoch": 4.978257896895939,
|
254889 |
+
"grad_norm": 0.8960059285163879,
|
254890 |
+
"learning_rate": 4.7377789949335895e-07,
|
254891 |
+
"loss": 0.0043,
|
254892 |
+
"step": 36406
|
254893 |
+
},
|
254894 |
+
{
|
254895 |
+
"epoch": 4.978394639682756,
|
254896 |
+
"grad_norm": 0.7882165908813477,
|
254897 |
+
"learning_rate": 4.7103929891825276e-07,
|
254898 |
+
"loss": 0.0053,
|
254899 |
+
"step": 36407
|
254900 |
+
},
|
254901 |
+
{
|
254902 |
+
"epoch": 4.9785313824695745,
|
254903 |
+
"grad_norm": 2.5005037784576416,
|
254904 |
+
"learning_rate": 4.683006983431467e-07,
|
254905 |
+
"loss": 0.0302,
|
254906 |
+
"step": 36408
|
254907 |
+
},
|
254908 |
+
{
|
254909 |
+
"epoch": 4.978668125256393,
|
254910 |
+
"grad_norm": 0.9965479969978333,
|
254911 |
+
"learning_rate": 4.655620977680406e-07,
|
254912 |
+
"loss": 0.006,
|
254913 |
+
"step": 36409
|
254914 |
+
},
|
254915 |
+
{
|
254916 |
+
"epoch": 4.978804868043211,
|
254917 |
+
"grad_norm": 0.3341805040836334,
|
254918 |
+
"learning_rate": 4.6282349719293446e-07,
|
254919 |
+
"loss": 0.0024,
|
254920 |
+
"step": 36410
|
254921 |
+
},
|
254922 |
+
{
|
254923 |
+
"epoch": 4.978941610830029,
|
254924 |
+
"grad_norm": 1.8366689682006836,
|
254925 |
+
"learning_rate": 4.6008489661782827e-07,
|
254926 |
+
"loss": 0.0135,
|
254927 |
+
"step": 36411
|
254928 |
+
},
|
254929 |
+
{
|
254930 |
+
"epoch": 4.979078353616846,
|
254931 |
+
"grad_norm": 0.45825546979904175,
|
254932 |
+
"learning_rate": 4.573462960427222e-07,
|
254933 |
+
"loss": 0.003,
|
254934 |
+
"step": 36412
|
254935 |
+
},
|
254936 |
+
{
|
254937 |
+
"epoch": 4.9792150964036646,
|
254938 |
+
"grad_norm": 2.4964966773986816,
|
254939 |
+
"learning_rate": 4.546076954676161e-07,
|
254940 |
+
"loss": 0.0388,
|
254941 |
+
"step": 36413
|
254942 |
+
},
|
254943 |
+
{
|
254944 |
+
"epoch": 4.979351839190483,
|
254945 |
+
"grad_norm": 0.2576245665550232,
|
254946 |
+
"learning_rate": 4.5186909489250997e-07,
|
254947 |
+
"loss": 0.0016,
|
254948 |
+
"step": 36414
|
254949 |
+
},
|
254950 |
+
{
|
254951 |
+
"epoch": 4.979488581977301,
|
254952 |
+
"grad_norm": 0.36908894777297974,
|
254953 |
+
"learning_rate": 4.491304943174038e-07,
|
254954 |
+
"loss": 0.0027,
|
254955 |
+
"step": 36415
|
254956 |
+
},
|
254957 |
+
{
|
254958 |
+
"epoch": 4.979625324764118,
|
254959 |
+
"grad_norm": 0.3902880549430847,
|
254960 |
+
"learning_rate": 4.463918937422977e-07,
|
254961 |
+
"loss": 0.0022,
|
254962 |
+
"step": 36416
|
254963 |
+
},
|
254964 |
+
{
|
254965 |
+
"epoch": 4.979762067550936,
|
254966 |
+
"grad_norm": 0.9277600646018982,
|
254967 |
+
"learning_rate": 4.436532931671916e-07,
|
254968 |
+
"loss": 0.0056,
|
254969 |
+
"step": 36417
|
254970 |
+
},
|
254971 |
+
{
|
254972 |
+
"epoch": 4.979898810337755,
|
254973 |
+
"grad_norm": 1.2694799900054932,
|
254974 |
+
"learning_rate": 4.4091469259208553e-07,
|
254975 |
+
"loss": 0.0125,
|
254976 |
+
"step": 36418
|
254977 |
+
},
|
254978 |
+
{
|
254979 |
+
"epoch": 4.980035553124573,
|
254980 |
+
"grad_norm": 2.5731959342956543,
|
254981 |
+
"learning_rate": 4.3817609201697934e-07,
|
254982 |
+
"loss": 0.0196,
|
254983 |
+
"step": 36419
|
254984 |
+
},
|
254985 |
+
{
|
254986 |
+
"epoch": 4.980172295911391,
|
254987 |
+
"grad_norm": 1.186779260635376,
|
254988 |
+
"learning_rate": 4.354374914418732e-07,
|
254989 |
+
"loss": 0.0089,
|
254990 |
+
"step": 36420
|
254991 |
+
},
|
254992 |
+
{
|
254993 |
+
"epoch": 4.980309038698208,
|
254994 |
+
"grad_norm": 0.4634382724761963,
|
254995 |
+
"learning_rate": 4.326988908667671e-07,
|
254996 |
+
"loss": 0.0033,
|
254997 |
+
"step": 36421
|
254998 |
+
},
|
254999 |
+
{
|
255000 |
+
"epoch": 4.9804457814850265,
|
255001 |
+
"grad_norm": 1.509121060371399,
|
255002 |
+
"learning_rate": 4.2996029029166104e-07,
|
255003 |
+
"loss": 0.009,
|
255004 |
+
"step": 36422
|
255005 |
+
},
|
255006 |
+
{
|
255007 |
+
"epoch": 4.980582524271845,
|
255008 |
+
"grad_norm": 1.5799131393432617,
|
255009 |
+
"learning_rate": 4.2722168971655485e-07,
|
255010 |
+
"loss": 0.0238,
|
255011 |
+
"step": 36423
|
255012 |
+
},
|
255013 |
+
{
|
255014 |
+
"epoch": 4.980719267058663,
|
255015 |
+
"grad_norm": 0.8843979835510254,
|
255016 |
+
"learning_rate": 4.244830891414487e-07,
|
255017 |
+
"loss": 0.0056,
|
255018 |
+
"step": 36424
|
255019 |
+
},
|
255020 |
+
{
|
255021 |
+
"epoch": 4.98085600984548,
|
255022 |
+
"grad_norm": 0.6450397968292236,
|
255023 |
+
"learning_rate": 4.2174448856634263e-07,
|
255024 |
+
"loss": 0.0034,
|
255025 |
+
"step": 36425
|
255026 |
+
},
|
255027 |
+
{
|
255028 |
+
"epoch": 4.980992752632298,
|
255029 |
+
"grad_norm": 1.6002848148345947,
|
255030 |
+
"learning_rate": 4.1900588799123655e-07,
|
255031 |
+
"loss": 0.0083,
|
255032 |
+
"step": 36426
|
255033 |
+
},
|
255034 |
+
{
|
255035 |
+
"epoch": 4.9811294954191165,
|
255036 |
+
"grad_norm": 2.1150643825531006,
|
255037 |
+
"learning_rate": 4.1626728741613036e-07,
|
255038 |
+
"loss": 0.0348,
|
255039 |
+
"step": 36427
|
255040 |
+
},
|
255041 |
+
{
|
255042 |
+
"epoch": 4.981266238205935,
|
255043 |
+
"grad_norm": 1.034896731376648,
|
255044 |
+
"learning_rate": 4.135286868410243e-07,
|
255045 |
+
"loss": 0.0068,
|
255046 |
+
"step": 36428
|
255047 |
+
},
|
255048 |
+
{
|
255049 |
+
"epoch": 4.981402980992753,
|
255050 |
+
"grad_norm": 1.2414902448654175,
|
255051 |
+
"learning_rate": 4.1079008626591814e-07,
|
255052 |
+
"loss": 0.0089,
|
255053 |
+
"step": 36429
|
255054 |
+
},
|
255055 |
+
{
|
255056 |
+
"epoch": 4.981539723779571,
|
255057 |
+
"grad_norm": 1.1372041702270508,
|
255058 |
+
"learning_rate": 4.0805148569081206e-07,
|
255059 |
+
"loss": 0.0108,
|
255060 |
+
"step": 36430
|
255061 |
+
},
|
255062 |
+
{
|
255063 |
+
"epoch": 4.981676466566388,
|
255064 |
+
"grad_norm": 2.149465560913086,
|
255065 |
+
"learning_rate": 4.0531288511570587e-07,
|
255066 |
+
"loss": 0.0213,
|
255067 |
+
"step": 36431
|
255068 |
+
},
|
255069 |
+
{
|
255070 |
+
"epoch": 4.981813209353207,
|
255071 |
+
"grad_norm": 1.4256832599639893,
|
255072 |
+
"learning_rate": 4.025742845405998e-07,
|
255073 |
+
"loss": 0.0085,
|
255074 |
+
"step": 36432
|
255075 |
+
},
|
255076 |
+
{
|
255077 |
+
"epoch": 4.981949952140025,
|
255078 |
+
"grad_norm": 1.3839974403381348,
|
255079 |
+
"learning_rate": 3.9983568396549365e-07,
|
255080 |
+
"loss": 0.0067,
|
255081 |
+
"step": 36433
|
255082 |
+
},
|
255083 |
+
{
|
255084 |
+
"epoch": 4.982086694926843,
|
255085 |
+
"grad_norm": 1.0227705240249634,
|
255086 |
+
"learning_rate": 3.9709708339038757e-07,
|
255087 |
+
"loss": 0.0065,
|
255088 |
+
"step": 36434
|
255089 |
+
},
|
255090 |
+
{
|
255091 |
+
"epoch": 4.98222343771366,
|
255092 |
+
"grad_norm": 1.1390491724014282,
|
255093 |
+
"learning_rate": 3.943584828152814e-07,
|
255094 |
+
"loss": 0.007,
|
255095 |
+
"step": 36435
|
255096 |
+
},
|
255097 |
+
{
|
255098 |
+
"epoch": 4.9823601805004785,
|
255099 |
+
"grad_norm": 1.4464291334152222,
|
255100 |
+
"learning_rate": 3.916198822401753e-07,
|
255101 |
+
"loss": 0.0115,
|
255102 |
+
"step": 36436
|
255103 |
+
},
|
255104 |
+
{
|
255105 |
+
"epoch": 4.982496923287297,
|
255106 |
+
"grad_norm": 1.3123579025268555,
|
255107 |
+
"learning_rate": 3.888812816650692e-07,
|
255108 |
+
"loss": 0.008,
|
255109 |
+
"step": 36437
|
255110 |
+
},
|
255111 |
+
{
|
255112 |
+
"epoch": 4.982633666074115,
|
255113 |
+
"grad_norm": 0.8159112334251404,
|
255114 |
+
"learning_rate": 3.86142681089963e-07,
|
255115 |
+
"loss": 0.0057,
|
255116 |
+
"step": 36438
|
255117 |
+
},
|
255118 |
+
{
|
255119 |
+
"epoch": 4.982770408860933,
|
255120 |
+
"grad_norm": 0.3648214638233185,
|
255121 |
+
"learning_rate": 3.8340408051485694e-07,
|
255122 |
+
"loss": 0.0026,
|
255123 |
+
"step": 36439
|
255124 |
+
},
|
255125 |
+
{
|
255126 |
+
"epoch": 4.98290715164775,
|
255127 |
+
"grad_norm": 1.0108463764190674,
|
255128 |
+
"learning_rate": 3.806654799397508e-07,
|
255129 |
+
"loss": 0.0054,
|
255130 |
+
"step": 36440
|
255131 |
+
},
|
255132 |
+
{
|
255133 |
+
"epoch": 4.9830438944345685,
|
255134 |
+
"grad_norm": 0.6247110366821289,
|
255135 |
+
"learning_rate": 3.779268793646447e-07,
|
255136 |
+
"loss": 0.0044,
|
255137 |
+
"step": 36441
|
255138 |
+
},
|
255139 |
+
{
|
255140 |
+
"epoch": 4.983180637221387,
|
255141 |
+
"grad_norm": 0.9175667762756348,
|
255142 |
+
"learning_rate": 3.751882787895386e-07,
|
255143 |
+
"loss": 0.007,
|
255144 |
+
"step": 36442
|
255145 |
+
},
|
255146 |
+
{
|
255147 |
+
"epoch": 4.983317380008205,
|
255148 |
+
"grad_norm": 2.7067008018493652,
|
255149 |
+
"learning_rate": 3.7244967821443245e-07,
|
255150 |
+
"loss": 0.0134,
|
255151 |
+
"step": 36443
|
255152 |
+
},
|
255153 |
+
{
|
255154 |
+
"epoch": 4.983454122795022,
|
255155 |
+
"grad_norm": 2.7963175773620605,
|
255156 |
+
"learning_rate": 3.697110776393263e-07,
|
255157 |
+
"loss": 0.0235,
|
255158 |
+
"step": 36444
|
255159 |
+
},
|
255160 |
+
{
|
255161 |
+
"epoch": 4.98359086558184,
|
255162 |
+
"grad_norm": 0.6021952629089355,
|
255163 |
+
"learning_rate": 3.6697247706422023e-07,
|
255164 |
+
"loss": 0.0034,
|
255165 |
+
"step": 36445
|
255166 |
+
},
|
255167 |
+
{
|
255168 |
+
"epoch": 4.983727608368659,
|
255169 |
+
"grad_norm": 1.8978484869003296,
|
255170 |
+
"learning_rate": 3.642338764891141e-07,
|
255171 |
+
"loss": 0.0189,
|
255172 |
+
"step": 36446
|
255173 |
+
},
|
255174 |
+
{
|
255175 |
+
"epoch": 4.983864351155477,
|
255176 |
+
"grad_norm": 0.12915493547916412,
|
255177 |
+
"learning_rate": 3.6149527591400796e-07,
|
255178 |
+
"loss": 0.0012,
|
255179 |
+
"step": 36447
|
255180 |
+
},
|
255181 |
+
{
|
255182 |
+
"epoch": 4.984001093942295,
|
255183 |
+
"grad_norm": 0.4594871997833252,
|
255184 |
+
"learning_rate": 3.587566753389018e-07,
|
255185 |
+
"loss": 0.0026,
|
255186 |
+
"step": 36448
|
255187 |
+
},
|
255188 |
+
{
|
255189 |
+
"epoch": 4.984137836729112,
|
255190 |
+
"grad_norm": 1.2668795585632324,
|
255191 |
+
"learning_rate": 3.5601807476379574e-07,
|
255192 |
+
"loss": 0.0122,
|
255193 |
+
"step": 36449
|
255194 |
+
},
|
255195 |
+
{
|
255196 |
+
"epoch": 4.9842745795159304,
|
255197 |
+
"grad_norm": 0.7023041248321533,
|
255198 |
+
"learning_rate": 3.532794741886896e-07,
|
255199 |
+
"loss": 0.0059,
|
255200 |
+
"step": 36450
|
255201 |
+
},
|
255202 |
+
{
|
255203 |
+
"epoch": 4.984411322302749,
|
255204 |
+
"grad_norm": 0.48713958263397217,
|
255205 |
+
"learning_rate": 3.5054087361358347e-07,
|
255206 |
+
"loss": 0.0027,
|
255207 |
+
"step": 36451
|
255208 |
+
},
|
255209 |
+
{
|
255210 |
+
"epoch": 4.984548065089567,
|
255211 |
+
"grad_norm": 0.7305535674095154,
|
255212 |
+
"learning_rate": 3.4780227303847734e-07,
|
255213 |
+
"loss": 0.0046,
|
255214 |
+
"step": 36452
|
255215 |
+
},
|
255216 |
+
{
|
255217 |
+
"epoch": 4.984684807876384,
|
255218 |
+
"grad_norm": 1.5992416143417358,
|
255219 |
+
"learning_rate": 3.450636724633712e-07,
|
255220 |
+
"loss": 0.0084,
|
255221 |
+
"step": 36453
|
255222 |
+
},
|
255223 |
+
{
|
255224 |
+
"epoch": 4.984821550663202,
|
255225 |
+
"grad_norm": 0.8926239609718323,
|
255226 |
+
"learning_rate": 3.423250718882651e-07,
|
255227 |
+
"loss": 0.0053,
|
255228 |
+
"step": 36454
|
255229 |
+
},
|
255230 |
+
{
|
255231 |
+
"epoch": 4.9849582934500205,
|
255232 |
+
"grad_norm": 2.919534206390381,
|
255233 |
+
"learning_rate": 3.39586471313159e-07,
|
255234 |
+
"loss": 0.0304,
|
255235 |
+
"step": 36455
|
255236 |
+
},
|
255237 |
+
{
|
255238 |
+
"epoch": 4.985095036236839,
|
255239 |
+
"grad_norm": 0.7906861305236816,
|
255240 |
+
"learning_rate": 3.3684787073805285e-07,
|
255241 |
+
"loss": 0.0048,
|
255242 |
+
"step": 36456
|
255243 |
+
},
|
255244 |
+
{
|
255245 |
+
"epoch": 4.985231779023657,
|
255246 |
+
"grad_norm": 0.868878960609436,
|
255247 |
+
"learning_rate": 3.341092701629467e-07,
|
255248 |
+
"loss": 0.0054,
|
255249 |
+
"step": 36457
|
255250 |
+
},
|
255251 |
+
{
|
255252 |
+
"epoch": 4.985368521810474,
|
255253 |
+
"grad_norm": 1.4025198221206665,
|
255254 |
+
"learning_rate": 3.3137066958784063e-07,
|
255255 |
+
"loss": 0.0088,
|
255256 |
+
"step": 36458
|
255257 |
+
},
|
255258 |
+
{
|
255259 |
+
"epoch": 4.985505264597292,
|
255260 |
+
"grad_norm": 1.726462960243225,
|
255261 |
+
"learning_rate": 3.286320690127345e-07,
|
255262 |
+
"loss": 0.0287,
|
255263 |
+
"step": 36459
|
255264 |
+
},
|
255265 |
+
{
|
255266 |
+
"epoch": 4.985642007384111,
|
255267 |
+
"grad_norm": 1.3135771751403809,
|
255268 |
+
"learning_rate": 3.258934684376284e-07,
|
255269 |
+
"loss": 0.0099,
|
255270 |
+
"step": 36460
|
255271 |
+
},
|
255272 |
+
{
|
255273 |
+
"epoch": 4.985778750170929,
|
255274 |
+
"grad_norm": 1.3983891010284424,
|
255275 |
+
"learning_rate": 3.231548678625222e-07,
|
255276 |
+
"loss": 0.0077,
|
255277 |
+
"step": 36461
|
255278 |
+
},
|
255279 |
+
{
|
255280 |
+
"epoch": 4.985915492957746,
|
255281 |
+
"grad_norm": 1.3720247745513916,
|
255282 |
+
"learning_rate": 3.2041626728741614e-07,
|
255283 |
+
"loss": 0.0079,
|
255284 |
+
"step": 36462
|
255285 |
+
},
|
255286 |
+
{
|
255287 |
+
"epoch": 4.986052235744564,
|
255288 |
+
"grad_norm": 1.290981650352478,
|
255289 |
+
"learning_rate": 3.1767766671231e-07,
|
255290 |
+
"loss": 0.0068,
|
255291 |
+
"step": 36463
|
255292 |
+
},
|
255293 |
+
{
|
255294 |
+
"epoch": 4.986188978531382,
|
255295 |
+
"grad_norm": 2.5063252449035645,
|
255296 |
+
"learning_rate": 3.149390661372039e-07,
|
255297 |
+
"loss": 0.0368,
|
255298 |
+
"step": 36464
|
255299 |
+
},
|
255300 |
+
{
|
255301 |
+
"epoch": 4.986325721318201,
|
255302 |
+
"grad_norm": 0.6218810677528381,
|
255303 |
+
"learning_rate": 3.122004655620978e-07,
|
255304 |
+
"loss": 0.0042,
|
255305 |
+
"step": 36465
|
255306 |
+
},
|
255307 |
+
{
|
255308 |
+
"epoch": 4.986462464105019,
|
255309 |
+
"grad_norm": 1.2588287591934204,
|
255310 |
+
"learning_rate": 3.0946186498699165e-07,
|
255311 |
+
"loss": 0.0064,
|
255312 |
+
"step": 36466
|
255313 |
+
},
|
255314 |
+
{
|
255315 |
+
"epoch": 4.986599206891836,
|
255316 |
+
"grad_norm": 0.7435799241065979,
|
255317 |
+
"learning_rate": 3.067232644118855e-07,
|
255318 |
+
"loss": 0.0057,
|
255319 |
+
"step": 36467
|
255320 |
+
},
|
255321 |
+
{
|
255322 |
+
"epoch": 4.986735949678654,
|
255323 |
+
"grad_norm": 2.245314359664917,
|
255324 |
+
"learning_rate": 3.0398466383677943e-07,
|
255325 |
+
"loss": 0.0156,
|
255326 |
+
"step": 36468
|
255327 |
+
},
|
255328 |
+
{
|
255329 |
+
"epoch": 4.9868726924654725,
|
255330 |
+
"grad_norm": 0.7292805314064026,
|
255331 |
+
"learning_rate": 3.012460632616733e-07,
|
255332 |
+
"loss": 0.0038,
|
255333 |
+
"step": 36469
|
255334 |
+
},
|
255335 |
+
{
|
255336 |
+
"epoch": 4.987009435252291,
|
255337 |
+
"grad_norm": 1.3342645168304443,
|
255338 |
+
"learning_rate": 2.9850746268656716e-07,
|
255339 |
+
"loss": 0.0356,
|
255340 |
+
"step": 36470
|
255341 |
+
},
|
255342 |
+
{
|
255343 |
+
"epoch": 4.987146178039108,
|
255344 |
+
"grad_norm": 0.37937304377555847,
|
255345 |
+
"learning_rate": 2.95768862111461e-07,
|
255346 |
+
"loss": 0.003,
|
255347 |
+
"step": 36471
|
255348 |
+
},
|
255349 |
+
{
|
255350 |
+
"epoch": 4.987282920825926,
|
255351 |
+
"grad_norm": 1.4500336647033691,
|
255352 |
+
"learning_rate": 2.9303026153635494e-07,
|
255353 |
+
"loss": 0.0239,
|
255354 |
+
"step": 36472
|
255355 |
+
},
|
255356 |
+
{
|
255357 |
+
"epoch": 4.987419663612744,
|
255358 |
+
"grad_norm": 1.189920425415039,
|
255359 |
+
"learning_rate": 2.902916609612488e-07,
|
255360 |
+
"loss": 0.0099,
|
255361 |
+
"step": 36473
|
255362 |
+
},
|
255363 |
+
{
|
255364 |
+
"epoch": 4.9875564063995625,
|
255365 |
+
"grad_norm": 0.861122727394104,
|
255366 |
+
"learning_rate": 2.875530603861427e-07,
|
255367 |
+
"loss": 0.0056,
|
255368 |
+
"step": 36474
|
255369 |
+
},
|
255370 |
+
{
|
255371 |
+
"epoch": 4.987693149186381,
|
255372 |
+
"grad_norm": 1.5202419757843018,
|
255373 |
+
"learning_rate": 2.8481445981103653e-07,
|
255374 |
+
"loss": 0.0089,
|
255375 |
+
"step": 36475
|
255376 |
+
},
|
255377 |
+
{
|
255378 |
+
"epoch": 4.987829891973198,
|
255379 |
+
"grad_norm": 1.0348633527755737,
|
255380 |
+
"learning_rate": 2.8207585923593045e-07,
|
255381 |
+
"loss": 0.0069,
|
255382 |
+
"step": 36476
|
255383 |
+
},
|
255384 |
+
{
|
255385 |
+
"epoch": 4.987966634760016,
|
255386 |
+
"grad_norm": 1.0687999725341797,
|
255387 |
+
"learning_rate": 2.793372586608243e-07,
|
255388 |
+
"loss": 0.0074,
|
255389 |
+
"step": 36477
|
255390 |
+
},
|
255391 |
+
{
|
255392 |
+
"epoch": 4.988103377546834,
|
255393 |
+
"grad_norm": 0.9345245957374573,
|
255394 |
+
"learning_rate": 2.7659865808571823e-07,
|
255395 |
+
"loss": 0.0041,
|
255396 |
+
"step": 36478
|
255397 |
+
},
|
255398 |
+
{
|
255399 |
+
"epoch": 4.988240120333653,
|
255400 |
+
"grad_norm": 0.98683100938797,
|
255401 |
+
"learning_rate": 2.738600575106121e-07,
|
255402 |
+
"loss": 0.0069,
|
255403 |
+
"step": 36479
|
255404 |
+
},
|
255405 |
+
{
|
255406 |
+
"epoch": 4.98837686312047,
|
255407 |
+
"grad_norm": 0.7972103357315063,
|
255408 |
+
"learning_rate": 2.7112145693550596e-07,
|
255409 |
+
"loss": 0.0058,
|
255410 |
+
"step": 36480
|
255411 |
+
},
|
255412 |
+
{
|
255413 |
+
"epoch": 4.988513605907288,
|
255414 |
+
"grad_norm": 1.1654131412506104,
|
255415 |
+
"learning_rate": 2.683828563603998e-07,
|
255416 |
+
"loss": 0.0082,
|
255417 |
+
"step": 36481
|
255418 |
+
},
|
255419 |
+
{
|
255420 |
+
"epoch": 4.988650348694106,
|
255421 |
+
"grad_norm": 0.38147568702697754,
|
255422 |
+
"learning_rate": 2.6564425578529374e-07,
|
255423 |
+
"loss": 0.0027,
|
255424 |
+
"step": 36482
|
255425 |
+
},
|
255426 |
+
{
|
255427 |
+
"epoch": 4.9887870914809245,
|
255428 |
+
"grad_norm": 1.7539217472076416,
|
255429 |
+
"learning_rate": 2.629056552101876e-07,
|
255430 |
+
"loss": 0.0154,
|
255431 |
+
"step": 36483
|
255432 |
+
},
|
255433 |
+
{
|
255434 |
+
"epoch": 4.988923834267743,
|
255435 |
+
"grad_norm": 1.9670413732528687,
|
255436 |
+
"learning_rate": 2.6016705463508147e-07,
|
255437 |
+
"loss": 0.0105,
|
255438 |
+
"step": 36484
|
255439 |
+
},
|
255440 |
+
{
|
255441 |
+
"epoch": 4.989060577054561,
|
255442 |
+
"grad_norm": 0.5444817543029785,
|
255443 |
+
"learning_rate": 2.5742845405997533e-07,
|
255444 |
+
"loss": 0.0037,
|
255445 |
+
"step": 36485
|
255446 |
+
},
|
255447 |
+
{
|
255448 |
+
"epoch": 4.989197319841378,
|
255449 |
+
"grad_norm": 0.9214085340499878,
|
255450 |
+
"learning_rate": 2.5468985348486925e-07,
|
255451 |
+
"loss": 0.006,
|
255452 |
+
"step": 36486
|
255453 |
+
},
|
255454 |
+
{
|
255455 |
+
"epoch": 4.989334062628196,
|
255456 |
+
"grad_norm": 5.291913032531738,
|
255457 |
+
"learning_rate": 2.519512529097631e-07,
|
255458 |
+
"loss": 0.0272,
|
255459 |
+
"step": 36487
|
255460 |
+
},
|
255461 |
+
{
|
255462 |
+
"epoch": 4.9894708054150145,
|
255463 |
+
"grad_norm": 2.1445093154907227,
|
255464 |
+
"learning_rate": 2.4921265233465703e-07,
|
255465 |
+
"loss": 0.0143,
|
255466 |
+
"step": 36488
|
255467 |
+
},
|
255468 |
+
{
|
255469 |
+
"epoch": 4.989607548201833,
|
255470 |
+
"grad_norm": 0.4100111424922943,
|
255471 |
+
"learning_rate": 2.4647405175955084e-07,
|
255472 |
+
"loss": 0.0034,
|
255473 |
+
"step": 36489
|
255474 |
+
},
|
255475 |
+
{
|
255476 |
+
"epoch": 4.98974429098865,
|
255477 |
+
"grad_norm": 1.5850245952606201,
|
255478 |
+
"learning_rate": 2.4373545118444476e-07,
|
255479 |
+
"loss": 0.0147,
|
255480 |
+
"step": 36490
|
255481 |
+
},
|
255482 |
+
{
|
255483 |
+
"epoch": 4.989881033775468,
|
255484 |
+
"grad_norm": 1.6083455085754395,
|
255485 |
+
"learning_rate": 2.409968506093386e-07,
|
255486 |
+
"loss": 0.011,
|
255487 |
+
"step": 36491
|
255488 |
+
},
|
255489 |
+
{
|
255490 |
+
"epoch": 4.990017776562286,
|
255491 |
+
"grad_norm": 1.6191855669021606,
|
255492 |
+
"learning_rate": 2.3825825003423251e-07,
|
255493 |
+
"loss": 0.0087,
|
255494 |
+
"step": 36492
|
255495 |
+
},
|
255496 |
+
{
|
255497 |
+
"epoch": 4.990154519349105,
|
255498 |
+
"grad_norm": 1.2378071546554565,
|
255499 |
+
"learning_rate": 2.3551964945912638e-07,
|
255500 |
+
"loss": 0.0135,
|
255501 |
+
"step": 36493
|
255502 |
+
},
|
255503 |
+
{
|
255504 |
+
"epoch": 4.990291262135923,
|
255505 |
+
"grad_norm": 1.5864588022232056,
|
255506 |
+
"learning_rate": 2.327810488840203e-07,
|
255507 |
+
"loss": 0.01,
|
255508 |
+
"step": 36494
|
255509 |
+
},
|
255510 |
+
{
|
255511 |
+
"epoch": 4.99042800492274,
|
255512 |
+
"grad_norm": 1.265009880065918,
|
255513 |
+
"learning_rate": 2.3004244830891413e-07,
|
255514 |
+
"loss": 0.0068,
|
255515 |
+
"step": 36495
|
255516 |
+
},
|
255517 |
+
{
|
255518 |
+
"epoch": 4.990564747709558,
|
255519 |
+
"grad_norm": 1.408017873764038,
|
255520 |
+
"learning_rate": 2.2730384773380805e-07,
|
255521 |
+
"loss": 0.0063,
|
255522 |
+
"step": 36496
|
255523 |
+
},
|
255524 |
+
{
|
255525 |
+
"epoch": 4.9907014904963765,
|
255526 |
+
"grad_norm": 1.365462303161621,
|
255527 |
+
"learning_rate": 2.245652471587019e-07,
|
255528 |
+
"loss": 0.007,
|
255529 |
+
"step": 36497
|
255530 |
+
},
|
255531 |
+
{
|
255532 |
+
"epoch": 4.990838233283195,
|
255533 |
+
"grad_norm": 1.0909748077392578,
|
255534 |
+
"learning_rate": 2.218266465835958e-07,
|
255535 |
+
"loss": 0.0058,
|
255536 |
+
"step": 36498
|
255537 |
+
},
|
255538 |
+
{
|
255539 |
+
"epoch": 4.990974976070012,
|
255540 |
+
"grad_norm": 0.8327326774597168,
|
255541 |
+
"learning_rate": 2.1908804600848967e-07,
|
255542 |
+
"loss": 0.004,
|
255543 |
+
"step": 36499
|
255544 |
+
},
|
255545 |
+
{
|
255546 |
+
"epoch": 4.99111171885683,
|
255547 |
+
"grad_norm": 0.6343533992767334,
|
255548 |
+
"learning_rate": 2.1634944543338356e-07,
|
255549 |
+
"loss": 0.0048,
|
255550 |
+
"step": 36500
|
255551 |
}
|
255552 |
],
|
255553 |
"logging_steps": 1,
|
|
|
255567 |
"attributes": {}
|
255568 |
}
|
255569 |
},
|
255570 |
+
"total_flos": 1.437614872805376e+19,
|
255571 |
"train_batch_size": 16,
|
255572 |
"trial_name": null,
|
255573 |
"trial_params": null
|