Training in progress, step 1596, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125248064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d33a7836934c3376b1aee732e866ccd1e8a9ba2eff2d7504df16cb35ca19ff30
|
3 |
size 125248064
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 64220436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b5af58bee9b7e7884ca03bb1f6d52f8b5b753f7022fa866427771e04c4bbbb8
|
3 |
size 64220436
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92258d11f2fa8e1fad8ed26a405928cd050fb45c2a2c4c7ab589bda4cda73287
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9589770fbbdf616be9b60e4236b2068dd072c97c15dc40ad405979e59da84e39
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8386,6 +8386,2799 @@
|
|
8386 |
"learning_rate": 1.4732034477444844e-05,
|
8387 |
"loss": 1.3927,
|
8388 |
"step": 1197
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8389 |
}
|
8390 |
],
|
8391 |
"logging_steps": 1,
|
@@ -8400,12 +11193,12 @@
|
|
8400 |
"should_evaluate": false,
|
8401 |
"should_log": false,
|
8402 |
"should_save": true,
|
8403 |
-
"should_training_stop":
|
8404 |
},
|
8405 |
"attributes": {}
|
8406 |
}
|
8407 |
},
|
8408 |
-
"total_flos":
|
8409 |
"train_batch_size": 4,
|
8410 |
"trial_name": null,
|
8411 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.2730071843995895,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1596,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8386 |
"learning_rate": 1.4732034477444844e-05,
|
8387 |
"loss": 1.3927,
|
8388 |
"step": 1197
|
8389 |
+
},
|
8390 |
+
{
|
8391 |
+
"epoch": 0.20492644543277455,
|
8392 |
+
"grad_norm": 2.5925216674804688,
|
8393 |
+
"learning_rate": 1.4662118433498229e-05,
|
8394 |
+
"loss": 1.4143,
|
8395 |
+
"step": 1198
|
8396 |
+
},
|
8397 |
+
{
|
8398 |
+
"epoch": 0.205097502565857,
|
8399 |
+
"grad_norm": 3.2330739498138428,
|
8400 |
+
"learning_rate": 1.4592340173858493e-05,
|
8401 |
+
"loss": 1.4354,
|
8402 |
+
"step": 1199
|
8403 |
+
},
|
8404 |
+
{
|
8405 |
+
"epoch": 0.20526855969893945,
|
8406 |
+
"grad_norm": 3.7249834537506104,
|
8407 |
+
"learning_rate": 1.4522699970594839e-05,
|
8408 |
+
"loss": 1.3993,
|
8409 |
+
"step": 1200
|
8410 |
+
},
|
8411 |
+
{
|
8412 |
+
"epoch": 0.2054396168320219,
|
8413 |
+
"grad_norm": 0.4889140725135803,
|
8414 |
+
"learning_rate": 1.4453198095238162e-05,
|
8415 |
+
"loss": 1.1751,
|
8416 |
+
"step": 1201
|
8417 |
+
},
|
8418 |
+
{
|
8419 |
+
"epoch": 0.20561067396510435,
|
8420 |
+
"grad_norm": 0.537131130695343,
|
8421 |
+
"learning_rate": 1.4383834818780073e-05,
|
8422 |
+
"loss": 1.1036,
|
8423 |
+
"step": 1202
|
8424 |
+
},
|
8425 |
+
{
|
8426 |
+
"epoch": 0.2057817310981868,
|
8427 |
+
"grad_norm": 0.48564401268959045,
|
8428 |
+
"learning_rate": 1.4314610411671664e-05,
|
8429 |
+
"loss": 1.1957,
|
8430 |
+
"step": 1203
|
8431 |
+
},
|
8432 |
+
{
|
8433 |
+
"epoch": 0.20595278823126925,
|
8434 |
+
"grad_norm": 0.5370252728462219,
|
8435 |
+
"learning_rate": 1.424552514382269e-05,
|
8436 |
+
"loss": 1.1697,
|
8437 |
+
"step": 1204
|
8438 |
+
},
|
8439 |
+
{
|
8440 |
+
"epoch": 0.2061238453643517,
|
8441 |
+
"grad_norm": 0.5765929818153381,
|
8442 |
+
"learning_rate": 1.4176579284600289e-05,
|
8443 |
+
"loss": 1.2709,
|
8444 |
+
"step": 1205
|
8445 |
+
},
|
8446 |
+
{
|
8447 |
+
"epoch": 0.20629490249743415,
|
8448 |
+
"grad_norm": 0.5106022357940674,
|
8449 |
+
"learning_rate": 1.410777310282812e-05,
|
8450 |
+
"loss": 1.272,
|
8451 |
+
"step": 1206
|
8452 |
+
},
|
8453 |
+
{
|
8454 |
+
"epoch": 0.2064659596305166,
|
8455 |
+
"grad_norm": 0.5149855017662048,
|
8456 |
+
"learning_rate": 1.403910686678519e-05,
|
8457 |
+
"loss": 1.2519,
|
8458 |
+
"step": 1207
|
8459 |
+
},
|
8460 |
+
{
|
8461 |
+
"epoch": 0.20663701676359905,
|
8462 |
+
"grad_norm": 0.5240956544876099,
|
8463 |
+
"learning_rate": 1.3970580844204855e-05,
|
8464 |
+
"loss": 1.1715,
|
8465 |
+
"step": 1208
|
8466 |
+
},
|
8467 |
+
{
|
8468 |
+
"epoch": 0.2068080738966815,
|
8469 |
+
"grad_norm": 0.5452810525894165,
|
8470 |
+
"learning_rate": 1.3902195302273779e-05,
|
8471 |
+
"loss": 1.2607,
|
8472 |
+
"step": 1209
|
8473 |
+
},
|
8474 |
+
{
|
8475 |
+
"epoch": 0.20697913102976395,
|
8476 |
+
"grad_norm": 0.5322362780570984,
|
8477 |
+
"learning_rate": 1.3833950507630872e-05,
|
8478 |
+
"loss": 1.2733,
|
8479 |
+
"step": 1210
|
8480 |
+
},
|
8481 |
+
{
|
8482 |
+
"epoch": 0.2071501881628464,
|
8483 |
+
"grad_norm": 0.524337649345398,
|
8484 |
+
"learning_rate": 1.3765846726366277e-05,
|
8485 |
+
"loss": 1.3067,
|
8486 |
+
"step": 1211
|
8487 |
+
},
|
8488 |
+
{
|
8489 |
+
"epoch": 0.20732124529592885,
|
8490 |
+
"grad_norm": 0.5577448010444641,
|
8491 |
+
"learning_rate": 1.3697884224020307e-05,
|
8492 |
+
"loss": 1.2954,
|
8493 |
+
"step": 1212
|
8494 |
+
},
|
8495 |
+
{
|
8496 |
+
"epoch": 0.2074923024290113,
|
8497 |
+
"grad_norm": 0.5897120833396912,
|
8498 |
+
"learning_rate": 1.3630063265582416e-05,
|
8499 |
+
"loss": 1.2379,
|
8500 |
+
"step": 1213
|
8501 |
+
},
|
8502 |
+
{
|
8503 |
+
"epoch": 0.20766335956209375,
|
8504 |
+
"grad_norm": 0.5850743055343628,
|
8505 |
+
"learning_rate": 1.3562384115490224e-05,
|
8506 |
+
"loss": 1.3104,
|
8507 |
+
"step": 1214
|
8508 |
+
},
|
8509 |
+
{
|
8510 |
+
"epoch": 0.2078344166951762,
|
8511 |
+
"grad_norm": 0.5843027830123901,
|
8512 |
+
"learning_rate": 1.349484703762834e-05,
|
8513 |
+
"loss": 1.3101,
|
8514 |
+
"step": 1215
|
8515 |
+
},
|
8516 |
+
{
|
8517 |
+
"epoch": 0.20800547382825862,
|
8518 |
+
"grad_norm": 0.6306421160697937,
|
8519 |
+
"learning_rate": 1.3427452295327487e-05,
|
8520 |
+
"loss": 1.4251,
|
8521 |
+
"step": 1216
|
8522 |
+
},
|
8523 |
+
{
|
8524 |
+
"epoch": 0.2081765309613411,
|
8525 |
+
"grad_norm": 0.6246312856674194,
|
8526 |
+
"learning_rate": 1.3360200151363412e-05,
|
8527 |
+
"loss": 1.2161,
|
8528 |
+
"step": 1217
|
8529 |
+
},
|
8530 |
+
{
|
8531 |
+
"epoch": 0.20834758809442352,
|
8532 |
+
"grad_norm": 0.6040066480636597,
|
8533 |
+
"learning_rate": 1.3293090867955826e-05,
|
8534 |
+
"loss": 1.1843,
|
8535 |
+
"step": 1218
|
8536 |
+
},
|
8537 |
+
{
|
8538 |
+
"epoch": 0.208518645227506,
|
8539 |
+
"grad_norm": 0.5797515511512756,
|
8540 |
+
"learning_rate": 1.32261247067675e-05,
|
8541 |
+
"loss": 1.2465,
|
8542 |
+
"step": 1219
|
8543 |
+
},
|
8544 |
+
{
|
8545 |
+
"epoch": 0.20868970236058842,
|
8546 |
+
"grad_norm": 0.6712151765823364,
|
8547 |
+
"learning_rate": 1.3159301928903044e-05,
|
8548 |
+
"loss": 1.3117,
|
8549 |
+
"step": 1220
|
8550 |
+
},
|
8551 |
+
{
|
8552 |
+
"epoch": 0.2088607594936709,
|
8553 |
+
"grad_norm": 0.6769530177116394,
|
8554 |
+
"learning_rate": 1.3092622794908143e-05,
|
8555 |
+
"loss": 1.3089,
|
8556 |
+
"step": 1221
|
8557 |
+
},
|
8558 |
+
{
|
8559 |
+
"epoch": 0.20903181662675333,
|
8560 |
+
"grad_norm": 0.6964675188064575,
|
8561 |
+
"learning_rate": 1.3026087564768264e-05,
|
8562 |
+
"loss": 1.2753,
|
8563 |
+
"step": 1222
|
8564 |
+
},
|
8565 |
+
{
|
8566 |
+
"epoch": 0.2092028737598358,
|
8567 |
+
"grad_norm": 0.7019360065460205,
|
8568 |
+
"learning_rate": 1.2959696497907931e-05,
|
8569 |
+
"loss": 1.0983,
|
8570 |
+
"step": 1223
|
8571 |
+
},
|
8572 |
+
{
|
8573 |
+
"epoch": 0.20937393089291823,
|
8574 |
+
"grad_norm": 0.7152572274208069,
|
8575 |
+
"learning_rate": 1.2893449853189432e-05,
|
8576 |
+
"loss": 1.2083,
|
8577 |
+
"step": 1224
|
8578 |
+
},
|
8579 |
+
{
|
8580 |
+
"epoch": 0.2095449880260007,
|
8581 |
+
"grad_norm": 0.7500102519989014,
|
8582 |
+
"learning_rate": 1.2827347888912055e-05,
|
8583 |
+
"loss": 1.2162,
|
8584 |
+
"step": 1225
|
8585 |
+
},
|
8586 |
+
{
|
8587 |
+
"epoch": 0.20971604515908313,
|
8588 |
+
"grad_norm": 0.6842475533485413,
|
8589 |
+
"learning_rate": 1.2761390862810907e-05,
|
8590 |
+
"loss": 1.0126,
|
8591 |
+
"step": 1226
|
8592 |
+
},
|
8593 |
+
{
|
8594 |
+
"epoch": 0.2098871022921656,
|
8595 |
+
"grad_norm": 0.7505866885185242,
|
8596 |
+
"learning_rate": 1.2695579032056e-05,
|
8597 |
+
"loss": 1.0843,
|
8598 |
+
"step": 1227
|
8599 |
+
},
|
8600 |
+
{
|
8601 |
+
"epoch": 0.21005815942524803,
|
8602 |
+
"grad_norm": 0.7881115674972534,
|
8603 |
+
"learning_rate": 1.2629912653251196e-05,
|
8604 |
+
"loss": 1.1237,
|
8605 |
+
"step": 1228
|
8606 |
+
},
|
8607 |
+
{
|
8608 |
+
"epoch": 0.2102292165583305,
|
8609 |
+
"grad_norm": 0.8163465261459351,
|
8610 |
+
"learning_rate": 1.2564391982433271e-05,
|
8611 |
+
"loss": 1.1496,
|
8612 |
+
"step": 1229
|
8613 |
+
},
|
8614 |
+
{
|
8615 |
+
"epoch": 0.21040027369141293,
|
8616 |
+
"grad_norm": 0.9275565147399902,
|
8617 |
+
"learning_rate": 1.2499017275070834e-05,
|
8618 |
+
"loss": 1.2823,
|
8619 |
+
"step": 1230
|
8620 |
+
},
|
8621 |
+
{
|
8622 |
+
"epoch": 0.2105713308244954,
|
8623 |
+
"grad_norm": 0.9730026721954346,
|
8624 |
+
"learning_rate": 1.2433788786063416e-05,
|
8625 |
+
"loss": 1.288,
|
8626 |
+
"step": 1231
|
8627 |
+
},
|
8628 |
+
{
|
8629 |
+
"epoch": 0.21074238795757783,
|
8630 |
+
"grad_norm": 0.8942968249320984,
|
8631 |
+
"learning_rate": 1.2368706769740385e-05,
|
8632 |
+
"loss": 1.0438,
|
8633 |
+
"step": 1232
|
8634 |
+
},
|
8635 |
+
{
|
8636 |
+
"epoch": 0.2109134450906603,
|
8637 |
+
"grad_norm": 0.9115703105926514,
|
8638 |
+
"learning_rate": 1.2303771479860089e-05,
|
8639 |
+
"loss": 1.1263,
|
8640 |
+
"step": 1233
|
8641 |
+
},
|
8642 |
+
{
|
8643 |
+
"epoch": 0.21108450222374273,
|
8644 |
+
"grad_norm": 1.0126967430114746,
|
8645 |
+
"learning_rate": 1.2238983169608659e-05,
|
8646 |
+
"loss": 1.0783,
|
8647 |
+
"step": 1234
|
8648 |
+
},
|
8649 |
+
{
|
8650 |
+
"epoch": 0.2112555593568252,
|
8651 |
+
"grad_norm": 1.1496914625167847,
|
8652 |
+
"learning_rate": 1.2174342091599277e-05,
|
8653 |
+
"loss": 1.2131,
|
8654 |
+
"step": 1235
|
8655 |
+
},
|
8656 |
+
{
|
8657 |
+
"epoch": 0.21142661648990763,
|
8658 |
+
"grad_norm": 1.173646330833435,
|
8659 |
+
"learning_rate": 1.2109848497870945e-05,
|
8660 |
+
"loss": 1.2618,
|
8661 |
+
"step": 1236
|
8662 |
+
},
|
8663 |
+
{
|
8664 |
+
"epoch": 0.2115976736229901,
|
8665 |
+
"grad_norm": 1.1939373016357422,
|
8666 |
+
"learning_rate": 1.2045502639887673e-05,
|
8667 |
+
"loss": 1.4783,
|
8668 |
+
"step": 1237
|
8669 |
+
},
|
8670 |
+
{
|
8671 |
+
"epoch": 0.21176873075607253,
|
8672 |
+
"grad_norm": 1.2593927383422852,
|
8673 |
+
"learning_rate": 1.1981304768537472e-05,
|
8674 |
+
"loss": 1.223,
|
8675 |
+
"step": 1238
|
8676 |
+
},
|
8677 |
+
{
|
8678 |
+
"epoch": 0.211939787889155,
|
8679 |
+
"grad_norm": 1.4290164709091187,
|
8680 |
+
"learning_rate": 1.1917255134131244e-05,
|
8681 |
+
"loss": 1.0375,
|
8682 |
+
"step": 1239
|
8683 |
+
},
|
8684 |
+
{
|
8685 |
+
"epoch": 0.21211084502223743,
|
8686 |
+
"grad_norm": 1.3649468421936035,
|
8687 |
+
"learning_rate": 1.1853353986402033e-05,
|
8688 |
+
"loss": 1.3399,
|
8689 |
+
"step": 1240
|
8690 |
+
},
|
8691 |
+
{
|
8692 |
+
"epoch": 0.2122819021553199,
|
8693 |
+
"grad_norm": 1.5981239080429077,
|
8694 |
+
"learning_rate": 1.17896015745038e-05,
|
8695 |
+
"loss": 1.2674,
|
8696 |
+
"step": 1241
|
8697 |
+
},
|
8698 |
+
{
|
8699 |
+
"epoch": 0.21245295928840233,
|
8700 |
+
"grad_norm": 1.9569214582443237,
|
8701 |
+
"learning_rate": 1.1725998147010686e-05,
|
8702 |
+
"loss": 1.3237,
|
8703 |
+
"step": 1242
|
8704 |
+
},
|
8705 |
+
{
|
8706 |
+
"epoch": 0.21262401642148476,
|
8707 |
+
"grad_norm": 1.690134048461914,
|
8708 |
+
"learning_rate": 1.1662543951915872e-05,
|
8709 |
+
"loss": 1.2336,
|
8710 |
+
"step": 1243
|
8711 |
+
},
|
8712 |
+
{
|
8713 |
+
"epoch": 0.21279507355456723,
|
8714 |
+
"grad_norm": 1.9291313886642456,
|
8715 |
+
"learning_rate": 1.1599239236630682e-05,
|
8716 |
+
"loss": 1.2388,
|
8717 |
+
"step": 1244
|
8718 |
+
},
|
8719 |
+
{
|
8720 |
+
"epoch": 0.21296613068764966,
|
8721 |
+
"grad_norm": 1.8828405141830444,
|
8722 |
+
"learning_rate": 1.1536084247983624e-05,
|
8723 |
+
"loss": 1.0056,
|
8724 |
+
"step": 1245
|
8725 |
+
},
|
8726 |
+
{
|
8727 |
+
"epoch": 0.21313718782073213,
|
8728 |
+
"grad_norm": 2.913674831390381,
|
8729 |
+
"learning_rate": 1.1473079232219403e-05,
|
8730 |
+
"loss": 1.6633,
|
8731 |
+
"step": 1246
|
8732 |
+
},
|
8733 |
+
{
|
8734 |
+
"epoch": 0.21330824495381456,
|
8735 |
+
"grad_norm": 2.3827452659606934,
|
8736 |
+
"learning_rate": 1.1410224434997978e-05,
|
8737 |
+
"loss": 1.5012,
|
8738 |
+
"step": 1247
|
8739 |
+
},
|
8740 |
+
{
|
8741 |
+
"epoch": 0.21347930208689703,
|
8742 |
+
"grad_norm": 2.63147234916687,
|
8743 |
+
"learning_rate": 1.1347520101393588e-05,
|
8744 |
+
"loss": 1.3265,
|
8745 |
+
"step": 1248
|
8746 |
+
},
|
8747 |
+
{
|
8748 |
+
"epoch": 0.21365035921997946,
|
8749 |
+
"grad_norm": 3.035972833633423,
|
8750 |
+
"learning_rate": 1.1284966475893804e-05,
|
8751 |
+
"loss": 1.5027,
|
8752 |
+
"step": 1249
|
8753 |
+
},
|
8754 |
+
{
|
8755 |
+
"epoch": 0.21382141635306193,
|
8756 |
+
"grad_norm": 3.904467821121216,
|
8757 |
+
"learning_rate": 1.1222563802398628e-05,
|
8758 |
+
"loss": 1.7931,
|
8759 |
+
"step": 1250
|
8760 |
+
},
|
8761 |
+
{
|
8762 |
+
"epoch": 0.21399247348614436,
|
8763 |
+
"grad_norm": 0.5079928636550903,
|
8764 |
+
"learning_rate": 1.1160312324219396e-05,
|
8765 |
+
"loss": 1.1821,
|
8766 |
+
"step": 1251
|
8767 |
+
},
|
8768 |
+
{
|
8769 |
+
"epoch": 0.21416353061922683,
|
8770 |
+
"grad_norm": 0.5283001661300659,
|
8771 |
+
"learning_rate": 1.1098212284078036e-05,
|
8772 |
+
"loss": 1.2658,
|
8773 |
+
"step": 1252
|
8774 |
+
},
|
8775 |
+
{
|
8776 |
+
"epoch": 0.21433458775230926,
|
8777 |
+
"grad_norm": 0.5386720895767212,
|
8778 |
+
"learning_rate": 1.1036263924105893e-05,
|
8779 |
+
"loss": 1.1306,
|
8780 |
+
"step": 1253
|
8781 |
+
},
|
8782 |
+
{
|
8783 |
+
"epoch": 0.21450564488539173,
|
8784 |
+
"grad_norm": 0.5462440848350525,
|
8785 |
+
"learning_rate": 1.0974467485843016e-05,
|
8786 |
+
"loss": 1.1831,
|
8787 |
+
"step": 1254
|
8788 |
+
},
|
8789 |
+
{
|
8790 |
+
"epoch": 0.21467670201847416,
|
8791 |
+
"grad_norm": 0.5248839259147644,
|
8792 |
+
"learning_rate": 1.0912823210237033e-05,
|
8793 |
+
"loss": 1.2268,
|
8794 |
+
"step": 1255
|
8795 |
+
},
|
8796 |
+
{
|
8797 |
+
"epoch": 0.21484775915155663,
|
8798 |
+
"grad_norm": 0.5346410274505615,
|
8799 |
+
"learning_rate": 1.08513313376423e-05,
|
8800 |
+
"loss": 1.2779,
|
8801 |
+
"step": 1256
|
8802 |
+
},
|
8803 |
+
{
|
8804 |
+
"epoch": 0.21501881628463906,
|
8805 |
+
"grad_norm": 0.5157681703567505,
|
8806 |
+
"learning_rate": 1.0789992107818964e-05,
|
8807 |
+
"loss": 1.2893,
|
8808 |
+
"step": 1257
|
8809 |
+
},
|
8810 |
+
{
|
8811 |
+
"epoch": 0.21518987341772153,
|
8812 |
+
"grad_norm": 0.5325741767883301,
|
8813 |
+
"learning_rate": 1.0728805759931948e-05,
|
8814 |
+
"loss": 1.1769,
|
8815 |
+
"step": 1258
|
8816 |
+
},
|
8817 |
+
{
|
8818 |
+
"epoch": 0.21536093055080396,
|
8819 |
+
"grad_norm": 0.5383254885673523,
|
8820 |
+
"learning_rate": 1.0667772532550163e-05,
|
8821 |
+
"loss": 1.2418,
|
8822 |
+
"step": 1259
|
8823 |
+
},
|
8824 |
+
{
|
8825 |
+
"epoch": 0.21553198768388643,
|
8826 |
+
"grad_norm": 0.5377271175384521,
|
8827 |
+
"learning_rate": 1.0606892663645445e-05,
|
8828 |
+
"loss": 1.1972,
|
8829 |
+
"step": 1260
|
8830 |
+
},
|
8831 |
+
{
|
8832 |
+
"epoch": 0.21570304481696886,
|
8833 |
+
"grad_norm": 0.5625713467597961,
|
8834 |
+
"learning_rate": 1.0546166390591682e-05,
|
8835 |
+
"loss": 1.2832,
|
8836 |
+
"step": 1261
|
8837 |
+
},
|
8838 |
+
{
|
8839 |
+
"epoch": 0.21587410195005133,
|
8840 |
+
"grad_norm": 0.5753247141838074,
|
8841 |
+
"learning_rate": 1.0485593950163897e-05,
|
8842 |
+
"loss": 1.3734,
|
8843 |
+
"step": 1262
|
8844 |
+
},
|
8845 |
+
{
|
8846 |
+
"epoch": 0.21604515908313376,
|
8847 |
+
"grad_norm": 0.612352192401886,
|
8848 |
+
"learning_rate": 1.0425175578537299e-05,
|
8849 |
+
"loss": 1.2911,
|
8850 |
+
"step": 1263
|
8851 |
+
},
|
8852 |
+
{
|
8853 |
+
"epoch": 0.21621621621621623,
|
8854 |
+
"grad_norm": 0.5850250720977783,
|
8855 |
+
"learning_rate": 1.036491151128638e-05,
|
8856 |
+
"loss": 1.319,
|
8857 |
+
"step": 1264
|
8858 |
+
},
|
8859 |
+
{
|
8860 |
+
"epoch": 0.21638727334929866,
|
8861 |
+
"grad_norm": 0.6049216985702515,
|
8862 |
+
"learning_rate": 1.0304801983383989e-05,
|
8863 |
+
"loss": 1.2336,
|
8864 |
+
"step": 1265
|
8865 |
+
},
|
8866 |
+
{
|
8867 |
+
"epoch": 0.21655833048238113,
|
8868 |
+
"grad_norm": 0.5945784449577332,
|
8869 |
+
"learning_rate": 1.0244847229200421e-05,
|
8870 |
+
"loss": 1.4106,
|
8871 |
+
"step": 1266
|
8872 |
+
},
|
8873 |
+
{
|
8874 |
+
"epoch": 0.21672938761546356,
|
8875 |
+
"grad_norm": 0.6478225588798523,
|
8876 |
+
"learning_rate": 1.0185047482502491e-05,
|
8877 |
+
"loss": 1.4137,
|
8878 |
+
"step": 1267
|
8879 |
+
},
|
8880 |
+
{
|
8881 |
+
"epoch": 0.216900444748546,
|
8882 |
+
"grad_norm": 0.681825578212738,
|
8883 |
+
"learning_rate": 1.0125402976452631e-05,
|
8884 |
+
"loss": 1.3382,
|
8885 |
+
"step": 1268
|
8886 |
+
},
|
8887 |
+
{
|
8888 |
+
"epoch": 0.21707150188162846,
|
8889 |
+
"grad_norm": 0.683777391910553,
|
8890 |
+
"learning_rate": 1.0065913943608029e-05,
|
8891 |
+
"loss": 1.4074,
|
8892 |
+
"step": 1269
|
8893 |
+
},
|
8894 |
+
{
|
8895 |
+
"epoch": 0.2172425590147109,
|
8896 |
+
"grad_norm": 0.6514219045639038,
|
8897 |
+
"learning_rate": 1.0006580615919576e-05,
|
8898 |
+
"loss": 1.2732,
|
8899 |
+
"step": 1270
|
8900 |
+
},
|
8901 |
+
{
|
8902 |
+
"epoch": 0.21741361614779336,
|
8903 |
+
"grad_norm": 0.6524000763893127,
|
8904 |
+
"learning_rate": 9.947403224731167e-06,
|
8905 |
+
"loss": 0.9494,
|
8906 |
+
"step": 1271
|
8907 |
+
},
|
8908 |
+
{
|
8909 |
+
"epoch": 0.2175846732808758,
|
8910 |
+
"grad_norm": 0.6782096028327942,
|
8911 |
+
"learning_rate": 9.8883820007786e-06,
|
8912 |
+
"loss": 1.2089,
|
8913 |
+
"step": 1272
|
8914 |
+
},
|
8915 |
+
{
|
8916 |
+
"epoch": 0.21775573041395827,
|
8917 |
+
"grad_norm": 0.7298002243041992,
|
8918 |
+
"learning_rate": 9.829517174188852e-06,
|
8919 |
+
"loss": 1.163,
|
8920 |
+
"step": 1273
|
8921 |
+
},
|
8922 |
+
{
|
8923 |
+
"epoch": 0.2179267875470407,
|
8924 |
+
"grad_norm": 0.7292787432670593,
|
8925 |
+
"learning_rate": 9.770808974479034e-06,
|
8926 |
+
"loss": 1.0956,
|
8927 |
+
"step": 1274
|
8928 |
+
},
|
8929 |
+
{
|
8930 |
+
"epoch": 0.21809784468012317,
|
8931 |
+
"grad_norm": 0.7491458058357239,
|
8932 |
+
"learning_rate": 9.712257630555588e-06,
|
8933 |
+
"loss": 1.2119,
|
8934 |
+
"step": 1275
|
8935 |
+
},
|
8936 |
+
{
|
8937 |
+
"epoch": 0.2182689018132056,
|
8938 |
+
"grad_norm": 0.7791218757629395,
|
8939 |
+
"learning_rate": 9.653863370713373e-06,
|
8940 |
+
"loss": 1.2157,
|
8941 |
+
"step": 1276
|
8942 |
+
},
|
8943 |
+
{
|
8944 |
+
"epoch": 0.21843995894628807,
|
8945 |
+
"grad_norm": 0.8694621324539185,
|
8946 |
+
"learning_rate": 9.595626422634746e-06,
|
8947 |
+
"loss": 1.2245,
|
8948 |
+
"step": 1277
|
8949 |
+
},
|
8950 |
+
{
|
8951 |
+
"epoch": 0.2186110160793705,
|
8952 |
+
"grad_norm": 0.8408295512199402,
|
8953 |
+
"learning_rate": 9.537547013388714e-06,
|
8954 |
+
"loss": 1.0785,
|
8955 |
+
"step": 1278
|
8956 |
+
},
|
8957 |
+
{
|
8958 |
+
"epoch": 0.21878207321245297,
|
8959 |
+
"grad_norm": 0.8591949939727783,
|
8960 |
+
"learning_rate": 9.479625369430028e-06,
|
8961 |
+
"loss": 1.121,
|
8962 |
+
"step": 1279
|
8963 |
+
},
|
8964 |
+
{
|
8965 |
+
"epoch": 0.2189531303455354,
|
8966 |
+
"grad_norm": 0.8336533904075623,
|
8967 |
+
"learning_rate": 9.421861716598307e-06,
|
8968 |
+
"loss": 1.0616,
|
8969 |
+
"step": 1280
|
8970 |
+
},
|
8971 |
+
{
|
8972 |
+
"epoch": 0.21912418747861787,
|
8973 |
+
"grad_norm": 0.8794592022895813,
|
8974 |
+
"learning_rate": 9.364256280117146e-06,
|
8975 |
+
"loss": 1.0264,
|
8976 |
+
"step": 1281
|
8977 |
+
},
|
8978 |
+
{
|
8979 |
+
"epoch": 0.2192952446117003,
|
8980 |
+
"grad_norm": 0.9473572373390198,
|
8981 |
+
"learning_rate": 9.306809284593243e-06,
|
8982 |
+
"loss": 0.9601,
|
8983 |
+
"step": 1282
|
8984 |
+
},
|
8985 |
+
{
|
8986 |
+
"epoch": 0.21946630174478277,
|
8987 |
+
"grad_norm": 0.9008265733718872,
|
8988 |
+
"learning_rate": 9.249520954015544e-06,
|
8989 |
+
"loss": 1.1171,
|
8990 |
+
"step": 1283
|
8991 |
+
},
|
8992 |
+
{
|
8993 |
+
"epoch": 0.2196373588778652,
|
8994 |
+
"grad_norm": 1.041060447692871,
|
8995 |
+
"learning_rate": 9.19239151175434e-06,
|
8996 |
+
"loss": 1.2924,
|
8997 |
+
"step": 1284
|
8998 |
+
},
|
8999 |
+
{
|
9000 |
+
"epoch": 0.21980841601094767,
|
9001 |
+
"grad_norm": 1.2047138214111328,
|
9002 |
+
"learning_rate": 9.135421180560394e-06,
|
9003 |
+
"loss": 1.174,
|
9004 |
+
"step": 1285
|
9005 |
+
},
|
9006 |
+
{
|
9007 |
+
"epoch": 0.2199794731440301,
|
9008 |
+
"grad_norm": 1.1049747467041016,
|
9009 |
+
"learning_rate": 9.078610182564134e-06,
|
9010 |
+
"loss": 1.3061,
|
9011 |
+
"step": 1286
|
9012 |
+
},
|
9013 |
+
{
|
9014 |
+
"epoch": 0.22015053027711257,
|
9015 |
+
"grad_norm": 1.1561453342437744,
|
9016 |
+
"learning_rate": 9.02195873927466e-06,
|
9017 |
+
"loss": 1.2821,
|
9018 |
+
"step": 1287
|
9019 |
+
},
|
9020 |
+
{
|
9021 |
+
"epoch": 0.220321587410195,
|
9022 |
+
"grad_norm": 1.3061068058013916,
|
9023 |
+
"learning_rate": 8.965467071579058e-06,
|
9024 |
+
"loss": 1.4048,
|
9025 |
+
"step": 1288
|
9026 |
+
},
|
9027 |
+
{
|
9028 |
+
"epoch": 0.22049264454327747,
|
9029 |
+
"grad_norm": 1.2303966283798218,
|
9030 |
+
"learning_rate": 8.909135399741341e-06,
|
9031 |
+
"loss": 1.0917,
|
9032 |
+
"step": 1289
|
9033 |
+
},
|
9034 |
+
{
|
9035 |
+
"epoch": 0.2206637016763599,
|
9036 |
+
"grad_norm": 1.44425368309021,
|
9037 |
+
"learning_rate": 8.852963943401765e-06,
|
9038 |
+
"loss": 1.2494,
|
9039 |
+
"step": 1290
|
9040 |
+
},
|
9041 |
+
{
|
9042 |
+
"epoch": 0.22083475880944237,
|
9043 |
+
"grad_norm": 1.5312856435775757,
|
9044 |
+
"learning_rate": 8.796952921575863e-06,
|
9045 |
+
"loss": 1.1933,
|
9046 |
+
"step": 1291
|
9047 |
+
},
|
9048 |
+
{
|
9049 |
+
"epoch": 0.2210058159425248,
|
9050 |
+
"grad_norm": 1.705080270767212,
|
9051 |
+
"learning_rate": 8.74110255265363e-06,
|
9052 |
+
"loss": 1.1517,
|
9053 |
+
"step": 1292
|
9054 |
+
},
|
9055 |
+
{
|
9056 |
+
"epoch": 0.22117687307560727,
|
9057 |
+
"grad_norm": 1.6327414512634277,
|
9058 |
+
"learning_rate": 8.685413054398667e-06,
|
9059 |
+
"loss": 1.1953,
|
9060 |
+
"step": 1293
|
9061 |
+
},
|
9062 |
+
{
|
9063 |
+
"epoch": 0.2213479302086897,
|
9064 |
+
"grad_norm": 2.047757148742676,
|
9065 |
+
"learning_rate": 8.629884643947333e-06,
|
9066 |
+
"loss": 1.399,
|
9067 |
+
"step": 1294
|
9068 |
+
},
|
9069 |
+
{
|
9070 |
+
"epoch": 0.22151898734177214,
|
9071 |
+
"grad_norm": 2.0639777183532715,
|
9072 |
+
"learning_rate": 8.574517537807897e-06,
|
9073 |
+
"loss": 1.274,
|
9074 |
+
"step": 1295
|
9075 |
+
},
|
9076 |
+
{
|
9077 |
+
"epoch": 0.2216900444748546,
|
9078 |
+
"grad_norm": 2.2816340923309326,
|
9079 |
+
"learning_rate": 8.519311951859693e-06,
|
9080 |
+
"loss": 1.4099,
|
9081 |
+
"step": 1296
|
9082 |
+
},
|
9083 |
+
{
|
9084 |
+
"epoch": 0.22186110160793704,
|
9085 |
+
"grad_norm": 2.353408098220825,
|
9086 |
+
"learning_rate": 8.464268101352268e-06,
|
9087 |
+
"loss": 1.0556,
|
9088 |
+
"step": 1297
|
9089 |
+
},
|
9090 |
+
{
|
9091 |
+
"epoch": 0.2220321587410195,
|
9092 |
+
"grad_norm": 2.7751424312591553,
|
9093 |
+
"learning_rate": 8.409386200904595e-06,
|
9094 |
+
"loss": 1.6104,
|
9095 |
+
"step": 1298
|
9096 |
+
},
|
9097 |
+
{
|
9098 |
+
"epoch": 0.22220321587410194,
|
9099 |
+
"grad_norm": 3.7724111080169678,
|
9100 |
+
"learning_rate": 8.354666464504129e-06,
|
9101 |
+
"loss": 1.6579,
|
9102 |
+
"step": 1299
|
9103 |
+
},
|
9104 |
+
{
|
9105 |
+
"epoch": 0.2223742730071844,
|
9106 |
+
"grad_norm": 4.376495838165283,
|
9107 |
+
"learning_rate": 8.30010910550611e-06,
|
9108 |
+
"loss": 1.6636,
|
9109 |
+
"step": 1300
|
9110 |
+
},
|
9111 |
+
{
|
9112 |
+
"epoch": 0.22254533014026684,
|
9113 |
+
"grad_norm": 0.47548404335975647,
|
9114 |
+
"learning_rate": 8.245714336632599e-06,
|
9115 |
+
"loss": 1.0056,
|
9116 |
+
"step": 1301
|
9117 |
+
},
|
9118 |
+
{
|
9119 |
+
"epoch": 0.2227163872733493,
|
9120 |
+
"grad_norm": 0.5077752470970154,
|
9121 |
+
"learning_rate": 8.191482369971727e-06,
|
9122 |
+
"loss": 1.1516,
|
9123 |
+
"step": 1302
|
9124 |
+
},
|
9125 |
+
{
|
9126 |
+
"epoch": 0.22288744440643174,
|
9127 |
+
"grad_norm": 0.5043779611587524,
|
9128 |
+
"learning_rate": 8.137413416976891e-06,
|
9129 |
+
"loss": 1.1089,
|
9130 |
+
"step": 1303
|
9131 |
+
},
|
9132 |
+
{
|
9133 |
+
"epoch": 0.2230585015395142,
|
9134 |
+
"grad_norm": 0.5539701581001282,
|
9135 |
+
"learning_rate": 8.083507688465819e-06,
|
9136 |
+
"loss": 1.2675,
|
9137 |
+
"step": 1304
|
9138 |
+
},
|
9139 |
+
{
|
9140 |
+
"epoch": 0.22322955867259664,
|
9141 |
+
"grad_norm": 0.5334572792053223,
|
9142 |
+
"learning_rate": 8.029765394619899e-06,
|
9143 |
+
"loss": 1.1902,
|
9144 |
+
"step": 1305
|
9145 |
+
},
|
9146 |
+
{
|
9147 |
+
"epoch": 0.2234006158056791,
|
9148 |
+
"grad_norm": 0.5344416499137878,
|
9149 |
+
"learning_rate": 7.976186744983183e-06,
|
9150 |
+
"loss": 1.2268,
|
9151 |
+
"step": 1306
|
9152 |
+
},
|
9153 |
+
{
|
9154 |
+
"epoch": 0.22357167293876154,
|
9155 |
+
"grad_norm": 0.5525511503219604,
|
9156 |
+
"learning_rate": 7.922771948461765e-06,
|
9157 |
+
"loss": 1.2606,
|
9158 |
+
"step": 1307
|
9159 |
+
},
|
9160 |
+
{
|
9161 |
+
"epoch": 0.223742730071844,
|
9162 |
+
"grad_norm": 0.5714919567108154,
|
9163 |
+
"learning_rate": 7.869521213322778e-06,
|
9164 |
+
"loss": 1.2948,
|
9165 |
+
"step": 1308
|
9166 |
+
},
|
9167 |
+
{
|
9168 |
+
"epoch": 0.22391378720492644,
|
9169 |
+
"grad_norm": 0.562394917011261,
|
9170 |
+
"learning_rate": 7.816434747193734e-06,
|
9171 |
+
"loss": 1.3514,
|
9172 |
+
"step": 1309
|
9173 |
+
},
|
9174 |
+
{
|
9175 |
+
"epoch": 0.2240848443380089,
|
9176 |
+
"grad_norm": 0.5721734166145325,
|
9177 |
+
"learning_rate": 7.76351275706163e-06,
|
9178 |
+
"loss": 1.3599,
|
9179 |
+
"step": 1310
|
9180 |
+
},
|
9181 |
+
{
|
9182 |
+
"epoch": 0.22425590147109134,
|
9183 |
+
"grad_norm": 0.5503696799278259,
|
9184 |
+
"learning_rate": 7.710755449272156e-06,
|
9185 |
+
"loss": 1.2771,
|
9186 |
+
"step": 1311
|
9187 |
+
},
|
9188 |
+
{
|
9189 |
+
"epoch": 0.2244269586041738,
|
9190 |
+
"grad_norm": 0.5626800060272217,
|
9191 |
+
"learning_rate": 7.658163029528903e-06,
|
9192 |
+
"loss": 1.2395,
|
9193 |
+
"step": 1312
|
9194 |
+
},
|
9195 |
+
{
|
9196 |
+
"epoch": 0.22459801573725624,
|
9197 |
+
"grad_norm": 0.574084460735321,
|
9198 |
+
"learning_rate": 7.6057357028925466e-06,
|
9199 |
+
"loss": 1.2471,
|
9200 |
+
"step": 1313
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 0.2247690728703387,
|
9204 |
+
"grad_norm": 0.5996257662773132,
|
9205 |
+
"learning_rate": 7.55347367378007e-06,
|
9206 |
+
"loss": 1.3195,
|
9207 |
+
"step": 1314
|
9208 |
+
},
|
9209 |
+
{
|
9210 |
+
"epoch": 0.22494013000342114,
|
9211 |
+
"grad_norm": 0.6033976078033447,
|
9212 |
+
"learning_rate": 7.501377145963939e-06,
|
9213 |
+
"loss": 1.3731,
|
9214 |
+
"step": 1315
|
9215 |
+
},
|
9216 |
+
{
|
9217 |
+
"epoch": 0.2251111871365036,
|
9218 |
+
"grad_norm": 0.6140574216842651,
|
9219 |
+
"learning_rate": 7.449446322571318e-06,
|
9220 |
+
"loss": 1.2818,
|
9221 |
+
"step": 1316
|
9222 |
+
},
|
9223 |
+
{
|
9224 |
+
"epoch": 0.22528224426958604,
|
9225 |
+
"grad_norm": 0.6565428972244263,
|
9226 |
+
"learning_rate": 7.3976814060833235e-06,
|
9227 |
+
"loss": 1.4516,
|
9228 |
+
"step": 1317
|
9229 |
+
},
|
9230 |
+
{
|
9231 |
+
"epoch": 0.2254533014026685,
|
9232 |
+
"grad_norm": 0.6131988763809204,
|
9233 |
+
"learning_rate": 7.346082598334108e-06,
|
9234 |
+
"loss": 1.2641,
|
9235 |
+
"step": 1318
|
9236 |
+
},
|
9237 |
+
{
|
9238 |
+
"epoch": 0.22562435853575094,
|
9239 |
+
"grad_norm": 0.683064341545105,
|
9240 |
+
"learning_rate": 7.294650100510258e-06,
|
9241 |
+
"loss": 1.1898,
|
9242 |
+
"step": 1319
|
9243 |
+
},
|
9244 |
+
{
|
9245 |
+
"epoch": 0.22579541566883338,
|
9246 |
+
"grad_norm": 0.6226746439933777,
|
9247 |
+
"learning_rate": 7.243384113149815e-06,
|
9248 |
+
"loss": 0.9962,
|
9249 |
+
"step": 1320
|
9250 |
+
},
|
9251 |
+
{
|
9252 |
+
"epoch": 0.22596647280191584,
|
9253 |
+
"grad_norm": 0.6749435067176819,
|
9254 |
+
"learning_rate": 7.1922848361416555e-06,
|
9255 |
+
"loss": 1.3907,
|
9256 |
+
"step": 1321
|
9257 |
+
},
|
9258 |
+
{
|
9259 |
+
"epoch": 0.22613752993499828,
|
9260 |
+
"grad_norm": 0.6928286552429199,
|
9261 |
+
"learning_rate": 7.141352468724621e-06,
|
9262 |
+
"loss": 1.0053,
|
9263 |
+
"step": 1322
|
9264 |
+
},
|
9265 |
+
{
|
9266 |
+
"epoch": 0.22630858706808074,
|
9267 |
+
"grad_norm": 0.7091418504714966,
|
9268 |
+
"learning_rate": 7.090587209486726e-06,
|
9269 |
+
"loss": 1.1128,
|
9270 |
+
"step": 1323
|
9271 |
+
},
|
9272 |
+
{
|
9273 |
+
"epoch": 0.22647964420116318,
|
9274 |
+
"grad_norm": 0.7740198969841003,
|
9275 |
+
"learning_rate": 7.039989256364499e-06,
|
9276 |
+
"loss": 1.2887,
|
9277 |
+
"step": 1324
|
9278 |
+
},
|
9279 |
+
{
|
9280 |
+
"epoch": 0.22665070133424564,
|
9281 |
+
"grad_norm": 0.7330458164215088,
|
9282 |
+
"learning_rate": 6.989558806642049e-06,
|
9283 |
+
"loss": 1.0614,
|
9284 |
+
"step": 1325
|
9285 |
+
},
|
9286 |
+
{
|
9287 |
+
"epoch": 0.22682175846732808,
|
9288 |
+
"grad_norm": 0.7767535448074341,
|
9289 |
+
"learning_rate": 6.9392960569504505e-06,
|
9290 |
+
"loss": 1.0587,
|
9291 |
+
"step": 1326
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 0.22699281560041054,
|
9295 |
+
"grad_norm": 0.7924586534500122,
|
9296 |
+
"learning_rate": 6.889201203266865e-06,
|
9297 |
+
"loss": 1.2006,
|
9298 |
+
"step": 1327
|
9299 |
+
},
|
9300 |
+
{
|
9301 |
+
"epoch": 0.22716387273349298,
|
9302 |
+
"grad_norm": 0.8487681746482849,
|
9303 |
+
"learning_rate": 6.839274440913829e-06,
|
9304 |
+
"loss": 1.1982,
|
9305 |
+
"step": 1328
|
9306 |
+
},
|
9307 |
+
{
|
9308 |
+
"epoch": 0.22733492986657544,
|
9309 |
+
"grad_norm": 0.9162440299987793,
|
9310 |
+
"learning_rate": 6.78951596455849e-06,
|
9311 |
+
"loss": 1.3977,
|
9312 |
+
"step": 1329
|
9313 |
+
},
|
9314 |
+
{
|
9315 |
+
"epoch": 0.22750598699965788,
|
9316 |
+
"grad_norm": 0.8817742466926575,
|
9317 |
+
"learning_rate": 6.739925968211824e-06,
|
9318 |
+
"loss": 1.2476,
|
9319 |
+
"step": 1330
|
9320 |
+
},
|
9321 |
+
{
|
9322 |
+
"epoch": 0.22767704413274034,
|
9323 |
+
"grad_norm": 0.951762855052948,
|
9324 |
+
"learning_rate": 6.690504645227902e-06,
|
9325 |
+
"loss": 1.2047,
|
9326 |
+
"step": 1331
|
9327 |
+
},
|
9328 |
+
{
|
9329 |
+
"epoch": 0.22784810126582278,
|
9330 |
+
"grad_norm": 0.8350614309310913,
|
9331 |
+
"learning_rate": 6.641252188303132e-06,
|
9332 |
+
"loss": 1.0696,
|
9333 |
+
"step": 1332
|
9334 |
+
},
|
9335 |
+
{
|
9336 |
+
"epoch": 0.22801915839890524,
|
9337 |
+
"grad_norm": 1.0560204982757568,
|
9338 |
+
"learning_rate": 6.592168789475495e-06,
|
9339 |
+
"loss": 1.3044,
|
9340 |
+
"step": 1333
|
9341 |
+
},
|
9342 |
+
{
|
9343 |
+
"epoch": 0.22819021553198768,
|
9344 |
+
"grad_norm": 0.9795908331871033,
|
9345 |
+
"learning_rate": 6.543254640123836e-06,
|
9346 |
+
"loss": 1.1138,
|
9347 |
+
"step": 1334
|
9348 |
+
},
|
9349 |
+
{
|
9350 |
+
"epoch": 0.22836127266507014,
|
9351 |
+
"grad_norm": 1.1002568006515503,
|
9352 |
+
"learning_rate": 6.494509930967019e-06,
|
9353 |
+
"loss": 1.1925,
|
9354 |
+
"step": 1335
|
9355 |
+
},
|
9356 |
+
{
|
9357 |
+
"epoch": 0.22853232979815258,
|
9358 |
+
"grad_norm": 1.1076974868774414,
|
9359 |
+
"learning_rate": 6.4459348520633456e-06,
|
9360 |
+
"loss": 1.0866,
|
9361 |
+
"step": 1336
|
9362 |
+
},
|
9363 |
+
{
|
9364 |
+
"epoch": 0.22870338693123504,
|
9365 |
+
"grad_norm": 1.1985821723937988,
|
9366 |
+
"learning_rate": 6.397529592809614e-06,
|
9367 |
+
"loss": 1.2294,
|
9368 |
+
"step": 1337
|
9369 |
+
},
|
9370 |
+
{
|
9371 |
+
"epoch": 0.22887444406431748,
|
9372 |
+
"grad_norm": 1.267824649810791,
|
9373 |
+
"learning_rate": 6.349294341940593e-06,
|
9374 |
+
"loss": 1.1715,
|
9375 |
+
"step": 1338
|
9376 |
+
},
|
9377 |
+
{
|
9378 |
+
"epoch": 0.22904550119739994,
|
9379 |
+
"grad_norm": 1.4798980951309204,
|
9380 |
+
"learning_rate": 6.301229287528099e-06,
|
9381 |
+
"loss": 1.4075,
|
9382 |
+
"step": 1339
|
9383 |
+
},
|
9384 |
+
{
|
9385 |
+
"epoch": 0.22921655833048238,
|
9386 |
+
"grad_norm": 1.5626285076141357,
|
9387 |
+
"learning_rate": 6.253334616980377e-06,
|
9388 |
+
"loss": 1.2854,
|
9389 |
+
"step": 1340
|
9390 |
+
},
|
9391 |
+
{
|
9392 |
+
"epoch": 0.22938761546356484,
|
9393 |
+
"grad_norm": 1.343494176864624,
|
9394 |
+
"learning_rate": 6.205610517041344e-06,
|
9395 |
+
"loss": 1.1578,
|
9396 |
+
"step": 1341
|
9397 |
+
},
|
9398 |
+
{
|
9399 |
+
"epoch": 0.22955867259664728,
|
9400 |
+
"grad_norm": 1.7497094869613647,
|
9401 |
+
"learning_rate": 6.158057173789822e-06,
|
9402 |
+
"loss": 1.3868,
|
9403 |
+
"step": 1342
|
9404 |
+
},
|
9405 |
+
{
|
9406 |
+
"epoch": 0.22972972972972974,
|
9407 |
+
"grad_norm": 1.8784244060516357,
|
9408 |
+
"learning_rate": 6.110674772638875e-06,
|
9409 |
+
"loss": 1.3888,
|
9410 |
+
"step": 1343
|
9411 |
+
},
|
9412 |
+
{
|
9413 |
+
"epoch": 0.22990078686281218,
|
9414 |
+
"grad_norm": 2.4020941257476807,
|
9415 |
+
"learning_rate": 6.063463498335037e-06,
|
9416 |
+
"loss": 1.7359,
|
9417 |
+
"step": 1344
|
9418 |
+
},
|
9419 |
+
{
|
9420 |
+
"epoch": 0.23007184399589464,
|
9421 |
+
"grad_norm": 1.8806310892105103,
|
9422 |
+
"learning_rate": 6.016423534957616e-06,
|
9423 |
+
"loss": 1.1762,
|
9424 |
+
"step": 1345
|
9425 |
+
},
|
9426 |
+
{
|
9427 |
+
"epoch": 0.23024290112897708,
|
9428 |
+
"grad_norm": 2.319746732711792,
|
9429 |
+
"learning_rate": 5.969555065917965e-06,
|
9430 |
+
"loss": 1.4859,
|
9431 |
+
"step": 1346
|
9432 |
+
},
|
9433 |
+
{
|
9434 |
+
"epoch": 0.23041395826205952,
|
9435 |
+
"grad_norm": 2.3504860401153564,
|
9436 |
+
"learning_rate": 5.92285827395877e-06,
|
9437 |
+
"loss": 1.4591,
|
9438 |
+
"step": 1347
|
9439 |
+
},
|
9440 |
+
{
|
9441 |
+
"epoch": 0.23058501539514198,
|
9442 |
+
"grad_norm": 2.283094644546509,
|
9443 |
+
"learning_rate": 5.876333341153351e-06,
|
9444 |
+
"loss": 1.0209,
|
9445 |
+
"step": 1348
|
9446 |
+
},
|
9447 |
+
{
|
9448 |
+
"epoch": 0.23075607252822442,
|
9449 |
+
"grad_norm": 2.855846405029297,
|
9450 |
+
"learning_rate": 5.829980448904926e-06,
|
9451 |
+
"loss": 1.5417,
|
9452 |
+
"step": 1349
|
9453 |
+
},
|
9454 |
+
{
|
9455 |
+
"epoch": 0.23092712966130688,
|
9456 |
+
"grad_norm": 4.009420394897461,
|
9457 |
+
"learning_rate": 5.783799777945925e-06,
|
9458 |
+
"loss": 1.8782,
|
9459 |
+
"step": 1350
|
9460 |
+
},
|
9461 |
+
{
|
9462 |
+
"epoch": 0.23109818679438932,
|
9463 |
+
"grad_norm": 0.5421372652053833,
|
9464 |
+
"learning_rate": 5.7377915083372754e-06,
|
9465 |
+
"loss": 1.1131,
|
9466 |
+
"step": 1351
|
9467 |
+
},
|
9468 |
+
{
|
9469 |
+
"epoch": 0.23126924392747178,
|
9470 |
+
"grad_norm": 0.4765899181365967,
|
9471 |
+
"learning_rate": 5.691955819467687e-06,
|
9472 |
+
"loss": 1.1413,
|
9473 |
+
"step": 1352
|
9474 |
+
},
|
9475 |
+
{
|
9476 |
+
"epoch": 0.23144030106055422,
|
9477 |
+
"grad_norm": 0.4964736998081207,
|
9478 |
+
"learning_rate": 5.646292890053023e-06,
|
9479 |
+
"loss": 1.0907,
|
9480 |
+
"step": 1353
|
9481 |
+
},
|
9482 |
+
{
|
9483 |
+
"epoch": 0.23161135819363668,
|
9484 |
+
"grad_norm": 0.5765841603279114,
|
9485 |
+
"learning_rate": 5.60080289813546e-06,
|
9486 |
+
"loss": 1.106,
|
9487 |
+
"step": 1354
|
9488 |
+
},
|
9489 |
+
{
|
9490 |
+
"epoch": 0.23178241532671912,
|
9491 |
+
"grad_norm": 0.4981246292591095,
|
9492 |
+
"learning_rate": 5.555486021082978e-06,
|
9493 |
+
"loss": 1.0799,
|
9494 |
+
"step": 1355
|
9495 |
+
},
|
9496 |
+
{
|
9497 |
+
"epoch": 0.23195347245980158,
|
9498 |
+
"grad_norm": 0.5498924255371094,
|
9499 |
+
"learning_rate": 5.510342435588489e-06,
|
9500 |
+
"loss": 1.2926,
|
9501 |
+
"step": 1356
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 0.23212452959288402,
|
9505 |
+
"grad_norm": 0.5441321134567261,
|
9506 |
+
"learning_rate": 5.465372317669287e-06,
|
9507 |
+
"loss": 1.2539,
|
9508 |
+
"step": 1357
|
9509 |
+
},
|
9510 |
+
{
|
9511 |
+
"epoch": 0.23229558672596648,
|
9512 |
+
"grad_norm": 0.5248607993125916,
|
9513 |
+
"learning_rate": 5.420575842666281e-06,
|
9514 |
+
"loss": 1.2271,
|
9515 |
+
"step": 1358
|
9516 |
+
},
|
9517 |
+
{
|
9518 |
+
"epoch": 0.23246664385904892,
|
9519 |
+
"grad_norm": 0.541762113571167,
|
9520 |
+
"learning_rate": 5.375953185243354e-06,
|
9521 |
+
"loss": 1.2812,
|
9522 |
+
"step": 1359
|
9523 |
+
},
|
9524 |
+
{
|
9525 |
+
"epoch": 0.23263770099213138,
|
9526 |
+
"grad_norm": 0.5515731573104858,
|
9527 |
+
"learning_rate": 5.331504519386643e-06,
|
9528 |
+
"loss": 1.2373,
|
9529 |
+
"step": 1360
|
9530 |
+
},
|
9531 |
+
{
|
9532 |
+
"epoch": 0.23280875812521382,
|
9533 |
+
"grad_norm": 0.5733421444892883,
|
9534 |
+
"learning_rate": 5.287230018403899e-06,
|
9535 |
+
"loss": 1.1709,
|
9536 |
+
"step": 1361
|
9537 |
+
},
|
9538 |
+
{
|
9539 |
+
"epoch": 0.23297981525829628,
|
9540 |
+
"grad_norm": 0.5242468118667603,
|
9541 |
+
"learning_rate": 5.243129854923789e-06,
|
9542 |
+
"loss": 1.1527,
|
9543 |
+
"step": 1362
|
9544 |
+
},
|
9545 |
+
{
|
9546 |
+
"epoch": 0.23315087239137872,
|
9547 |
+
"grad_norm": 0.605229377746582,
|
9548 |
+
"learning_rate": 5.199204200895236e-06,
|
9549 |
+
"loss": 1.4104,
|
9550 |
+
"step": 1363
|
9551 |
+
},
|
9552 |
+
{
|
9553 |
+
"epoch": 0.23332192952446118,
|
9554 |
+
"grad_norm": 0.5772895812988281,
|
9555 |
+
"learning_rate": 5.155453227586726e-06,
|
9556 |
+
"loss": 1.4494,
|
9557 |
+
"step": 1364
|
9558 |
+
},
|
9559 |
+
{
|
9560 |
+
"epoch": 0.23349298665754362,
|
9561 |
+
"grad_norm": 0.5811589360237122,
|
9562 |
+
"learning_rate": 5.111877105585672e-06,
|
9563 |
+
"loss": 1.2593,
|
9564 |
+
"step": 1365
|
9565 |
+
},
|
9566 |
+
{
|
9567 |
+
"epoch": 0.23366404379062608,
|
9568 |
+
"grad_norm": 0.6203511953353882,
|
9569 |
+
"learning_rate": 5.068476004797723e-06,
|
9570 |
+
"loss": 1.316,
|
9571 |
+
"step": 1366
|
9572 |
+
},
|
9573 |
+
{
|
9574 |
+
"epoch": 0.23383510092370852,
|
9575 |
+
"grad_norm": 0.5840880274772644,
|
9576 |
+
"learning_rate": 5.02525009444611e-06,
|
9577 |
+
"loss": 1.3263,
|
9578 |
+
"step": 1367
|
9579 |
+
},
|
9580 |
+
{
|
9581 |
+
"epoch": 0.23400615805679098,
|
9582 |
+
"grad_norm": 0.582473635673523,
|
9583 |
+
"learning_rate": 4.9821995430709865e-06,
|
9584 |
+
"loss": 1.2171,
|
9585 |
+
"step": 1368
|
9586 |
+
},
|
9587 |
+
{
|
9588 |
+
"epoch": 0.23417721518987342,
|
9589 |
+
"grad_norm": 0.5981590747833252,
|
9590 |
+
"learning_rate": 4.939324518528776e-06,
|
9591 |
+
"loss": 1.1424,
|
9592 |
+
"step": 1369
|
9593 |
+
},
|
9594 |
+
{
|
9595 |
+
"epoch": 0.23434827232295588,
|
9596 |
+
"grad_norm": 0.6488403677940369,
|
9597 |
+
"learning_rate": 4.896625187991532e-06,
|
9598 |
+
"loss": 1.3032,
|
9599 |
+
"step": 1370
|
9600 |
+
},
|
9601 |
+
{
|
9602 |
+
"epoch": 0.23451932945603832,
|
9603 |
+
"grad_norm": 0.6946617960929871,
|
9604 |
+
"learning_rate": 4.854101717946214e-06,
|
9605 |
+
"loss": 1.2504,
|
9606 |
+
"step": 1371
|
9607 |
+
},
|
9608 |
+
{
|
9609 |
+
"epoch": 0.23469038658912075,
|
9610 |
+
"grad_norm": 0.6498163342475891,
|
9611 |
+
"learning_rate": 4.811754274194164e-06,
|
9612 |
+
"loss": 1.0427,
|
9613 |
+
"step": 1372
|
9614 |
+
},
|
9615 |
+
{
|
9616 |
+
"epoch": 0.23486144372220322,
|
9617 |
+
"grad_norm": 0.7162802815437317,
|
9618 |
+
"learning_rate": 4.76958302185031e-06,
|
9619 |
+
"loss": 0.975,
|
9620 |
+
"step": 1373
|
9621 |
+
},
|
9622 |
+
{
|
9623 |
+
"epoch": 0.23503250085528565,
|
9624 |
+
"grad_norm": 0.7548353672027588,
|
9625 |
+
"learning_rate": 4.727588125342669e-06,
|
9626 |
+
"loss": 1.2503,
|
9627 |
+
"step": 1374
|
9628 |
+
},
|
9629 |
+
{
|
9630 |
+
"epoch": 0.23520355798836812,
|
9631 |
+
"grad_norm": 0.680657684803009,
|
9632 |
+
"learning_rate": 4.6857697484116005e-06,
|
9633 |
+
"loss": 1.0588,
|
9634 |
+
"step": 1375
|
9635 |
+
},
|
9636 |
+
{
|
9637 |
+
"epoch": 0.23537461512145055,
|
9638 |
+
"grad_norm": 0.767929196357727,
|
9639 |
+
"learning_rate": 4.644128054109214e-06,
|
9640 |
+
"loss": 1.1378,
|
9641 |
+
"step": 1376
|
9642 |
+
},
|
9643 |
+
{
|
9644 |
+
"epoch": 0.23554567225453302,
|
9645 |
+
"grad_norm": 0.7880586385726929,
|
9646 |
+
"learning_rate": 4.602663204798718e-06,
|
9647 |
+
"loss": 1.1596,
|
9648 |
+
"step": 1377
|
9649 |
+
},
|
9650 |
+
{
|
9651 |
+
"epoch": 0.23571672938761545,
|
9652 |
+
"grad_norm": 0.8862399458885193,
|
9653 |
+
"learning_rate": 4.561375362153803e-06,
|
9654 |
+
"loss": 1.1974,
|
9655 |
+
"step": 1378
|
9656 |
+
},
|
9657 |
+
{
|
9658 |
+
"epoch": 0.23588778652069792,
|
9659 |
+
"grad_norm": 0.8138688206672668,
|
9660 |
+
"learning_rate": 4.520264687157988e-06,
|
9661 |
+
"loss": 1.172,
|
9662 |
+
"step": 1379
|
9663 |
+
},
|
9664 |
+
{
|
9665 |
+
"epoch": 0.23605884365378035,
|
9666 |
+
"grad_norm": 0.8866947293281555,
|
9667 |
+
"learning_rate": 4.479331340104015e-06,
|
9668 |
+
"loss": 1.0678,
|
9669 |
+
"step": 1380
|
9670 |
+
},
|
9671 |
+
{
|
9672 |
+
"epoch": 0.23622990078686282,
|
9673 |
+
"grad_norm": 0.9346544742584229,
|
9674 |
+
"learning_rate": 4.43857548059321e-06,
|
9675 |
+
"loss": 1.1789,
|
9676 |
+
"step": 1381
|
9677 |
+
},
|
9678 |
+
{
|
9679 |
+
"epoch": 0.23640095791994525,
|
9680 |
+
"grad_norm": 0.9389496445655823,
|
9681 |
+
"learning_rate": 4.3979972675348865e-06,
|
9682 |
+
"loss": 1.1913,
|
9683 |
+
"step": 1382
|
9684 |
+
},
|
9685 |
+
{
|
9686 |
+
"epoch": 0.23657201505302772,
|
9687 |
+
"grad_norm": 1.110094666481018,
|
9688 |
+
"learning_rate": 4.357596859145658e-06,
|
9689 |
+
"loss": 1.4951,
|
9690 |
+
"step": 1383
|
9691 |
+
},
|
9692 |
+
{
|
9693 |
+
"epoch": 0.23674307218611015,
|
9694 |
+
"grad_norm": 1.051514983177185,
|
9695 |
+
"learning_rate": 4.317374412948927e-06,
|
9696 |
+
"loss": 1.0819,
|
9697 |
+
"step": 1384
|
9698 |
+
},
|
9699 |
+
{
|
9700 |
+
"epoch": 0.23691412931919262,
|
9701 |
+
"grad_norm": 1.1748234033584595,
|
9702 |
+
"learning_rate": 4.277330085774156e-06,
|
9703 |
+
"loss": 1.2414,
|
9704 |
+
"step": 1385
|
9705 |
+
},
|
9706 |
+
{
|
9707 |
+
"epoch": 0.23708518645227505,
|
9708 |
+
"grad_norm": 1.207331657409668,
|
9709 |
+
"learning_rate": 4.2374640337563385e-06,
|
9710 |
+
"loss": 1.1237,
|
9711 |
+
"step": 1386
|
9712 |
+
},
|
9713 |
+
{
|
9714 |
+
"epoch": 0.23725624358535752,
|
9715 |
+
"grad_norm": 1.4284909963607788,
|
9716 |
+
"learning_rate": 4.1977764123353914e-06,
|
9717 |
+
"loss": 1.437,
|
9718 |
+
"step": 1387
|
9719 |
+
},
|
9720 |
+
{
|
9721 |
+
"epoch": 0.23742730071843995,
|
9722 |
+
"grad_norm": 1.3866310119628906,
|
9723 |
+
"learning_rate": 4.158267376255448e-06,
|
9724 |
+
"loss": 1.4029,
|
9725 |
+
"step": 1388
|
9726 |
+
},
|
9727 |
+
{
|
9728 |
+
"epoch": 0.23759835785152242,
|
9729 |
+
"grad_norm": 1.3556033372879028,
|
9730 |
+
"learning_rate": 4.118937079564417e-06,
|
9731 |
+
"loss": 1.2113,
|
9732 |
+
"step": 1389
|
9733 |
+
},
|
9734 |
+
{
|
9735 |
+
"epoch": 0.23776941498460485,
|
9736 |
+
"grad_norm": 2.037419080734253,
|
9737 |
+
"learning_rate": 4.079785675613207e-06,
|
9738 |
+
"loss": 1.6154,
|
9739 |
+
"step": 1390
|
9740 |
+
},
|
9741 |
+
{
|
9742 |
+
"epoch": 0.23794047211768732,
|
9743 |
+
"grad_norm": 1.6674432754516602,
|
9744 |
+
"learning_rate": 4.040813317055286e-06,
|
9745 |
+
"loss": 1.4388,
|
9746 |
+
"step": 1391
|
9747 |
+
},
|
9748 |
+
{
|
9749 |
+
"epoch": 0.23811152925076975,
|
9750 |
+
"grad_norm": 1.5869760513305664,
|
9751 |
+
"learning_rate": 4.002020155845959e-06,
|
9752 |
+
"loss": 1.2458,
|
9753 |
+
"step": 1392
|
9754 |
+
},
|
9755 |
+
{
|
9756 |
+
"epoch": 0.23828258638385222,
|
9757 |
+
"grad_norm": 1.7335622310638428,
|
9758 |
+
"learning_rate": 3.963406343241866e-06,
|
9759 |
+
"loss": 0.979,
|
9760 |
+
"step": 1393
|
9761 |
+
},
|
9762 |
+
{
|
9763 |
+
"epoch": 0.23845364351693465,
|
9764 |
+
"grad_norm": 2.117042064666748,
|
9765 |
+
"learning_rate": 3.92497202980035e-06,
|
9766 |
+
"loss": 1.6294,
|
9767 |
+
"step": 1394
|
9768 |
+
},
|
9769 |
+
{
|
9770 |
+
"epoch": 0.23862470065001712,
|
9771 |
+
"grad_norm": 1.7038896083831787,
|
9772 |
+
"learning_rate": 3.886717365378867e-06,
|
9773 |
+
"loss": 1.045,
|
9774 |
+
"step": 1395
|
9775 |
+
},
|
9776 |
+
{
|
9777 |
+
"epoch": 0.23879575778309955,
|
9778 |
+
"grad_norm": 2.010951519012451,
|
9779 |
+
"learning_rate": 3.848642499134419e-06,
|
9780 |
+
"loss": 1.4806,
|
9781 |
+
"step": 1396
|
9782 |
+
},
|
9783 |
+
{
|
9784 |
+
"epoch": 0.238966814916182,
|
9785 |
+
"grad_norm": 2.311997652053833,
|
9786 |
+
"learning_rate": 3.810747579522961e-06,
|
9787 |
+
"loss": 1.3005,
|
9788 |
+
"step": 1397
|
9789 |
+
},
|
9790 |
+
{
|
9791 |
+
"epoch": 0.23913787204926445,
|
9792 |
+
"grad_norm": 2.7114341259002686,
|
9793 |
+
"learning_rate": 3.7730327542988274e-06,
|
9794 |
+
"loss": 1.2746,
|
9795 |
+
"step": 1398
|
9796 |
+
},
|
9797 |
+
{
|
9798 |
+
"epoch": 0.2393089291823469,
|
9799 |
+
"grad_norm": 2.8539140224456787,
|
9800 |
+
"learning_rate": 3.735498170514168e-06,
|
9801 |
+
"loss": 1.324,
|
9802 |
+
"step": 1399
|
9803 |
+
},
|
9804 |
+
{
|
9805 |
+
"epoch": 0.23947998631542936,
|
9806 |
+
"grad_norm": 4.023092269897461,
|
9807 |
+
"learning_rate": 3.698143974518331e-06,
|
9808 |
+
"loss": 1.7793,
|
9809 |
+
"step": 1400
|
9810 |
+
},
|
9811 |
+
{
|
9812 |
+
"epoch": 0.2396510434485118,
|
9813 |
+
"grad_norm": 0.5118989944458008,
|
9814 |
+
"learning_rate": 3.660970311957368e-06,
|
9815 |
+
"loss": 1.1518,
|
9816 |
+
"step": 1401
|
9817 |
+
},
|
9818 |
+
{
|
9819 |
+
"epoch": 0.23982210058159426,
|
9820 |
+
"grad_norm": 0.5201111435890198,
|
9821 |
+
"learning_rate": 3.6239773277733648e-06,
|
9822 |
+
"loss": 1.0663,
|
9823 |
+
"step": 1402
|
9824 |
+
},
|
9825 |
+
{
|
9826 |
+
"epoch": 0.2399931577146767,
|
9827 |
+
"grad_norm": 0.5192155241966248,
|
9828 |
+
"learning_rate": 3.5871651662039984e-06,
|
9829 |
+
"loss": 1.1775,
|
9830 |
+
"step": 1403
|
9831 |
+
},
|
9832 |
+
{
|
9833 |
+
"epoch": 0.24016421484775916,
|
9834 |
+
"grad_norm": 0.5502973794937134,
|
9835 |
+
"learning_rate": 3.550533970781833e-06,
|
9836 |
+
"loss": 1.2527,
|
9837 |
+
"step": 1404
|
9838 |
+
},
|
9839 |
+
{
|
9840 |
+
"epoch": 0.2403352719808416,
|
9841 |
+
"grad_norm": 0.5738222002983093,
|
9842 |
+
"learning_rate": 3.5140838843339075e-06,
|
9843 |
+
"loss": 1.1344,
|
9844 |
+
"step": 1405
|
9845 |
+
},
|
9846 |
+
{
|
9847 |
+
"epoch": 0.24050632911392406,
|
9848 |
+
"grad_norm": 0.539737343788147,
|
9849 |
+
"learning_rate": 3.4778150489810703e-06,
|
9850 |
+
"loss": 1.2407,
|
9851 |
+
"step": 1406
|
9852 |
+
},
|
9853 |
+
{
|
9854 |
+
"epoch": 0.2406773862470065,
|
9855 |
+
"grad_norm": 0.5440237522125244,
|
9856 |
+
"learning_rate": 3.441727606137446e-06,
|
9857 |
+
"loss": 1.2319,
|
9858 |
+
"step": 1407
|
9859 |
+
},
|
9860 |
+
{
|
9861 |
+
"epoch": 0.24084844338008896,
|
9862 |
+
"grad_norm": 0.5339118838310242,
|
9863 |
+
"learning_rate": 3.4058216965099477e-06,
|
9864 |
+
"loss": 1.184,
|
9865 |
+
"step": 1408
|
9866 |
+
},
|
9867 |
+
{
|
9868 |
+
"epoch": 0.2410195005131714,
|
9869 |
+
"grad_norm": 0.5639998912811279,
|
9870 |
+
"learning_rate": 3.3700974600976322e-06,
|
9871 |
+
"loss": 1.3496,
|
9872 |
+
"step": 1409
|
9873 |
+
},
|
9874 |
+
{
|
9875 |
+
"epoch": 0.24119055764625386,
|
9876 |
+
"grad_norm": 0.5606639385223389,
|
9877 |
+
"learning_rate": 3.334555036191245e-06,
|
9878 |
+
"loss": 1.2531,
|
9879 |
+
"step": 1410
|
9880 |
+
},
|
9881 |
+
{
|
9882 |
+
"epoch": 0.2413616147793363,
|
9883 |
+
"grad_norm": 0.5367159247398376,
|
9884 |
+
"learning_rate": 3.299194563372604e-06,
|
9885 |
+
"loss": 1.2477,
|
9886 |
+
"step": 1411
|
9887 |
+
},
|
9888 |
+
{
|
9889 |
+
"epoch": 0.24153267191241876,
|
9890 |
+
"grad_norm": 0.6273518800735474,
|
9891 |
+
"learning_rate": 3.264016179514101e-06,
|
9892 |
+
"loss": 1.468,
|
9893 |
+
"step": 1412
|
9894 |
+
},
|
9895 |
+
{
|
9896 |
+
"epoch": 0.2417037290455012,
|
9897 |
+
"grad_norm": 0.5670663118362427,
|
9898 |
+
"learning_rate": 3.229020021778151e-06,
|
9899 |
+
"loss": 1.3129,
|
9900 |
+
"step": 1413
|
9901 |
+
},
|
9902 |
+
{
|
9903 |
+
"epoch": 0.24187478617858366,
|
9904 |
+
"grad_norm": 0.5941410660743713,
|
9905 |
+
"learning_rate": 3.1942062266166694e-06,
|
9906 |
+
"loss": 1.3021,
|
9907 |
+
"step": 1414
|
9908 |
+
},
|
9909 |
+
{
|
9910 |
+
"epoch": 0.2420458433116661,
|
9911 |
+
"grad_norm": 0.6362264752388,
|
9912 |
+
"learning_rate": 3.1595749297705147e-06,
|
9913 |
+
"loss": 1.3469,
|
9914 |
+
"step": 1415
|
9915 |
+
},
|
9916 |
+
{
|
9917 |
+
"epoch": 0.24221690044474856,
|
9918 |
+
"grad_norm": 0.5958232283592224,
|
9919 |
+
"learning_rate": 3.1251262662689797e-06,
|
9920 |
+
"loss": 1.1123,
|
9921 |
+
"step": 1416
|
9922 |
+
},
|
9923 |
+
{
|
9924 |
+
"epoch": 0.242387957577831,
|
9925 |
+
"grad_norm": 0.5941672325134277,
|
9926 |
+
"learning_rate": 3.0908603704292606e-06,
|
9927 |
+
"loss": 1.278,
|
9928 |
+
"step": 1417
|
9929 |
+
},
|
9930 |
+
{
|
9931 |
+
"epoch": 0.24255901471091346,
|
9932 |
+
"grad_norm": 0.6328888535499573,
|
9933 |
+
"learning_rate": 3.056777375855957e-06,
|
9934 |
+
"loss": 1.2037,
|
9935 |
+
"step": 1418
|
9936 |
+
},
|
9937 |
+
{
|
9938 |
+
"epoch": 0.2427300718439959,
|
9939 |
+
"grad_norm": 0.6915973424911499,
|
9940 |
+
"learning_rate": 3.022877415440467e-06,
|
9941 |
+
"loss": 1.2673,
|
9942 |
+
"step": 1419
|
9943 |
+
},
|
9944 |
+
{
|
9945 |
+
"epoch": 0.24290112897707836,
|
9946 |
+
"grad_norm": 0.6439147591590881,
|
9947 |
+
"learning_rate": 2.989160621360604e-06,
|
9948 |
+
"loss": 1.1152,
|
9949 |
+
"step": 1420
|
9950 |
+
},
|
9951 |
+
{
|
9952 |
+
"epoch": 0.2430721861101608,
|
9953 |
+
"grad_norm": 0.7173706293106079,
|
9954 |
+
"learning_rate": 2.9556271250799296e-06,
|
9955 |
+
"loss": 1.1382,
|
9956 |
+
"step": 1421
|
9957 |
+
},
|
9958 |
+
{
|
9959 |
+
"epoch": 0.24324324324324326,
|
9960 |
+
"grad_norm": 0.6668547987937927,
|
9961 |
+
"learning_rate": 2.9222770573473835e-06,
|
9962 |
+
"loss": 1.1069,
|
9963 |
+
"step": 1422
|
9964 |
+
},
|
9965 |
+
{
|
9966 |
+
"epoch": 0.2434143003763257,
|
9967 |
+
"grad_norm": 0.7452664375305176,
|
9968 |
+
"learning_rate": 2.8891105481966664e-06,
|
9969 |
+
"loss": 1.1078,
|
9970 |
+
"step": 1423
|
9971 |
+
},
|
9972 |
+
{
|
9973 |
+
"epoch": 0.24358535750940813,
|
9974 |
+
"grad_norm": 0.7419282793998718,
|
9975 |
+
"learning_rate": 2.85612772694579e-06,
|
9976 |
+
"loss": 1.2242,
|
9977 |
+
"step": 1424
|
9978 |
+
},
|
9979 |
+
{
|
9980 |
+
"epoch": 0.2437564146424906,
|
9981 |
+
"grad_norm": 0.7008459568023682,
|
9982 |
+
"learning_rate": 2.8233287221965556e-06,
|
9983 |
+
"loss": 1.0396,
|
9984 |
+
"step": 1425
|
9985 |
+
},
|
9986 |
+
{
|
9987 |
+
"epoch": 0.24392747177557303,
|
9988 |
+
"grad_norm": 0.8020456433296204,
|
9989 |
+
"learning_rate": 2.790713661834049e-06,
|
9990 |
+
"loss": 0.9406,
|
9991 |
+
"step": 1426
|
9992 |
+
},
|
9993 |
+
{
|
9994 |
+
"epoch": 0.2440985289086555,
|
9995 |
+
"grad_norm": 0.7833404541015625,
|
9996 |
+
"learning_rate": 2.758282673026152e-06,
|
9997 |
+
"loss": 1.1185,
|
9998 |
+
"step": 1427
|
9999 |
+
},
|
10000 |
+
{
|
10001 |
+
"epoch": 0.24426958604173793,
|
10002 |
+
"grad_norm": 0.8281404972076416,
|
10003 |
+
"learning_rate": 2.726035882223038e-06,
|
10004 |
+
"loss": 1.1532,
|
10005 |
+
"step": 1428
|
10006 |
+
},
|
10007 |
+
{
|
10008 |
+
"epoch": 0.2444406431748204,
|
10009 |
+
"grad_norm": 0.8403681516647339,
|
10010 |
+
"learning_rate": 2.6939734151566865e-06,
|
10011 |
+
"loss": 1.0994,
|
10012 |
+
"step": 1429
|
10013 |
+
},
|
10014 |
+
{
|
10015 |
+
"epoch": 0.24461170030790283,
|
10016 |
+
"grad_norm": 0.8664372563362122,
|
10017 |
+
"learning_rate": 2.662095396840386e-06,
|
10018 |
+
"loss": 1.1607,
|
10019 |
+
"step": 1430
|
10020 |
+
},
|
10021 |
+
{
|
10022 |
+
"epoch": 0.2447827574409853,
|
10023 |
+
"grad_norm": 0.9844470024108887,
|
10024 |
+
"learning_rate": 2.6304019515682454e-06,
|
10025 |
+
"loss": 1.1356,
|
10026 |
+
"step": 1431
|
10027 |
+
},
|
10028 |
+
{
|
10029 |
+
"epoch": 0.24495381457406773,
|
10030 |
+
"grad_norm": 0.9611857533454895,
|
10031 |
+
"learning_rate": 2.598893202914726e-06,
|
10032 |
+
"loss": 1.042,
|
10033 |
+
"step": 1432
|
10034 |
+
},
|
10035 |
+
{
|
10036 |
+
"epoch": 0.2451248717071502,
|
10037 |
+
"grad_norm": 1.0004849433898926,
|
10038 |
+
"learning_rate": 2.567569273734133e-06,
|
10039 |
+
"loss": 1.0163,
|
10040 |
+
"step": 1433
|
10041 |
+
},
|
10042 |
+
{
|
10043 |
+
"epoch": 0.24529592884023263,
|
10044 |
+
"grad_norm": 1.127707839012146,
|
10045 |
+
"learning_rate": 2.5364302861601573e-06,
|
10046 |
+
"loss": 1.2836,
|
10047 |
+
"step": 1434
|
10048 |
+
},
|
10049 |
+
{
|
10050 |
+
"epoch": 0.2454669859733151,
|
10051 |
+
"grad_norm": 1.2910242080688477,
|
10052 |
+
"learning_rate": 2.5054763616053966e-06,
|
10053 |
+
"loss": 1.3878,
|
10054 |
+
"step": 1435
|
10055 |
+
},
|
10056 |
+
{
|
10057 |
+
"epoch": 0.24563804310639753,
|
10058 |
+
"grad_norm": 1.294445276260376,
|
10059 |
+
"learning_rate": 2.474707620760858e-06,
|
10060 |
+
"loss": 1.3105,
|
10061 |
+
"step": 1436
|
10062 |
+
},
|
10063 |
+
{
|
10064 |
+
"epoch": 0.24580910023948,
|
10065 |
+
"grad_norm": 1.2891169786453247,
|
10066 |
+
"learning_rate": 2.4441241835955474e-06,
|
10067 |
+
"loss": 1.4281,
|
10068 |
+
"step": 1437
|
10069 |
+
},
|
10070 |
+
{
|
10071 |
+
"epoch": 0.24598015737256243,
|
10072 |
+
"grad_norm": 1.5556684732437134,
|
10073 |
+
"learning_rate": 2.413726169355912e-06,
|
10074 |
+
"loss": 1.2546,
|
10075 |
+
"step": 1438
|
10076 |
+
},
|
10077 |
+
{
|
10078 |
+
"epoch": 0.2461512145056449,
|
10079 |
+
"grad_norm": 1.4214191436767578,
|
10080 |
+
"learning_rate": 2.3835136965654646e-06,
|
10081 |
+
"loss": 1.1372,
|
10082 |
+
"step": 1439
|
10083 |
+
},
|
10084 |
+
{
|
10085 |
+
"epoch": 0.24632227163872733,
|
10086 |
+
"grad_norm": 1.8401309251785278,
|
10087 |
+
"learning_rate": 2.353486883024253e-06,
|
10088 |
+
"loss": 1.6806,
|
10089 |
+
"step": 1440
|
10090 |
+
},
|
10091 |
+
{
|
10092 |
+
"epoch": 0.2464933287718098,
|
10093 |
+
"grad_norm": 1.4558113813400269,
|
10094 |
+
"learning_rate": 2.3236458458084585e-06,
|
10095 |
+
"loss": 1.1617,
|
10096 |
+
"step": 1441
|
10097 |
+
},
|
10098 |
+
{
|
10099 |
+
"epoch": 0.24666438590489223,
|
10100 |
+
"grad_norm": 1.7065509557724,
|
10101 |
+
"learning_rate": 2.293990701269888e-06,
|
10102 |
+
"loss": 1.1602,
|
10103 |
+
"step": 1442
|
10104 |
+
},
|
10105 |
+
{
|
10106 |
+
"epoch": 0.2468354430379747,
|
10107 |
+
"grad_norm": 2.4731521606445312,
|
10108 |
+
"learning_rate": 2.2645215650355545e-06,
|
10109 |
+
"loss": 1.3742,
|
10110 |
+
"step": 1443
|
10111 |
+
},
|
10112 |
+
{
|
10113 |
+
"epoch": 0.24700650017105713,
|
10114 |
+
"grad_norm": 1.8989933729171753,
|
10115 |
+
"learning_rate": 2.2352385520072018e-06,
|
10116 |
+
"loss": 1.5344,
|
10117 |
+
"step": 1444
|
10118 |
+
},
|
10119 |
+
{
|
10120 |
+
"epoch": 0.2471775573041396,
|
10121 |
+
"grad_norm": 2.264230966567993,
|
10122 |
+
"learning_rate": 2.2061417763608816e-06,
|
10123 |
+
"loss": 1.3283,
|
10124 |
+
"step": 1445
|
10125 |
+
},
|
10126 |
+
{
|
10127 |
+
"epoch": 0.24734861443722203,
|
10128 |
+
"grad_norm": 2.357905626296997,
|
10129 |
+
"learning_rate": 2.177231351546494e-06,
|
10130 |
+
"loss": 1.5292,
|
10131 |
+
"step": 1446
|
10132 |
+
},
|
10133 |
+
{
|
10134 |
+
"epoch": 0.2475196715703045,
|
10135 |
+
"grad_norm": 2.139000654220581,
|
10136 |
+
"learning_rate": 2.148507390287341e-06,
|
10137 |
+
"loss": 1.0836,
|
10138 |
+
"step": 1447
|
10139 |
+
},
|
10140 |
+
{
|
10141 |
+
"epoch": 0.24769072870338693,
|
10142 |
+
"grad_norm": 2.3239285945892334,
|
10143 |
+
"learning_rate": 2.1199700045797077e-06,
|
10144 |
+
"loss": 1.4205,
|
10145 |
+
"step": 1448
|
10146 |
+
},
|
10147 |
+
{
|
10148 |
+
"epoch": 0.24786178583646937,
|
10149 |
+
"grad_norm": 2.2953028678894043,
|
10150 |
+
"learning_rate": 2.091619305692388e-06,
|
10151 |
+
"loss": 1.0398,
|
10152 |
+
"step": 1449
|
10153 |
+
},
|
10154 |
+
{
|
10155 |
+
"epoch": 0.24803284296955183,
|
10156 |
+
"grad_norm": 4.2786712646484375,
|
10157 |
+
"learning_rate": 2.063455404166287e-06,
|
10158 |
+
"loss": 1.9562,
|
10159 |
+
"step": 1450
|
10160 |
+
},
|
10161 |
+
{
|
10162 |
+
"epoch": 0.24820390010263427,
|
10163 |
+
"grad_norm": 0.4897097945213318,
|
10164 |
+
"learning_rate": 2.03547840981399e-06,
|
10165 |
+
"loss": 0.9971,
|
10166 |
+
"step": 1451
|
10167 |
+
},
|
10168 |
+
{
|
10169 |
+
"epoch": 0.24837495723571673,
|
10170 |
+
"grad_norm": 0.5019426941871643,
|
10171 |
+
"learning_rate": 2.00768843171929e-06,
|
10172 |
+
"loss": 1.1337,
|
10173 |
+
"step": 1452
|
10174 |
+
},
|
10175 |
+
{
|
10176 |
+
"epoch": 0.24854601436879917,
|
10177 |
+
"grad_norm": 0.5064823031425476,
|
10178 |
+
"learning_rate": 1.9800855782368176e-06,
|
10179 |
+
"loss": 1.1981,
|
10180 |
+
"step": 1453
|
10181 |
+
},
|
10182 |
+
{
|
10183 |
+
"epoch": 0.24871707150188163,
|
10184 |
+
"grad_norm": 0.5236514210700989,
|
10185 |
+
"learning_rate": 1.952669956991604e-06,
|
10186 |
+
"loss": 1.1714,
|
10187 |
+
"step": 1454
|
10188 |
+
},
|
10189 |
+
{
|
10190 |
+
"epoch": 0.24888812863496407,
|
10191 |
+
"grad_norm": 0.5447357296943665,
|
10192 |
+
"learning_rate": 1.9254416748786086e-06,
|
10193 |
+
"loss": 1.2409,
|
10194 |
+
"step": 1455
|
10195 |
+
},
|
10196 |
+
{
|
10197 |
+
"epoch": 0.24905918576804653,
|
10198 |
+
"grad_norm": 0.5601467490196228,
|
10199 |
+
"learning_rate": 1.8984008380623975e-06,
|
10200 |
+
"loss": 1.2084,
|
10201 |
+
"step": 1456
|
10202 |
+
},
|
10203 |
+
{
|
10204 |
+
"epoch": 0.24923024290112897,
|
10205 |
+
"grad_norm": 0.536537230014801,
|
10206 |
+
"learning_rate": 1.8715475519766269e-06,
|
10207 |
+
"loss": 1.2323,
|
10208 |
+
"step": 1457
|
10209 |
+
},
|
10210 |
+
{
|
10211 |
+
"epoch": 0.24940130003421143,
|
10212 |
+
"grad_norm": 0.568358302116394,
|
10213 |
+
"learning_rate": 1.8448819213237267e-06,
|
10214 |
+
"loss": 1.3147,
|
10215 |
+
"step": 1458
|
10216 |
+
},
|
10217 |
+
{
|
10218 |
+
"epoch": 0.24957235716729387,
|
10219 |
+
"grad_norm": 0.541961133480072,
|
10220 |
+
"learning_rate": 1.8184040500744238e-06,
|
10221 |
+
"loss": 1.2917,
|
10222 |
+
"step": 1459
|
10223 |
+
},
|
10224 |
+
{
|
10225 |
+
"epoch": 0.24974341430037633,
|
10226 |
+
"grad_norm": 0.5472269654273987,
|
10227 |
+
"learning_rate": 1.7921140414673688e-06,
|
10228 |
+
"loss": 1.2432,
|
10229 |
+
"step": 1460
|
10230 |
+
},
|
10231 |
+
{
|
10232 |
+
"epoch": 0.24991447143345877,
|
10233 |
+
"grad_norm": 0.5804473757743835,
|
10234 |
+
"learning_rate": 1.7660119980087209e-06,
|
10235 |
+
"loss": 1.3318,
|
10236 |
+
"step": 1461
|
10237 |
+
},
|
10238 |
+
{
|
10239 |
+
"epoch": 0.2500855285665412,
|
10240 |
+
"grad_norm": 0.5860776901245117,
|
10241 |
+
"learning_rate": 1.7400980214717588e-06,
|
10242 |
+
"loss": 1.3374,
|
10243 |
+
"step": 1462
|
10244 |
+
},
|
10245 |
+
{
|
10246 |
+
"epoch": 0.25025658569962367,
|
10247 |
+
"grad_norm": 0.5852331519126892,
|
10248 |
+
"learning_rate": 1.71437221289647e-06,
|
10249 |
+
"loss": 1.1678,
|
10250 |
+
"step": 1463
|
10251 |
+
},
|
10252 |
+
{
|
10253 |
+
"epoch": 0.25042764283270613,
|
10254 |
+
"grad_norm": 0.5965182185173035,
|
10255 |
+
"learning_rate": 1.688834672589179e-06,
|
10256 |
+
"loss": 1.3263,
|
10257 |
+
"step": 1464
|
10258 |
+
},
|
10259 |
+
{
|
10260 |
+
"epoch": 0.2505986999657886,
|
10261 |
+
"grad_norm": 0.6312009692192078,
|
10262 |
+
"learning_rate": 1.6634855001221194e-06,
|
10263 |
+
"loss": 1.4823,
|
10264 |
+
"step": 1465
|
10265 |
+
},
|
10266 |
+
{
|
10267 |
+
"epoch": 0.250769757098871,
|
10268 |
+
"grad_norm": 0.6010558009147644,
|
10269 |
+
"learning_rate": 1.6383247943331014e-06,
|
10270 |
+
"loss": 1.3346,
|
10271 |
+
"step": 1466
|
10272 |
+
},
|
10273 |
+
{
|
10274 |
+
"epoch": 0.25094081423195347,
|
10275 |
+
"grad_norm": 0.5966724157333374,
|
10276 |
+
"learning_rate": 1.6133526533250565e-06,
|
10277 |
+
"loss": 1.1256,
|
10278 |
+
"step": 1467
|
10279 |
+
},
|
10280 |
+
{
|
10281 |
+
"epoch": 0.25111187136503593,
|
10282 |
+
"grad_norm": 0.6598055362701416,
|
10283 |
+
"learning_rate": 1.5885691744657261e-06,
|
10284 |
+
"loss": 1.662,
|
10285 |
+
"step": 1468
|
10286 |
+
},
|
10287 |
+
{
|
10288 |
+
"epoch": 0.2512829284981184,
|
10289 |
+
"grad_norm": 0.6296840310096741,
|
10290 |
+
"learning_rate": 1.5639744543872182e-06,
|
10291 |
+
"loss": 1.0919,
|
10292 |
+
"step": 1469
|
10293 |
+
},
|
10294 |
+
{
|
10295 |
+
"epoch": 0.2514539856312008,
|
10296 |
+
"grad_norm": 0.6338956356048584,
|
10297 |
+
"learning_rate": 1.5395685889856681e-06,
|
10298 |
+
"loss": 1.0028,
|
10299 |
+
"step": 1470
|
10300 |
+
},
|
10301 |
+
{
|
10302 |
+
"epoch": 0.25162504276428327,
|
10303 |
+
"grad_norm": 0.6730737686157227,
|
10304 |
+
"learning_rate": 1.5153516734208895e-06,
|
10305 |
+
"loss": 1.11,
|
10306 |
+
"step": 1471
|
10307 |
+
},
|
10308 |
+
{
|
10309 |
+
"epoch": 0.25179609989736573,
|
10310 |
+
"grad_norm": 0.6922449469566345,
|
10311 |
+
"learning_rate": 1.4913238021159126e-06,
|
10312 |
+
"loss": 1.1715,
|
10313 |
+
"step": 1472
|
10314 |
+
},
|
10315 |
+
{
|
10316 |
+
"epoch": 0.2519671570304482,
|
10317 |
+
"grad_norm": 0.6994005441665649,
|
10318 |
+
"learning_rate": 1.4674850687567244e-06,
|
10319 |
+
"loss": 1.059,
|
10320 |
+
"step": 1473
|
10321 |
+
},
|
10322 |
+
{
|
10323 |
+
"epoch": 0.2521382141635306,
|
10324 |
+
"grad_norm": 0.7274287939071655,
|
10325 |
+
"learning_rate": 1.4438355662918125e-06,
|
10326 |
+
"loss": 1.2041,
|
10327 |
+
"step": 1474
|
10328 |
+
},
|
10329 |
+
{
|
10330 |
+
"epoch": 0.25230927129661307,
|
10331 |
+
"grad_norm": 0.8313538432121277,
|
10332 |
+
"learning_rate": 1.4203753869318881e-06,
|
10333 |
+
"loss": 1.1304,
|
10334 |
+
"step": 1475
|
10335 |
+
},
|
10336 |
+
{
|
10337 |
+
"epoch": 0.25248032842969553,
|
10338 |
+
"grad_norm": 0.8052504658699036,
|
10339 |
+
"learning_rate": 1.3971046221494367e-06,
|
10340 |
+
"loss": 1.1036,
|
10341 |
+
"step": 1476
|
10342 |
+
},
|
10343 |
+
{
|
10344 |
+
"epoch": 0.25265138556277794,
|
10345 |
+
"grad_norm": 0.8060058355331421,
|
10346 |
+
"learning_rate": 1.3740233626784337e-06,
|
10347 |
+
"loss": 0.9327,
|
10348 |
+
"step": 1477
|
10349 |
+
},
|
10350 |
+
{
|
10351 |
+
"epoch": 0.2528224426958604,
|
10352 |
+
"grad_norm": 0.8705495595932007,
|
10353 |
+
"learning_rate": 1.3511316985139521e-06,
|
10354 |
+
"loss": 1.1738,
|
10355 |
+
"step": 1478
|
10356 |
+
},
|
10357 |
+
{
|
10358 |
+
"epoch": 0.25299349982894287,
|
10359 |
+
"grad_norm": 0.8042508363723755,
|
10360 |
+
"learning_rate": 1.3284297189118279e-06,
|
10361 |
+
"loss": 1.1432,
|
10362 |
+
"step": 1479
|
10363 |
+
},
|
10364 |
+
{
|
10365 |
+
"epoch": 0.25316455696202533,
|
10366 |
+
"grad_norm": 0.9142102003097534,
|
10367 |
+
"learning_rate": 1.305917512388305e-06,
|
10368 |
+
"loss": 1.2597,
|
10369 |
+
"step": 1480
|
10370 |
+
},
|
10371 |
+
{
|
10372 |
+
"epoch": 0.25333561409510774,
|
10373 |
+
"grad_norm": 0.9008240699768066,
|
10374 |
+
"learning_rate": 1.2835951667196923e-06,
|
10375 |
+
"loss": 1.1446,
|
10376 |
+
"step": 1481
|
10377 |
+
},
|
10378 |
+
{
|
10379 |
+
"epoch": 0.2535066712281902,
|
10380 |
+
"grad_norm": 0.9593712687492371,
|
10381 |
+
"learning_rate": 1.2614627689420177e-06,
|
10382 |
+
"loss": 1.0451,
|
10383 |
+
"step": 1482
|
10384 |
+
},
|
10385 |
+
{
|
10386 |
+
"epoch": 0.25367772836127267,
|
10387 |
+
"grad_norm": 1.0314337015151978,
|
10388 |
+
"learning_rate": 1.2395204053506915e-06,
|
10389 |
+
"loss": 0.9297,
|
10390 |
+
"step": 1483
|
10391 |
+
},
|
10392 |
+
{
|
10393 |
+
"epoch": 0.25384878549435513,
|
10394 |
+
"grad_norm": 1.0227497816085815,
|
10395 |
+
"learning_rate": 1.2177681615001713e-06,
|
10396 |
+
"loss": 1.2377,
|
10397 |
+
"step": 1484
|
10398 |
+
},
|
10399 |
+
{
|
10400 |
+
"epoch": 0.25401984262743754,
|
10401 |
+
"grad_norm": 1.1861833333969116,
|
10402 |
+
"learning_rate": 1.196206122203647e-06,
|
10403 |
+
"loss": 1.5228,
|
10404 |
+
"step": 1485
|
10405 |
+
},
|
10406 |
+
{
|
10407 |
+
"epoch": 0.25419089976052,
|
10408 |
+
"grad_norm": 1.1678194999694824,
|
10409 |
+
"learning_rate": 1.1748343715326459e-06,
|
10410 |
+
"loss": 1.2958,
|
10411 |
+
"step": 1486
|
10412 |
+
},
|
10413 |
+
{
|
10414 |
+
"epoch": 0.25436195689360247,
|
10415 |
+
"grad_norm": 1.184696078300476,
|
10416 |
+
"learning_rate": 1.153652992816806e-06,
|
10417 |
+
"loss": 1.0181,
|
10418 |
+
"step": 1487
|
10419 |
+
},
|
10420 |
+
{
|
10421 |
+
"epoch": 0.25453301402668493,
|
10422 |
+
"grad_norm": 1.348680019378662,
|
10423 |
+
"learning_rate": 1.1326620686434363e-06,
|
10424 |
+
"loss": 1.203,
|
10425 |
+
"step": 1488
|
10426 |
+
},
|
10427 |
+
{
|
10428 |
+
"epoch": 0.25470407115976734,
|
10429 |
+
"grad_norm": 1.3480643033981323,
|
10430 |
+
"learning_rate": 1.1118616808573068e-06,
|
10431 |
+
"loss": 1.4253,
|
10432 |
+
"step": 1489
|
10433 |
+
},
|
10434 |
+
{
|
10435 |
+
"epoch": 0.2548751282928498,
|
10436 |
+
"grad_norm": 1.5658440589904785,
|
10437 |
+
"learning_rate": 1.0912519105602482e-06,
|
10438 |
+
"loss": 1.4295,
|
10439 |
+
"step": 1490
|
10440 |
+
},
|
10441 |
+
{
|
10442 |
+
"epoch": 0.25504618542593227,
|
10443 |
+
"grad_norm": 1.8241169452667236,
|
10444 |
+
"learning_rate": 1.0708328381108524e-06,
|
10445 |
+
"loss": 1.4274,
|
10446 |
+
"step": 1491
|
10447 |
+
},
|
10448 |
+
{
|
10449 |
+
"epoch": 0.25521724255901473,
|
10450 |
+
"grad_norm": 1.662746787071228,
|
10451 |
+
"learning_rate": 1.0506045431242007e-06,
|
10452 |
+
"loss": 1.3222,
|
10453 |
+
"step": 1492
|
10454 |
+
},
|
10455 |
+
{
|
10456 |
+
"epoch": 0.25538829969209714,
|
10457 |
+
"grad_norm": 2.0207502841949463,
|
10458 |
+
"learning_rate": 1.0305671044714916e-06,
|
10459 |
+
"loss": 1.3686,
|
10460 |
+
"step": 1493
|
10461 |
+
},
|
10462 |
+
{
|
10463 |
+
"epoch": 0.2555593568251796,
|
10464 |
+
"grad_norm": 2.2003042697906494,
|
10465 |
+
"learning_rate": 1.010720600279791e-06,
|
10466 |
+
"loss": 1.317,
|
10467 |
+
"step": 1494
|
10468 |
+
},
|
10469 |
+
{
|
10470 |
+
"epoch": 0.25573041395826207,
|
10471 |
+
"grad_norm": 2.332540988922119,
|
10472 |
+
"learning_rate": 9.910651079316823e-07,
|
10473 |
+
"loss": 1.3053,
|
10474 |
+
"step": 1495
|
10475 |
+
},
|
10476 |
+
{
|
10477 |
+
"epoch": 0.25590147109134453,
|
10478 |
+
"grad_norm": 2.3512799739837646,
|
10479 |
+
"learning_rate": 9.71600704065001e-07,
|
10480 |
+
"loss": 1.259,
|
10481 |
+
"step": 1496
|
10482 |
+
},
|
10483 |
+
{
|
10484 |
+
"epoch": 0.25607252822442694,
|
10485 |
+
"grad_norm": 2.3095815181732178,
|
10486 |
+
"learning_rate": 9.523274645725e-07,
|
10487 |
+
"loss": 1.4166,
|
10488 |
+
"step": 1497
|
10489 |
+
},
|
10490 |
+
{
|
10491 |
+
"epoch": 0.2562435853575094,
|
10492 |
+
"grad_norm": 2.170978307723999,
|
10493 |
+
"learning_rate": 9.332454646015787e-07,
|
10494 |
+
"loss": 0.6582,
|
10495 |
+
"step": 1498
|
10496 |
+
},
|
10497 |
+
{
|
10498 |
+
"epoch": 0.25641464249059187,
|
10499 |
+
"grad_norm": 2.4511187076568604,
|
10500 |
+
"learning_rate": 9.143547785539885e-07,
|
10501 |
+
"loss": 1.2956,
|
10502 |
+
"step": 1499
|
10503 |
+
},
|
10504 |
+
{
|
10505 |
+
"epoch": 0.25658569962367433,
|
10506 |
+
"grad_norm": 3.6723880767822266,
|
10507 |
+
"learning_rate": 8.956554800855332e-07,
|
10508 |
+
"loss": 1.2516,
|
10509 |
+
"step": 1500
|
10510 |
+
},
|
10511 |
+
{
|
10512 |
+
"epoch": 0.25675675675675674,
|
10513 |
+
"grad_norm": 0.4546957314014435,
|
10514 |
+
"learning_rate": 8.771476421057801e-07,
|
10515 |
+
"loss": 1.0182,
|
10516 |
+
"step": 1501
|
10517 |
+
},
|
10518 |
+
{
|
10519 |
+
"epoch": 0.2569278138898392,
|
10520 |
+
"grad_norm": 0.5352223515510559,
|
10521 |
+
"learning_rate": 8.588313367778045e-07,
|
10522 |
+
"loss": 1.1829,
|
10523 |
+
"step": 1502
|
10524 |
+
},
|
10525 |
+
{
|
10526 |
+
"epoch": 0.25709887102292167,
|
10527 |
+
"grad_norm": 0.5148196220397949,
|
10528 |
+
"learning_rate": 8.407066355178461e-07,
|
10529 |
+
"loss": 1.1305,
|
10530 |
+
"step": 1503
|
10531 |
+
},
|
10532 |
+
{
|
10533 |
+
"epoch": 0.2572699281560041,
|
10534 |
+
"grad_norm": 0.5280354619026184,
|
10535 |
+
"learning_rate": 8.227736089951144e-07,
|
10536 |
+
"loss": 1.2108,
|
10537 |
+
"step": 1504
|
10538 |
+
},
|
10539 |
+
{
|
10540 |
+
"epoch": 0.25744098528908654,
|
10541 |
+
"grad_norm": 0.5749139785766602,
|
10542 |
+
"learning_rate": 8.05032327131433e-07,
|
10543 |
+
"loss": 1.2947,
|
10544 |
+
"step": 1505
|
10545 |
+
},
|
10546 |
+
{
|
10547 |
+
"epoch": 0.257612042422169,
|
10548 |
+
"grad_norm": 0.5080156922340393,
|
10549 |
+
"learning_rate": 7.874828591010242e-07,
|
10550 |
+
"loss": 1.1792,
|
10551 |
+
"step": 1506
|
10552 |
+
},
|
10553 |
+
{
|
10554 |
+
"epoch": 0.25778309955525147,
|
10555 |
+
"grad_norm": 0.5266414880752563,
|
10556 |
+
"learning_rate": 7.701252733302078e-07,
|
10557 |
+
"loss": 1.2799,
|
10558 |
+
"step": 1507
|
10559 |
+
},
|
10560 |
+
{
|
10561 |
+
"epoch": 0.2579541566883339,
|
10562 |
+
"grad_norm": 0.5320059657096863,
|
10563 |
+
"learning_rate": 7.529596374971471e-07,
|
10564 |
+
"loss": 1.2196,
|
10565 |
+
"step": 1508
|
10566 |
+
},
|
10567 |
+
{
|
10568 |
+
"epoch": 0.25812521382141634,
|
10569 |
+
"grad_norm": 0.5306248664855957,
|
10570 |
+
"learning_rate": 7.35986018531587e-07,
|
10571 |
+
"loss": 1.2946,
|
10572 |
+
"step": 1509
|
10573 |
+
},
|
10574 |
+
{
|
10575 |
+
"epoch": 0.2582962709544988,
|
10576 |
+
"grad_norm": 0.5468301773071289,
|
10577 |
+
"learning_rate": 7.192044826145771e-07,
|
10578 |
+
"loss": 1.3077,
|
10579 |
+
"step": 1510
|
10580 |
+
},
|
10581 |
+
{
|
10582 |
+
"epoch": 0.25846732808758127,
|
10583 |
+
"grad_norm": 0.540556013584137,
|
10584 |
+
"learning_rate": 7.026150951782385e-07,
|
10585 |
+
"loss": 1.2727,
|
10586 |
+
"step": 1511
|
10587 |
+
},
|
10588 |
+
{
|
10589 |
+
"epoch": 0.2586383852206637,
|
10590 |
+
"grad_norm": 0.5539586544036865,
|
10591 |
+
"learning_rate": 6.862179209054798e-07,
|
10592 |
+
"loss": 1.3236,
|
10593 |
+
"step": 1512
|
10594 |
+
},
|
10595 |
+
{
|
10596 |
+
"epoch": 0.25880944235374614,
|
10597 |
+
"grad_norm": 0.5765905976295471,
|
10598 |
+
"learning_rate": 6.700130237297708e-07,
|
10599 |
+
"loss": 1.3182,
|
10600 |
+
"step": 1513
|
10601 |
+
},
|
10602 |
+
{
|
10603 |
+
"epoch": 0.2589804994868286,
|
10604 |
+
"grad_norm": 0.5625589489936829,
|
10605 |
+
"learning_rate": 6.540004668348865e-07,
|
10606 |
+
"loss": 1.3291,
|
10607 |
+
"step": 1514
|
10608 |
+
},
|
10609 |
+
{
|
10610 |
+
"epoch": 0.25915155661991107,
|
10611 |
+
"grad_norm": 0.6037416458129883,
|
10612 |
+
"learning_rate": 6.381803126546404e-07,
|
10613 |
+
"loss": 1.3348,
|
10614 |
+
"step": 1515
|
10615 |
+
},
|
10616 |
+
{
|
10617 |
+
"epoch": 0.2593226137529935,
|
10618 |
+
"grad_norm": 0.6288855075836182,
|
10619 |
+
"learning_rate": 6.225526228726742e-07,
|
10620 |
+
"loss": 1.2543,
|
10621 |
+
"step": 1516
|
10622 |
+
},
|
10623 |
+
{
|
10624 |
+
"epoch": 0.25949367088607594,
|
10625 |
+
"grad_norm": 0.6188000440597534,
|
10626 |
+
"learning_rate": 6.071174584221905e-07,
|
10627 |
+
"loss": 1.2993,
|
10628 |
+
"step": 1517
|
10629 |
+
},
|
10630 |
+
{
|
10631 |
+
"epoch": 0.2596647280191584,
|
10632 |
+
"grad_norm": 0.5862905979156494,
|
10633 |
+
"learning_rate": 5.918748794857265e-07,
|
10634 |
+
"loss": 1.3056,
|
10635 |
+
"step": 1518
|
10636 |
+
},
|
10637 |
+
{
|
10638 |
+
"epoch": 0.2598357851522409,
|
10639 |
+
"grad_norm": 0.6180735230445862,
|
10640 |
+
"learning_rate": 5.768249454949248e-07,
|
10641 |
+
"loss": 1.373,
|
10642 |
+
"step": 1519
|
10643 |
+
},
|
10644 |
+
{
|
10645 |
+
"epoch": 0.2600068422853233,
|
10646 |
+
"grad_norm": 0.651246190071106,
|
10647 |
+
"learning_rate": 5.619677151302849e-07,
|
10648 |
+
"loss": 1.1295,
|
10649 |
+
"step": 1520
|
10650 |
+
},
|
10651 |
+
{
|
10652 |
+
"epoch": 0.26017789941840574,
|
10653 |
+
"grad_norm": 0.6686509251594543,
|
10654 |
+
"learning_rate": 5.473032463209627e-07,
|
10655 |
+
"loss": 1.2983,
|
10656 |
+
"step": 1521
|
10657 |
+
},
|
10658 |
+
{
|
10659 |
+
"epoch": 0.2603489565514882,
|
10660 |
+
"grad_norm": 0.6885531544685364,
|
10661 |
+
"learning_rate": 5.328315962444874e-07,
|
10662 |
+
"loss": 1.2861,
|
10663 |
+
"step": 1522
|
10664 |
+
},
|
10665 |
+
{
|
10666 |
+
"epoch": 0.2605200136845707,
|
10667 |
+
"grad_norm": 0.7137675881385803,
|
10668 |
+
"learning_rate": 5.185528213266344e-07,
|
10669 |
+
"loss": 1.2464,
|
10670 |
+
"step": 1523
|
10671 |
+
},
|
10672 |
+
{
|
10673 |
+
"epoch": 0.2606910708176531,
|
10674 |
+
"grad_norm": 0.667129635810852,
|
10675 |
+
"learning_rate": 5.044669772410915e-07,
|
10676 |
+
"loss": 1.1251,
|
10677 |
+
"step": 1524
|
10678 |
+
},
|
10679 |
+
{
|
10680 |
+
"epoch": 0.26086212795073555,
|
10681 |
+
"grad_norm": 0.7013494372367859,
|
10682 |
+
"learning_rate": 4.905741189093371e-07,
|
10683 |
+
"loss": 1.0481,
|
10684 |
+
"step": 1525
|
10685 |
+
},
|
10686 |
+
{
|
10687 |
+
"epoch": 0.261033185083818,
|
10688 |
+
"grad_norm": 0.7502620816230774,
|
10689 |
+
"learning_rate": 4.76874300500374e-07,
|
10690 |
+
"loss": 1.3277,
|
10691 |
+
"step": 1526
|
10692 |
+
},
|
10693 |
+
{
|
10694 |
+
"epoch": 0.2612042422169004,
|
10695 |
+
"grad_norm": 0.7736833095550537,
|
10696 |
+
"learning_rate": 4.6336757543053445e-07,
|
10697 |
+
"loss": 1.2107,
|
10698 |
+
"step": 1527
|
10699 |
+
},
|
10700 |
+
{
|
10701 |
+
"epoch": 0.2613752993499829,
|
10702 |
+
"grad_norm": 0.804922878742218,
|
10703 |
+
"learning_rate": 4.5005399636326996e-07,
|
10704 |
+
"loss": 1.1791,
|
10705 |
+
"step": 1528
|
10706 |
+
},
|
10707 |
+
{
|
10708 |
+
"epoch": 0.26154635648306535,
|
10709 |
+
"grad_norm": 0.8147058486938477,
|
10710 |
+
"learning_rate": 4.369336152089343e-07,
|
10711 |
+
"loss": 1.049,
|
10712 |
+
"step": 1529
|
10713 |
+
},
|
10714 |
+
{
|
10715 |
+
"epoch": 0.2617174136161478,
|
10716 |
+
"grad_norm": 0.7693686485290527,
|
10717 |
+
"learning_rate": 4.2400648312461157e-07,
|
10718 |
+
"loss": 1.0118,
|
10719 |
+
"step": 1530
|
10720 |
+
},
|
10721 |
+
{
|
10722 |
+
"epoch": 0.2618884707492302,
|
10723 |
+
"grad_norm": 0.8183416128158569,
|
10724 |
+
"learning_rate": 4.1127265051388306e-07,
|
10725 |
+
"loss": 0.9699,
|
10726 |
+
"step": 1531
|
10727 |
+
},
|
10728 |
+
{
|
10729 |
+
"epoch": 0.2620595278823127,
|
10730 |
+
"grad_norm": 0.8818575739860535,
|
10731 |
+
"learning_rate": 3.9873216702664416e-07,
|
10732 |
+
"loss": 1.1803,
|
10733 |
+
"step": 1532
|
10734 |
+
},
|
10735 |
+
{
|
10736 |
+
"epoch": 0.26223058501539515,
|
10737 |
+
"grad_norm": 0.8875685930252075,
|
10738 |
+
"learning_rate": 3.8638508155893206e-07,
|
10739 |
+
"loss": 1.1227,
|
10740 |
+
"step": 1533
|
10741 |
+
},
|
10742 |
+
{
|
10743 |
+
"epoch": 0.2624016421484776,
|
10744 |
+
"grad_norm": 1.0393235683441162,
|
10745 |
+
"learning_rate": 3.7423144225268716e-07,
|
10746 |
+
"loss": 1.2747,
|
10747 |
+
"step": 1534
|
10748 |
+
},
|
10749 |
+
{
|
10750 |
+
"epoch": 0.26257269928156,
|
10751 |
+
"grad_norm": 0.9820855259895325,
|
10752 |
+
"learning_rate": 3.6227129649560323e-07,
|
10753 |
+
"loss": 0.9348,
|
10754 |
+
"step": 1535
|
10755 |
+
},
|
10756 |
+
{
|
10757 |
+
"epoch": 0.2627437564146425,
|
10758 |
+
"grad_norm": 1.1748095750808716,
|
10759 |
+
"learning_rate": 3.505046909209386e-07,
|
10760 |
+
"loss": 1.2405,
|
10761 |
+
"step": 1536
|
10762 |
+
},
|
10763 |
+
{
|
10764 |
+
"epoch": 0.26291481354772495,
|
10765 |
+
"grad_norm": 1.2374056577682495,
|
10766 |
+
"learning_rate": 3.389316714073165e-07,
|
10767 |
+
"loss": 1.2981,
|
10768 |
+
"step": 1537
|
10769 |
+
},
|
10770 |
+
{
|
10771 |
+
"epoch": 0.2630858706808074,
|
10772 |
+
"grad_norm": 1.3548519611358643,
|
10773 |
+
"learning_rate": 3.275522830785804e-07,
|
10774 |
+
"loss": 1.5384,
|
10775 |
+
"step": 1538
|
10776 |
+
},
|
10777 |
+
{
|
10778 |
+
"epoch": 0.2632569278138898,
|
10779 |
+
"grad_norm": 1.4279098510742188,
|
10780 |
+
"learning_rate": 3.163665703035612e-07,
|
10781 |
+
"loss": 1.3415,
|
10782 |
+
"step": 1539
|
10783 |
+
},
|
10784 |
+
{
|
10785 |
+
"epoch": 0.2634279849469723,
|
10786 |
+
"grad_norm": 1.478561520576477,
|
10787 |
+
"learning_rate": 3.053745766959659e-07,
|
10788 |
+
"loss": 1.2851,
|
10789 |
+
"step": 1540
|
10790 |
+
},
|
10791 |
+
{
|
10792 |
+
"epoch": 0.26359904208005475,
|
10793 |
+
"grad_norm": 1.5138171911239624,
|
10794 |
+
"learning_rate": 2.9457634511417253e-07,
|
10795 |
+
"loss": 1.2725,
|
10796 |
+
"step": 1541
|
10797 |
+
},
|
10798 |
+
{
|
10799 |
+
"epoch": 0.2637700992131372,
|
10800 |
+
"grad_norm": 1.4709621667861938,
|
10801 |
+
"learning_rate": 2.8397191766106336e-07,
|
10802 |
+
"loss": 1.1577,
|
10803 |
+
"step": 1542
|
10804 |
+
},
|
10805 |
+
{
|
10806 |
+
"epoch": 0.2639411563462196,
|
10807 |
+
"grad_norm": 1.7153759002685547,
|
10808 |
+
"learning_rate": 2.7356133568387507e-07,
|
10809 |
+
"loss": 1.0665,
|
10810 |
+
"step": 1543
|
10811 |
+
},
|
10812 |
+
{
|
10813 |
+
"epoch": 0.2641122134793021,
|
10814 |
+
"grad_norm": 1.7548620700836182,
|
10815 |
+
"learning_rate": 2.633446397740269e-07,
|
10816 |
+
"loss": 1.2541,
|
10817 |
+
"step": 1544
|
10818 |
+
},
|
10819 |
+
{
|
10820 |
+
"epoch": 0.26428327061238455,
|
10821 |
+
"grad_norm": 1.8897814750671387,
|
10822 |
+
"learning_rate": 2.533218697669704e-07,
|
10823 |
+
"loss": 1.0455,
|
10824 |
+
"step": 1545
|
10825 |
+
},
|
10826 |
+
{
|
10827 |
+
"epoch": 0.264454327745467,
|
10828 |
+
"grad_norm": 1.8144909143447876,
|
10829 |
+
"learning_rate": 2.4349306474202306e-07,
|
10830 |
+
"loss": 1.1294,
|
10831 |
+
"step": 1546
|
10832 |
+
},
|
10833 |
+
{
|
10834 |
+
"epoch": 0.2646253848785494,
|
10835 |
+
"grad_norm": 2.3166208267211914,
|
10836 |
+
"learning_rate": 2.3385826302223523e-07,
|
10837 |
+
"loss": 1.2215,
|
10838 |
+
"step": 1547
|
10839 |
+
},
|
10840 |
+
{
|
10841 |
+
"epoch": 0.2647964420116319,
|
10842 |
+
"grad_norm": 2.6024763584136963,
|
10843 |
+
"learning_rate": 2.2441750217420676e-07,
|
10844 |
+
"loss": 1.3463,
|
10845 |
+
"step": 1548
|
10846 |
+
},
|
10847 |
+
{
|
10848 |
+
"epoch": 0.26496749914471435,
|
10849 |
+
"grad_norm": 2.7021827697753906,
|
10850 |
+
"learning_rate": 2.1517081900797597e-07,
|
10851 |
+
"loss": 1.2238,
|
10852 |
+
"step": 1549
|
10853 |
+
},
|
10854 |
+
{
|
10855 |
+
"epoch": 0.2651385562777968,
|
10856 |
+
"grad_norm": 3.228471279144287,
|
10857 |
+
"learning_rate": 2.0611824957686432e-07,
|
10858 |
+
"loss": 1.6716,
|
10859 |
+
"step": 1550
|
10860 |
+
},
|
10861 |
+
{
|
10862 |
+
"epoch": 0.2653096134108792,
|
10863 |
+
"grad_norm": 0.5370853543281555,
|
10864 |
+
"learning_rate": 1.9725982917732645e-07,
|
10865 |
+
"loss": 1.1386,
|
10866 |
+
"step": 1551
|
10867 |
+
},
|
10868 |
+
{
|
10869 |
+
"epoch": 0.2654806705439617,
|
10870 |
+
"grad_norm": 0.5028488636016846,
|
10871 |
+
"learning_rate": 1.8859559234882252e-07,
|
10872 |
+
"loss": 1.0045,
|
10873 |
+
"step": 1552
|
10874 |
+
},
|
10875 |
+
{
|
10876 |
+
"epoch": 0.26565172767704415,
|
10877 |
+
"grad_norm": 0.5231890082359314,
|
10878 |
+
"learning_rate": 1.8012557287367392e-07,
|
10879 |
+
"loss": 1.0546,
|
10880 |
+
"step": 1553
|
10881 |
+
},
|
10882 |
+
{
|
10883 |
+
"epoch": 0.26582278481012656,
|
10884 |
+
"grad_norm": 0.5263524055480957,
|
10885 |
+
"learning_rate": 1.7184980377695227e-07,
|
10886 |
+
"loss": 1.1458,
|
10887 |
+
"step": 1554
|
10888 |
+
},
|
10889 |
+
{
|
10890 |
+
"epoch": 0.265993841943209,
|
10891 |
+
"grad_norm": 0.5260305404663086,
|
10892 |
+
"learning_rate": 1.637683173263238e-07,
|
10893 |
+
"loss": 1.138,
|
10894 |
+
"step": 1555
|
10895 |
+
},
|
10896 |
+
{
|
10897 |
+
"epoch": 0.2661648990762915,
|
10898 |
+
"grad_norm": 0.556893527507782,
|
10899 |
+
"learning_rate": 1.5588114503193308e-07,
|
10900 |
+
"loss": 1.2938,
|
10901 |
+
"step": 1556
|
10902 |
+
},
|
10903 |
+
{
|
10904 |
+
"epoch": 0.26633595620937395,
|
10905 |
+
"grad_norm": 0.5308122634887695,
|
10906 |
+
"learning_rate": 1.481883176463028e-07,
|
10907 |
+
"loss": 1.1436,
|
10908 |
+
"step": 1557
|
10909 |
+
},
|
10910 |
+
{
|
10911 |
+
"epoch": 0.26650701334245636,
|
10912 |
+
"grad_norm": 0.5532926321029663,
|
10913 |
+
"learning_rate": 1.4068986516417305e-07,
|
10914 |
+
"loss": 1.2233,
|
10915 |
+
"step": 1558
|
10916 |
+
},
|
10917 |
+
{
|
10918 |
+
"epoch": 0.2666780704755388,
|
10919 |
+
"grad_norm": 0.5234983563423157,
|
10920 |
+
"learning_rate": 1.333858168224178e-07,
|
10921 |
+
"loss": 1.2124,
|
10922 |
+
"step": 1559
|
10923 |
+
},
|
10924 |
+
{
|
10925 |
+
"epoch": 0.2668491276086213,
|
10926 |
+
"grad_norm": 0.5301246643066406,
|
10927 |
+
"learning_rate": 1.2627620109991744e-07,
|
10928 |
+
"loss": 1.3205,
|
10929 |
+
"step": 1560
|
10930 |
+
},
|
10931 |
+
{
|
10932 |
+
"epoch": 0.26702018474170375,
|
10933 |
+
"grad_norm": 0.5585340261459351,
|
10934 |
+
"learning_rate": 1.193610457174421e-07,
|
10935 |
+
"loss": 1.3532,
|
10936 |
+
"step": 1561
|
10937 |
+
},
|
10938 |
+
{
|
10939 |
+
"epoch": 0.26719124187478616,
|
10940 |
+
"grad_norm": 0.6104132533073425,
|
10941 |
+
"learning_rate": 1.1264037763756285e-07,
|
10942 |
+
"loss": 1.369,
|
10943 |
+
"step": 1562
|
10944 |
+
},
|
10945 |
+
{
|
10946 |
+
"epoch": 0.2673622990078686,
|
10947 |
+
"grad_norm": 0.5729210376739502,
|
10948 |
+
"learning_rate": 1.0611422306451845e-07,
|
10949 |
+
"loss": 1.4583,
|
10950 |
+
"step": 1563
|
10951 |
+
},
|
10952 |
+
{
|
10953 |
+
"epoch": 0.2675333561409511,
|
10954 |
+
"grad_norm": 0.5886104702949524,
|
10955 |
+
"learning_rate": 9.97826074441488e-08,
|
10956 |
+
"loss": 1.2683,
|
10957 |
+
"step": 1564
|
10958 |
+
},
|
10959 |
+
{
|
10960 |
+
"epoch": 0.26770441327403355,
|
10961 |
+
"grad_norm": 0.5967490673065186,
|
10962 |
+
"learning_rate": 9.364555546375053e-08,
|
10963 |
+
"loss": 1.3709,
|
10964 |
+
"step": 1565
|
10965 |
+
},
|
10966 |
+
{
|
10967 |
+
"epoch": 0.26787547040711596,
|
10968 |
+
"grad_norm": 0.6073997616767883,
|
10969 |
+
"learning_rate": 8.770309105203267e-08,
|
10970 |
+
"loss": 1.2455,
|
10971 |
+
"step": 1566
|
10972 |
+
},
|
10973 |
+
{
|
10974 |
+
"epoch": 0.2680465275401984,
|
10975 |
+
"grad_norm": 0.6279546618461609,
|
10976 |
+
"learning_rate": 8.195523737898337e-08,
|
10977 |
+
"loss": 1.3806,
|
10978 |
+
"step": 1567
|
10979 |
+
},
|
10980 |
+
{
|
10981 |
+
"epoch": 0.2682175846732809,
|
10982 |
+
"grad_norm": 0.6512458920478821,
|
10983 |
+
"learning_rate": 7.640201685578663e-08,
|
10984 |
+
"loss": 1.1722,
|
10985 |
+
"step": 1568
|
10986 |
+
},
|
10987 |
+
{
|
10988 |
+
"epoch": 0.26838864180636335,
|
10989 |
+
"grad_norm": 0.6530386805534363,
|
10990 |
+
"learning_rate": 7.104345113475575e-08,
|
10991 |
+
"loss": 1.3105,
|
10992 |
+
"step": 1569
|
10993 |
+
},
|
10994 |
+
{
|
10995 |
+
"epoch": 0.26855969893944576,
|
10996 |
+
"grad_norm": 0.5967045426368713,
|
10997 |
+
"learning_rate": 6.587956110922777e-08,
|
10998 |
+
"loss": 1.1254,
|
10999 |
+
"step": 1570
|
11000 |
+
},
|
11001 |
+
{
|
11002 |
+
"epoch": 0.2687307560725282,
|
11003 |
+
"grad_norm": 0.6580327749252319,
|
11004 |
+
"learning_rate": 6.091036691348028e-08,
|
11005 |
+
"loss": 1.2518,
|
11006 |
+
"step": 1571
|
11007 |
+
},
|
11008 |
+
{
|
11009 |
+
"epoch": 0.2689018132056107,
|
11010 |
+
"grad_norm": 0.6447963118553162,
|
11011 |
+
"learning_rate": 5.613588792268143e-08,
|
11012 |
+
"loss": 1.092,
|
11013 |
+
"step": 1572
|
11014 |
+
},
|
11015 |
+
{
|
11016 |
+
"epoch": 0.26907287033869315,
|
11017 |
+
"grad_norm": 0.7256022691726685,
|
11018 |
+
"learning_rate": 5.1556142752773315e-08,
|
11019 |
+
"loss": 1.2333,
|
11020 |
+
"step": 1573
|
11021 |
+
},
|
11022 |
+
{
|
11023 |
+
"epoch": 0.26924392747177556,
|
11024 |
+
"grad_norm": 0.7108719944953918,
|
11025 |
+
"learning_rate": 4.71711492604332e-08,
|
11026 |
+
"loss": 1.2665,
|
11027 |
+
"step": 1574
|
11028 |
+
},
|
11029 |
+
{
|
11030 |
+
"epoch": 0.269414984604858,
|
11031 |
+
"grad_norm": 0.7295149564743042,
|
11032 |
+
"learning_rate": 4.2980924542984636e-08,
|
11033 |
+
"loss": 1.1384,
|
11034 |
+
"step": 1575
|
11035 |
+
},
|
11036 |
+
{
|
11037 |
+
"epoch": 0.2695860417379405,
|
11038 |
+
"grad_norm": 0.755050003528595,
|
11039 |
+
"learning_rate": 3.898548493834198e-08,
|
11040 |
+
"loss": 1.2904,
|
11041 |
+
"step": 1576
|
11042 |
+
},
|
11043 |
+
{
|
11044 |
+
"epoch": 0.26975709887102295,
|
11045 |
+
"grad_norm": 0.8332591652870178,
|
11046 |
+
"learning_rate": 3.518484602493821e-08,
|
11047 |
+
"loss": 1.1799,
|
11048 |
+
"step": 1577
|
11049 |
+
},
|
11050 |
+
{
|
11051 |
+
"epoch": 0.26992815600410536,
|
11052 |
+
"grad_norm": 0.7835500240325928,
|
11053 |
+
"learning_rate": 3.1579022621675e-08,
|
11054 |
+
"loss": 1.1395,
|
11055 |
+
"step": 1578
|
11056 |
+
},
|
11057 |
+
{
|
11058 |
+
"epoch": 0.2700992131371878,
|
11059 |
+
"grad_norm": 0.8407353758811951,
|
11060 |
+
"learning_rate": 2.816802878785052e-08,
|
11061 |
+
"loss": 1.1414,
|
11062 |
+
"step": 1579
|
11063 |
+
},
|
11064 |
+
{
|
11065 |
+
"epoch": 0.2702702702702703,
|
11066 |
+
"grad_norm": 0.8765965104103088,
|
11067 |
+
"learning_rate": 2.495187782310393e-08,
|
11068 |
+
"loss": 1.2097,
|
11069 |
+
"step": 1580
|
11070 |
+
},
|
11071 |
+
{
|
11072 |
+
"epoch": 0.2704413274033527,
|
11073 |
+
"grad_norm": 0.8900992274284363,
|
11074 |
+
"learning_rate": 2.1930582267393196e-08,
|
11075 |
+
"loss": 1.1671,
|
11076 |
+
"step": 1581
|
11077 |
+
},
|
11078 |
+
{
|
11079 |
+
"epoch": 0.27061238453643516,
|
11080 |
+
"grad_norm": 0.9992753863334656,
|
11081 |
+
"learning_rate": 1.9104153900911802e-08,
|
11082 |
+
"loss": 1.1734,
|
11083 |
+
"step": 1582
|
11084 |
+
},
|
11085 |
+
{
|
11086 |
+
"epoch": 0.2707834416695176,
|
11087 |
+
"grad_norm": 0.9550507664680481,
|
11088 |
+
"learning_rate": 1.6472603744049908e-08,
|
11089 |
+
"loss": 1.0932,
|
11090 |
+
"step": 1583
|
11091 |
+
},
|
11092 |
+
{
|
11093 |
+
"epoch": 0.2709544988026001,
|
11094 |
+
"grad_norm": 1.0631017684936523,
|
11095 |
+
"learning_rate": 1.403594205736658e-08,
|
11096 |
+
"loss": 1.1782,
|
11097 |
+
"step": 1584
|
11098 |
+
},
|
11099 |
+
{
|
11100 |
+
"epoch": 0.2711255559356825,
|
11101 |
+
"grad_norm": 1.0812159776687622,
|
11102 |
+
"learning_rate": 1.179417834153429e-08,
|
11103 |
+
"loss": 1.135,
|
11104 |
+
"step": 1585
|
11105 |
+
},
|
11106 |
+
{
|
11107 |
+
"epoch": 0.27129661306876496,
|
11108 |
+
"grad_norm": 1.1135135889053345,
|
11109 |
+
"learning_rate": 9.747321337316706e-09,
|
11110 |
+
"loss": 0.9834,
|
11111 |
+
"step": 1586
|
11112 |
+
},
|
11113 |
+
{
|
11114 |
+
"epoch": 0.2714676702018474,
|
11115 |
+
"grad_norm": 1.3064099550247192,
|
11116 |
+
"learning_rate": 7.895379025518734e-09,
|
11117 |
+
"loss": 1.266,
|
11118 |
+
"step": 1587
|
11119 |
+
},
|
11120 |
+
{
|
11121 |
+
"epoch": 0.2716387273349299,
|
11122 |
+
"grad_norm": 1.402748942375183,
|
11123 |
+
"learning_rate": 6.238358626958763e-09,
|
11124 |
+
"loss": 1.284,
|
11125 |
+
"step": 1588
|
11126 |
+
},
|
11127 |
+
{
|
11128 |
+
"epoch": 0.2718097844680123,
|
11129 |
+
"grad_norm": 1.5143039226531982,
|
11130 |
+
"learning_rate": 4.776266602452006e-09,
|
11131 |
+
"loss": 1.0113,
|
11132 |
+
"step": 1589
|
11133 |
+
},
|
11134 |
+
{
|
11135 |
+
"epoch": 0.27198084160109476,
|
11136 |
+
"grad_norm": 1.624098300933838,
|
11137 |
+
"learning_rate": 3.5091086527772044e-09,
|
11138 |
+
"loss": 1.0796,
|
11139 |
+
"step": 1590
|
11140 |
+
},
|
11141 |
+
{
|
11142 |
+
"epoch": 0.2721518987341772,
|
11143 |
+
"grad_norm": 1.899276614189148,
|
11144 |
+
"learning_rate": 2.4368897186433095e-09,
|
11145 |
+
"loss": 1.2258,
|
11146 |
+
"step": 1591
|
11147 |
+
},
|
11148 |
+
{
|
11149 |
+
"epoch": 0.2723229558672597,
|
11150 |
+
"grad_norm": 1.6464680433273315,
|
11151 |
+
"learning_rate": 1.5596139806950405e-09,
|
11152 |
+
"loss": 1.2317,
|
11153 |
+
"step": 1592
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 0.2724940130003421,
|
11157 |
+
"grad_norm": 1.796233892440796,
|
11158 |
+
"learning_rate": 8.772848594795769e-10,
|
11159 |
+
"loss": 1.1118,
|
11160 |
+
"step": 1593
|
11161 |
+
},
|
11162 |
+
{
|
11163 |
+
"epoch": 0.27266507013342456,
|
11164 |
+
"grad_norm": 2.145975112915039,
|
11165 |
+
"learning_rate": 3.899050154354544e-10,
|
11166 |
+
"loss": 1.3574,
|
11167 |
+
"step": 1594
|
11168 |
+
},
|
11169 |
+
{
|
11170 |
+
"epoch": 0.272836127266507,
|
11171 |
+
"grad_norm": 2.1007165908813477,
|
11172 |
+
"learning_rate": 9.74763488759134e-11,
|
11173 |
+
"loss": 1.3241,
|
11174 |
+
"step": 1595
|
11175 |
+
},
|
11176 |
+
{
|
11177 |
+
"epoch": 0.2730071843995895,
|
11178 |
+
"grad_norm": 2.092686891555786,
|
11179 |
+
"learning_rate": 0.0,
|
11180 |
+
"loss": 1.3341,
|
11181 |
+
"step": 1596
|
11182 |
}
|
11183 |
],
|
11184 |
"logging_steps": 1,
|
|
|
11193 |
"should_evaluate": false,
|
11194 |
"should_log": false,
|
11195 |
"should_save": true,
|
11196 |
+
"should_training_stop": true
|
11197 |
},
|
11198 |
"attributes": {}
|
11199 |
}
|
11200 |
},
|
11201 |
+
"total_flos": 2.026146219687936e+18,
|
11202 |
"train_batch_size": 4,
|
11203 |
"trial_name": null,
|
11204 |
"trial_params": null
|