Upload folder using huggingface_hub
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- runs/Apr04_02-55-46_320d677d8fbf/events.out.tfevents.1743735349.320d677d8fbf.4053.0 +3 -0
- runs/Apr04_03-03-34_320d677d8fbf/events.out.tfevents.1743735817.320d677d8fbf.6876.0 +3 -0
- runs/Apr04_03-04-20_320d677d8fbf/events.out.tfevents.1743735863.320d677d8fbf.28090.0 +3 -0
- runs/Apr04_03-08-14_320d677d8fbf/events.out.tfevents.1743736097.320d677d8fbf.187095.0 +3 -0
- trainer_state.json +2463 -4
- training_args.bin +3 -0
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4938446392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62eaf2329eca44f22abaccd1f5f524b187c8f984c9aac7c1dc56eff75a82d671
|
3 |
size 4938446392
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3821736024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41b049df65dea08166e7f563f856c60efd28c6710065880ca26c25fcda1ca4a5
|
3 |
size 3821736024
|
runs/Apr04_02-55-46_320d677d8fbf/events.out.tfevents.1743735349.320d677d8fbf.4053.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2f002a72d324bed426996133723aa248fdbe8ff023c42c8a192f3d195032030
|
3 |
+
size 6158
|
runs/Apr04_03-03-34_320d677d8fbf/events.out.tfevents.1743735817.320d677d8fbf.6876.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c305d00a9c0e40286408f92d6596a233011790bbe4659698ceb674b3c477cc3
|
3 |
+
size 6779
|
runs/Apr04_03-04-20_320d677d8fbf/events.out.tfevents.1743735863.320d677d8fbf.28090.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e233659f46eb6a1707541f7355192dc0b0484455a8868b90ca82fb5aef09a3a
|
3 |
+
size 14338
|
runs/Apr04_03-08-14_320d677d8fbf/events.out.tfevents.1743736097.320d677d8fbf.187095.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c390d8c71846c2f48c04ce6d5accf75ef1d4d8dc8645e8ceae2f02c771f1fc0b
|
3 |
+
size 217463
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4557,6 +4557,2465 @@
|
|
4557 |
"learning_rate": 2.991522876735154e-05,
|
4558 |
"loss": 0.0065,
|
4559 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4560 |
}
|
4561 |
],
|
4562 |
"logging_steps": 10,
|
@@ -4571,12 +7030,12 @@
|
|
4571 |
"should_evaluate": false,
|
4572 |
"should_log": false,
|
4573 |
"should_save": true,
|
4574 |
-
"should_training_stop":
|
4575 |
},
|
4576 |
"attributes": {}
|
4577 |
}
|
4578 |
},
|
4579 |
-
"total_flos":
|
4580 |
"train_batch_size": 16,
|
4581 |
"trial_name": null,
|
4582 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.337135614702155,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 10000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4557 |
"learning_rate": 2.991522876735154e-05,
|
4558 |
"loss": 0.0065,
|
4559 |
"step": 6500
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 4.1254752851711025,
|
4563 |
+
"grad_norm": 0.1556195318698883,
|
4564 |
+
"learning_rate": 2.976391850971065e-05,
|
4565 |
+
"loss": 0.0155,
|
4566 |
+
"step": 6510
|
4567 |
+
},
|
4568 |
+
{
|
4569 |
+
"epoch": 4.1318124207858045,
|
4570 |
+
"grad_norm": 0.24738621711730957,
|
4571 |
+
"learning_rate": 2.9612829550614836e-05,
|
4572 |
+
"loss": 0.0107,
|
4573 |
+
"step": 6520
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 4.138149556400507,
|
4577 |
+
"grad_norm": 0.2733817398548126,
|
4578 |
+
"learning_rate": 2.9461963542348737e-05,
|
4579 |
+
"loss": 0.0083,
|
4580 |
+
"step": 6530
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 4.144486692015209,
|
4584 |
+
"grad_norm": 0.4232415556907654,
|
4585 |
+
"learning_rate": 2.931132213475884e-05,
|
4586 |
+
"loss": 0.0154,
|
4587 |
+
"step": 6540
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 4.150823827629911,
|
4591 |
+
"grad_norm": 0.16608037054538727,
|
4592 |
+
"learning_rate": 2.916090697523549e-05,
|
4593 |
+
"loss": 0.0117,
|
4594 |
+
"step": 6550
|
4595 |
+
},
|
4596 |
+
{
|
4597 |
+
"epoch": 4.157160963244613,
|
4598 |
+
"grad_norm": 0.226557657122612,
|
4599 |
+
"learning_rate": 2.9010719708694722e-05,
|
4600 |
+
"loss": 0.0087,
|
4601 |
+
"step": 6560
|
4602 |
+
},
|
4603 |
+
{
|
4604 |
+
"epoch": 4.163498098859316,
|
4605 |
+
"grad_norm": 0.34729430079460144,
|
4606 |
+
"learning_rate": 2.8860761977560436e-05,
|
4607 |
+
"loss": 0.012,
|
4608 |
+
"step": 6570
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 4.169835234474018,
|
4612 |
+
"grad_norm": 0.2842123806476593,
|
4613 |
+
"learning_rate": 2.8711035421746367e-05,
|
4614 |
+
"loss": 0.0109,
|
4615 |
+
"step": 6580
|
4616 |
+
},
|
4617 |
+
{
|
4618 |
+
"epoch": 4.17617237008872,
|
4619 |
+
"grad_norm": 0.24015074968338013,
|
4620 |
+
"learning_rate": 2.8561541678638142e-05,
|
4621 |
+
"loss": 0.01,
|
4622 |
+
"step": 6590
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 4.182509505703422,
|
4626 |
+
"grad_norm": 0.2127242535352707,
|
4627 |
+
"learning_rate": 2.8412282383075363e-05,
|
4628 |
+
"loss": 0.0092,
|
4629 |
+
"step": 6600
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 4.188846641318124,
|
4633 |
+
"grad_norm": 0.1556386947631836,
|
4634 |
+
"learning_rate": 2.8263259167333777e-05,
|
4635 |
+
"loss": 0.0066,
|
4636 |
+
"step": 6610
|
4637 |
+
},
|
4638 |
+
{
|
4639 |
+
"epoch": 4.195183776932827,
|
4640 |
+
"grad_norm": 0.2527133524417877,
|
4641 |
+
"learning_rate": 2.811447366110741e-05,
|
4642 |
+
"loss": 0.0121,
|
4643 |
+
"step": 6620
|
4644 |
+
},
|
4645 |
+
{
|
4646 |
+
"epoch": 4.201520912547529,
|
4647 |
+
"grad_norm": 0.13160255551338196,
|
4648 |
+
"learning_rate": 2.7965927491490705e-05,
|
4649 |
+
"loss": 0.0069,
|
4650 |
+
"step": 6630
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 4.2078580481622305,
|
4654 |
+
"grad_norm": 0.29007360339164734,
|
4655 |
+
"learning_rate": 2.7817622282960815e-05,
|
4656 |
+
"loss": 0.0107,
|
4657 |
+
"step": 6640
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 4.2141951837769325,
|
4661 |
+
"grad_norm": 0.09741051495075226,
|
4662 |
+
"learning_rate": 2.766955965735968e-05,
|
4663 |
+
"loss": 0.0069,
|
4664 |
+
"step": 6650
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 4.220532319391635,
|
4668 |
+
"grad_norm": 0.2465200573205948,
|
4669 |
+
"learning_rate": 2.7521741233876496e-05,
|
4670 |
+
"loss": 0.0105,
|
4671 |
+
"step": 6660
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 4.226869455006337,
|
4675 |
+
"grad_norm": 0.18087448179721832,
|
4676 |
+
"learning_rate": 2.7374168629029813e-05,
|
4677 |
+
"loss": 0.0122,
|
4678 |
+
"step": 6670
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 4.233206590621039,
|
4682 |
+
"grad_norm": 0.15381130576133728,
|
4683 |
+
"learning_rate": 2.7226843456650037e-05,
|
4684 |
+
"loss": 0.0091,
|
4685 |
+
"step": 6680
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 4.239543726235741,
|
4689 |
+
"grad_norm": 0.16442646086215973,
|
4690 |
+
"learning_rate": 2.707976732786166e-05,
|
4691 |
+
"loss": 0.0094,
|
4692 |
+
"step": 6690
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 4.245880861850444,
|
4696 |
+
"grad_norm": 0.14896883070468903,
|
4697 |
+
"learning_rate": 2.693294185106562e-05,
|
4698 |
+
"loss": 0.0158,
|
4699 |
+
"step": 6700
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 4.252217997465146,
|
4703 |
+
"grad_norm": 0.1330922693014145,
|
4704 |
+
"learning_rate": 2.6786368631921836e-05,
|
4705 |
+
"loss": 0.0097,
|
4706 |
+
"step": 6710
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 4.258555133079848,
|
4710 |
+
"grad_norm": 0.18497434258460999,
|
4711 |
+
"learning_rate": 2.6640049273331515e-05,
|
4712 |
+
"loss": 0.0082,
|
4713 |
+
"step": 6720
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 4.26489226869455,
|
4717 |
+
"grad_norm": 0.12878283858299255,
|
4718 |
+
"learning_rate": 2.6493985375419778e-05,
|
4719 |
+
"loss": 0.0141,
|
4720 |
+
"step": 6730
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 4.271229404309253,
|
4724 |
+
"grad_norm": 0.1790645867586136,
|
4725 |
+
"learning_rate": 2.6348178535517966e-05,
|
4726 |
+
"loss": 0.012,
|
4727 |
+
"step": 6740
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 4.277566539923955,
|
4731 |
+
"grad_norm": 0.29659292101860046,
|
4732 |
+
"learning_rate": 2.6202630348146324e-05,
|
4733 |
+
"loss": 0.0141,
|
4734 |
+
"step": 6750
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 4.283903675538657,
|
4738 |
+
"grad_norm": 0.2492123246192932,
|
4739 |
+
"learning_rate": 2.6057342404996522e-05,
|
4740 |
+
"loss": 0.0133,
|
4741 |
+
"step": 6760
|
4742 |
+
},
|
4743 |
+
{
|
4744 |
+
"epoch": 4.2902408111533585,
|
4745 |
+
"grad_norm": 0.11853443086147308,
|
4746 |
+
"learning_rate": 2.591231629491423e-05,
|
4747 |
+
"loss": 0.01,
|
4748 |
+
"step": 6770
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 4.2965779467680605,
|
4752 |
+
"grad_norm": 0.24464382231235504,
|
4753 |
+
"learning_rate": 2.5767553603881767e-05,
|
4754 |
+
"loss": 0.0088,
|
4755 |
+
"step": 6780
|
4756 |
+
},
|
4757 |
+
{
|
4758 |
+
"epoch": 4.302915082382763,
|
4759 |
+
"grad_norm": 0.2709729075431824,
|
4760 |
+
"learning_rate": 2.562305591500069e-05,
|
4761 |
+
"loss": 0.0075,
|
4762 |
+
"step": 6790
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 4.309252217997465,
|
4766 |
+
"grad_norm": 0.19660340249538422,
|
4767 |
+
"learning_rate": 2.547882480847461e-05,
|
4768 |
+
"loss": 0.0136,
|
4769 |
+
"step": 6800
|
4770 |
+
},
|
4771 |
+
{
|
4772 |
+
"epoch": 4.315589353612167,
|
4773 |
+
"grad_norm": 0.21766068041324615,
|
4774 |
+
"learning_rate": 2.5334861861591753e-05,
|
4775 |
+
"loss": 0.0073,
|
4776 |
+
"step": 6810
|
4777 |
+
},
|
4778 |
+
{
|
4779 |
+
"epoch": 4.321926489226869,
|
4780 |
+
"grad_norm": 0.27058249711990356,
|
4781 |
+
"learning_rate": 2.5191168648707887e-05,
|
4782 |
+
"loss": 0.0128,
|
4783 |
+
"step": 6820
|
4784 |
+
},
|
4785 |
+
{
|
4786 |
+
"epoch": 4.328263624841572,
|
4787 |
+
"grad_norm": 0.18688562512397766,
|
4788 |
+
"learning_rate": 2.5047746741228978e-05,
|
4789 |
+
"loss": 0.0146,
|
4790 |
+
"step": 6830
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 4.334600760456274,
|
4794 |
+
"grad_norm": 0.23607704043388367,
|
4795 |
+
"learning_rate": 2.490459770759398e-05,
|
4796 |
+
"loss": 0.0089,
|
4797 |
+
"step": 6840
|
4798 |
+
},
|
4799 |
+
{
|
4800 |
+
"epoch": 4.340937896070976,
|
4801 |
+
"grad_norm": 0.1704394668340683,
|
4802 |
+
"learning_rate": 2.476172311325783e-05,
|
4803 |
+
"loss": 0.009,
|
4804 |
+
"step": 6850
|
4805 |
+
},
|
4806 |
+
{
|
4807 |
+
"epoch": 4.347275031685678,
|
4808 |
+
"grad_norm": 0.11735841631889343,
|
4809 |
+
"learning_rate": 2.4619124520674146e-05,
|
4810 |
+
"loss": 0.0099,
|
4811 |
+
"step": 6860
|
4812 |
+
},
|
4813 |
+
{
|
4814 |
+
"epoch": 4.35361216730038,
|
4815 |
+
"grad_norm": 0.17828071117401123,
|
4816 |
+
"learning_rate": 2.447680348927837e-05,
|
4817 |
+
"loss": 0.0069,
|
4818 |
+
"step": 6870
|
4819 |
+
},
|
4820 |
+
{
|
4821 |
+
"epoch": 4.359949302915083,
|
4822 |
+
"grad_norm": 0.09438079595565796,
|
4823 |
+
"learning_rate": 2.433476157547044e-05,
|
4824 |
+
"loss": 0.0077,
|
4825 |
+
"step": 6880
|
4826 |
+
},
|
4827 |
+
{
|
4828 |
+
"epoch": 4.366286438529785,
|
4829 |
+
"grad_norm": 0.14729665219783783,
|
4830 |
+
"learning_rate": 2.419300033259798e-05,
|
4831 |
+
"loss": 0.0131,
|
4832 |
+
"step": 6890
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 4.3726235741444865,
|
4836 |
+
"grad_norm": 0.21107903122901917,
|
4837 |
+
"learning_rate": 2.405152131093926e-05,
|
4838 |
+
"loss": 0.0089,
|
4839 |
+
"step": 6900
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 4.3789607097591885,
|
4843 |
+
"grad_norm": 0.26359254121780396,
|
4844 |
+
"learning_rate": 2.3910326057686127e-05,
|
4845 |
+
"loss": 0.012,
|
4846 |
+
"step": 6910
|
4847 |
+
},
|
4848 |
+
{
|
4849 |
+
"epoch": 4.385297845373891,
|
4850 |
+
"grad_norm": 0.22561860084533691,
|
4851 |
+
"learning_rate": 2.3769416116927335e-05,
|
4852 |
+
"loss": 0.0112,
|
4853 |
+
"step": 6920
|
4854 |
+
},
|
4855 |
+
{
|
4856 |
+
"epoch": 4.391634980988593,
|
4857 |
+
"grad_norm": 0.253752201795578,
|
4858 |
+
"learning_rate": 2.362879302963135e-05,
|
4859 |
+
"loss": 0.0087,
|
4860 |
+
"step": 6930
|
4861 |
+
},
|
4862 |
+
{
|
4863 |
+
"epoch": 4.397972116603295,
|
4864 |
+
"grad_norm": 0.1931048333644867,
|
4865 |
+
"learning_rate": 2.3488458333629777e-05,
|
4866 |
+
"loss": 0.0078,
|
4867 |
+
"step": 6940
|
4868 |
+
},
|
4869 |
+
{
|
4870 |
+
"epoch": 4.404309252217997,
|
4871 |
+
"grad_norm": 0.13094092905521393,
|
4872 |
+
"learning_rate": 2.3348413563600325e-05,
|
4873 |
+
"loss": 0.0088,
|
4874 |
+
"step": 6950
|
4875 |
+
},
|
4876 |
+
{
|
4877 |
+
"epoch": 4.4106463878327,
|
4878 |
+
"grad_norm": 0.14549599587917328,
|
4879 |
+
"learning_rate": 2.3208660251050158e-05,
|
4880 |
+
"loss": 0.0099,
|
4881 |
+
"step": 6960
|
4882 |
+
},
|
4883 |
+
{
|
4884 |
+
"epoch": 4.416983523447402,
|
4885 |
+
"grad_norm": 0.10897214710712433,
|
4886 |
+
"learning_rate": 2.3069199924299174e-05,
|
4887 |
+
"loss": 0.0084,
|
4888 |
+
"step": 6970
|
4889 |
+
},
|
4890 |
+
{
|
4891 |
+
"epoch": 4.423320659062104,
|
4892 |
+
"grad_norm": 0.13430652022361755,
|
4893 |
+
"learning_rate": 2.29300341084631e-05,
|
4894 |
+
"loss": 0.0076,
|
4895 |
+
"step": 6980
|
4896 |
+
},
|
4897 |
+
{
|
4898 |
+
"epoch": 4.429657794676806,
|
4899 |
+
"grad_norm": 0.3317708969116211,
|
4900 |
+
"learning_rate": 2.279116432543705e-05,
|
4901 |
+
"loss": 0.0178,
|
4902 |
+
"step": 6990
|
4903 |
+
},
|
4904 |
+
{
|
4905 |
+
"epoch": 4.435994930291509,
|
4906 |
+
"grad_norm": 0.29772767424583435,
|
4907 |
+
"learning_rate": 2.2652592093878666e-05,
|
4908 |
+
"loss": 0.012,
|
4909 |
+
"step": 7000
|
4910 |
+
},
|
4911 |
+
{
|
4912 |
+
"epoch": 4.442332065906211,
|
4913 |
+
"grad_norm": 0.287555068731308,
|
4914 |
+
"learning_rate": 2.251431892919171e-05,
|
4915 |
+
"loss": 0.0131,
|
4916 |
+
"step": 7010
|
4917 |
+
},
|
4918 |
+
{
|
4919 |
+
"epoch": 4.448669201520913,
|
4920 |
+
"grad_norm": 0.22690831124782562,
|
4921 |
+
"learning_rate": 2.237634634350934e-05,
|
4922 |
+
"loss": 0.0105,
|
4923 |
+
"step": 7020
|
4924 |
+
},
|
4925 |
+
{
|
4926 |
+
"epoch": 4.455006337135615,
|
4927 |
+
"grad_norm": 0.2328868806362152,
|
4928 |
+
"learning_rate": 2.2238675845677663e-05,
|
4929 |
+
"loss": 0.011,
|
4930 |
+
"step": 7030
|
4931 |
+
},
|
4932 |
+
{
|
4933 |
+
"epoch": 4.4613434727503165,
|
4934 |
+
"grad_norm": 0.17060483992099762,
|
4935 |
+
"learning_rate": 2.2101308941239203e-05,
|
4936 |
+
"loss": 0.0099,
|
4937 |
+
"step": 7040
|
4938 |
+
},
|
4939 |
+
{
|
4940 |
+
"epoch": 4.467680608365019,
|
4941 |
+
"grad_norm": 0.3242473006248474,
|
4942 |
+
"learning_rate": 2.196424713241637e-05,
|
4943 |
+
"loss": 0.0107,
|
4944 |
+
"step": 7050
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 4.474017743979721,
|
4948 |
+
"grad_norm": 0.2539024353027344,
|
4949 |
+
"learning_rate": 2.182749191809518e-05,
|
4950 |
+
"loss": 0.0069,
|
4951 |
+
"step": 7060
|
4952 |
+
},
|
4953 |
+
{
|
4954 |
+
"epoch": 4.480354879594423,
|
4955 |
+
"grad_norm": 0.2088938057422638,
|
4956 |
+
"learning_rate": 2.1691044793808734e-05,
|
4957 |
+
"loss": 0.0118,
|
4958 |
+
"step": 7070
|
4959 |
+
},
|
4960 |
+
{
|
4961 |
+
"epoch": 4.486692015209125,
|
4962 |
+
"grad_norm": 0.24568264186382294,
|
4963 |
+
"learning_rate": 2.1554907251720945e-05,
|
4964 |
+
"loss": 0.0091,
|
4965 |
+
"step": 7080
|
4966 |
+
},
|
4967 |
+
{
|
4968 |
+
"epoch": 4.493029150823828,
|
4969 |
+
"grad_norm": 0.14442333579063416,
|
4970 |
+
"learning_rate": 2.1419080780610123e-05,
|
4971 |
+
"loss": 0.0088,
|
4972 |
+
"step": 7090
|
4973 |
+
},
|
4974 |
+
{
|
4975 |
+
"epoch": 4.49936628643853,
|
4976 |
+
"grad_norm": 0.22089751064777374,
|
4977 |
+
"learning_rate": 2.128356686585282e-05,
|
4978 |
+
"loss": 0.013,
|
4979 |
+
"step": 7100
|
4980 |
+
},
|
4981 |
+
{
|
4982 |
+
"epoch": 4.505703422053232,
|
4983 |
+
"grad_norm": 0.1270669400691986,
|
4984 |
+
"learning_rate": 2.1148366989407496e-05,
|
4985 |
+
"loss": 0.0113,
|
4986 |
+
"step": 7110
|
4987 |
+
},
|
4988 |
+
{
|
4989 |
+
"epoch": 4.512040557667934,
|
4990 |
+
"grad_norm": 0.11936408281326294,
|
4991 |
+
"learning_rate": 2.1013482629798333e-05,
|
4992 |
+
"loss": 0.0072,
|
4993 |
+
"step": 7120
|
4994 |
+
},
|
4995 |
+
{
|
4996 |
+
"epoch": 4.518377693282636,
|
4997 |
+
"grad_norm": 0.1418917030096054,
|
4998 |
+
"learning_rate": 2.0878915262099098e-05,
|
4999 |
+
"loss": 0.0097,
|
5000 |
+
"step": 7130
|
5001 |
+
},
|
5002 |
+
{
|
5003 |
+
"epoch": 4.524714828897339,
|
5004 |
+
"grad_norm": 0.22373194992542267,
|
5005 |
+
"learning_rate": 2.0744666357916925e-05,
|
5006 |
+
"loss": 0.0107,
|
5007 |
+
"step": 7140
|
5008 |
+
},
|
5009 |
+
{
|
5010 |
+
"epoch": 4.531051964512041,
|
5011 |
+
"grad_norm": 0.16027112305164337,
|
5012 |
+
"learning_rate": 2.061073738537635e-05,
|
5013 |
+
"loss": 0.0076,
|
5014 |
+
"step": 7150
|
5015 |
+
},
|
5016 |
+
{
|
5017 |
+
"epoch": 4.537389100126743,
|
5018 |
+
"grad_norm": 0.205812469124794,
|
5019 |
+
"learning_rate": 2.0477129809103147e-05,
|
5020 |
+
"loss": 0.0156,
|
5021 |
+
"step": 7160
|
5022 |
+
},
|
5023 |
+
{
|
5024 |
+
"epoch": 4.5437262357414445,
|
5025 |
+
"grad_norm": 0.07858346402645111,
|
5026 |
+
"learning_rate": 2.0343845090208368e-05,
|
5027 |
+
"loss": 0.0069,
|
5028 |
+
"step": 7170
|
5029 |
+
},
|
5030 |
+
{
|
5031 |
+
"epoch": 4.550063371356147,
|
5032 |
+
"grad_norm": 0.173725426197052,
|
5033 |
+
"learning_rate": 2.0210884686272368e-05,
|
5034 |
+
"loss": 0.0107,
|
5035 |
+
"step": 7180
|
5036 |
+
},
|
5037 |
+
{
|
5038 |
+
"epoch": 4.556400506970849,
|
5039 |
+
"grad_norm": 0.14513538777828217,
|
5040 |
+
"learning_rate": 2.0078250051328784e-05,
|
5041 |
+
"loss": 0.0096,
|
5042 |
+
"step": 7190
|
5043 |
+
},
|
5044 |
+
{
|
5045 |
+
"epoch": 4.562737642585551,
|
5046 |
+
"grad_norm": 0.14513027667999268,
|
5047 |
+
"learning_rate": 1.9945942635848748e-05,
|
5048 |
+
"loss": 0.0155,
|
5049 |
+
"step": 7200
|
5050 |
+
},
|
5051 |
+
{
|
5052 |
+
"epoch": 4.569074778200253,
|
5053 |
+
"grad_norm": 0.12547020614147186,
|
5054 |
+
"learning_rate": 1.981396388672496e-05,
|
5055 |
+
"loss": 0.007,
|
5056 |
+
"step": 7210
|
5057 |
+
},
|
5058 |
+
{
|
5059 |
+
"epoch": 4.575411913814955,
|
5060 |
+
"grad_norm": 0.09598460793495178,
|
5061 |
+
"learning_rate": 1.9682315247255894e-05,
|
5062 |
+
"loss": 0.0077,
|
5063 |
+
"step": 7220
|
5064 |
+
},
|
5065 |
+
{
|
5066 |
+
"epoch": 4.581749049429658,
|
5067 |
+
"grad_norm": 0.09776268899440765,
|
5068 |
+
"learning_rate": 1.9550998157129946e-05,
|
5069 |
+
"loss": 0.0088,
|
5070 |
+
"step": 7230
|
5071 |
+
},
|
5072 |
+
{
|
5073 |
+
"epoch": 4.58808618504436,
|
5074 |
+
"grad_norm": 0.14646144211292267,
|
5075 |
+
"learning_rate": 1.942001405240979e-05,
|
5076 |
+
"loss": 0.0066,
|
5077 |
+
"step": 7240
|
5078 |
+
},
|
5079 |
+
{
|
5080 |
+
"epoch": 4.594423320659062,
|
5081 |
+
"grad_norm": 0.09714064747095108,
|
5082 |
+
"learning_rate": 1.928936436551661e-05,
|
5083 |
+
"loss": 0.0068,
|
5084 |
+
"step": 7250
|
5085 |
+
},
|
5086 |
+
{
|
5087 |
+
"epoch": 4.600760456273765,
|
5088 |
+
"grad_norm": 0.09656299650669098,
|
5089 |
+
"learning_rate": 1.9159050525214452e-05,
|
5090 |
+
"loss": 0.0088,
|
5091 |
+
"step": 7260
|
5092 |
+
},
|
5093 |
+
{
|
5094 |
+
"epoch": 4.607097591888467,
|
5095 |
+
"grad_norm": 0.11101258546113968,
|
5096 |
+
"learning_rate": 1.9029073956594606e-05,
|
5097 |
+
"loss": 0.0125,
|
5098 |
+
"step": 7270
|
5099 |
+
},
|
5100 |
+
{
|
5101 |
+
"epoch": 4.613434727503169,
|
5102 |
+
"grad_norm": 0.24712498486042023,
|
5103 |
+
"learning_rate": 1.8899436081059975e-05,
|
5104 |
+
"loss": 0.0079,
|
5105 |
+
"step": 7280
|
5106 |
+
},
|
5107 |
+
{
|
5108 |
+
"epoch": 4.619771863117871,
|
5109 |
+
"grad_norm": 0.27654388546943665,
|
5110 |
+
"learning_rate": 1.877013831630961e-05,
|
5111 |
+
"loss": 0.0086,
|
5112 |
+
"step": 7290
|
5113 |
+
},
|
5114 |
+
{
|
5115 |
+
"epoch": 4.6261089987325725,
|
5116 |
+
"grad_norm": 0.30745381116867065,
|
5117 |
+
"learning_rate": 1.8641182076323148e-05,
|
5118 |
+
"loss": 0.0115,
|
5119 |
+
"step": 7300
|
5120 |
+
},
|
5121 |
+
{
|
5122 |
+
"epoch": 4.632446134347275,
|
5123 |
+
"grad_norm": 0.21805402636528015,
|
5124 |
+
"learning_rate": 1.851256877134538e-05,
|
5125 |
+
"loss": 0.0086,
|
5126 |
+
"step": 7310
|
5127 |
+
},
|
5128 |
+
{
|
5129 |
+
"epoch": 4.638783269961977,
|
5130 |
+
"grad_norm": 0.14376069605350494,
|
5131 |
+
"learning_rate": 1.838429980787081e-05,
|
5132 |
+
"loss": 0.0078,
|
5133 |
+
"step": 7320
|
5134 |
+
},
|
5135 |
+
{
|
5136 |
+
"epoch": 4.645120405576679,
|
5137 |
+
"grad_norm": 0.15503360331058502,
|
5138 |
+
"learning_rate": 1.8256376588628238e-05,
|
5139 |
+
"loss": 0.0061,
|
5140 |
+
"step": 7330
|
5141 |
+
},
|
5142 |
+
{
|
5143 |
+
"epoch": 4.651457541191381,
|
5144 |
+
"grad_norm": 0.21702007949352264,
|
5145 |
+
"learning_rate": 1.8128800512565513e-05,
|
5146 |
+
"loss": 0.0078,
|
5147 |
+
"step": 7340
|
5148 |
+
},
|
5149 |
+
{
|
5150 |
+
"epoch": 4.657794676806084,
|
5151 |
+
"grad_norm": 0.16204503178596497,
|
5152 |
+
"learning_rate": 1.800157297483417e-05,
|
5153 |
+
"loss": 0.0084,
|
5154 |
+
"step": 7350
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 4.664131812420786,
|
5158 |
+
"grad_norm": 0.16800753772258759,
|
5159 |
+
"learning_rate": 1.787469536677419e-05,
|
5160 |
+
"loss": 0.0123,
|
5161 |
+
"step": 7360
|
5162 |
+
},
|
5163 |
+
{
|
5164 |
+
"epoch": 4.670468948035488,
|
5165 |
+
"grad_norm": 0.26554012298583984,
|
5166 |
+
"learning_rate": 1.774816907589873e-05,
|
5167 |
+
"loss": 0.0155,
|
5168 |
+
"step": 7370
|
5169 |
+
},
|
5170 |
+
{
|
5171 |
+
"epoch": 4.67680608365019,
|
5172 |
+
"grad_norm": 0.18996752798557281,
|
5173 |
+
"learning_rate": 1.7621995485879062e-05,
|
5174 |
+
"loss": 0.0093,
|
5175 |
+
"step": 7380
|
5176 |
+
},
|
5177 |
+
{
|
5178 |
+
"epoch": 4.683143219264892,
|
5179 |
+
"grad_norm": 0.08091560751199722,
|
5180 |
+
"learning_rate": 1.749617597652934e-05,
|
5181 |
+
"loss": 0.0087,
|
5182 |
+
"step": 7390
|
5183 |
+
},
|
5184 |
+
{
|
5185 |
+
"epoch": 4.689480354879595,
|
5186 |
+
"grad_norm": 0.16652986407279968,
|
5187 |
+
"learning_rate": 1.7370711923791567e-05,
|
5188 |
+
"loss": 0.0068,
|
5189 |
+
"step": 7400
|
5190 |
+
},
|
5191 |
+
{
|
5192 |
+
"epoch": 4.695817490494297,
|
5193 |
+
"grad_norm": 0.06975802779197693,
|
5194 |
+
"learning_rate": 1.7245604699720535e-05,
|
5195 |
+
"loss": 0.0098,
|
5196 |
+
"step": 7410
|
5197 |
+
},
|
5198 |
+
{
|
5199 |
+
"epoch": 4.702154626108999,
|
5200 |
+
"grad_norm": 0.2281133234500885,
|
5201 |
+
"learning_rate": 1.712085567246878e-05,
|
5202 |
+
"loss": 0.0213,
|
5203 |
+
"step": 7420
|
5204 |
+
},
|
5205 |
+
{
|
5206 |
+
"epoch": 4.7084917617237005,
|
5207 |
+
"grad_norm": 0.12594124674797058,
|
5208 |
+
"learning_rate": 1.699646620627168e-05,
|
5209 |
+
"loss": 0.0082,
|
5210 |
+
"step": 7430
|
5211 |
+
},
|
5212 |
+
{
|
5213 |
+
"epoch": 4.714828897338403,
|
5214 |
+
"grad_norm": 0.18425187468528748,
|
5215 |
+
"learning_rate": 1.6872437661432517e-05,
|
5216 |
+
"loss": 0.0068,
|
5217 |
+
"step": 7440
|
5218 |
+
},
|
5219 |
+
{
|
5220 |
+
"epoch": 4.721166032953105,
|
5221 |
+
"grad_norm": 0.3101319670677185,
|
5222 |
+
"learning_rate": 1.6748771394307585e-05,
|
5223 |
+
"loss": 0.0076,
|
5224 |
+
"step": 7450
|
5225 |
+
},
|
5226 |
+
{
|
5227 |
+
"epoch": 4.727503168567807,
|
5228 |
+
"grad_norm": 0.1615813672542572,
|
5229 |
+
"learning_rate": 1.662546875729138e-05,
|
5230 |
+
"loss": 0.0078,
|
5231 |
+
"step": 7460
|
5232 |
+
},
|
5233 |
+
{
|
5234 |
+
"epoch": 4.733840304182509,
|
5235 |
+
"grad_norm": 0.22787398099899292,
|
5236 |
+
"learning_rate": 1.6502531098801753e-05,
|
5237 |
+
"loss": 0.0112,
|
5238 |
+
"step": 7470
|
5239 |
+
},
|
5240 |
+
{
|
5241 |
+
"epoch": 4.740177439797211,
|
5242 |
+
"grad_norm": 0.08283796161413193,
|
5243 |
+
"learning_rate": 1.637995976326527e-05,
|
5244 |
+
"loss": 0.0157,
|
5245 |
+
"step": 7480
|
5246 |
+
},
|
5247 |
+
{
|
5248 |
+
"epoch": 4.746514575411914,
|
5249 |
+
"grad_norm": 0.20619574189186096,
|
5250 |
+
"learning_rate": 1.62577560911024e-05,
|
5251 |
+
"loss": 0.0119,
|
5252 |
+
"step": 7490
|
5253 |
+
},
|
5254 |
+
{
|
5255 |
+
"epoch": 4.752851711026616,
|
5256 |
+
"grad_norm": 0.35674479603767395,
|
5257 |
+
"learning_rate": 1.6135921418712956e-05,
|
5258 |
+
"loss": 0.0087,
|
5259 |
+
"step": 7500
|
5260 |
+
},
|
5261 |
+
{
|
5262 |
+
"epoch": 4.759188846641318,
|
5263 |
+
"grad_norm": 0.2025870382785797,
|
5264 |
+
"learning_rate": 1.6014457078461353e-05,
|
5265 |
+
"loss": 0.0062,
|
5266 |
+
"step": 7510
|
5267 |
+
},
|
5268 |
+
{
|
5269 |
+
"epoch": 4.765525982256021,
|
5270 |
+
"grad_norm": 0.13968420028686523,
|
5271 |
+
"learning_rate": 1.5893364398662176e-05,
|
5272 |
+
"loss": 0.0132,
|
5273 |
+
"step": 7520
|
5274 |
+
},
|
5275 |
+
{
|
5276 |
+
"epoch": 4.771863117870723,
|
5277 |
+
"grad_norm": 0.18326647579669952,
|
5278 |
+
"learning_rate": 1.5772644703565565e-05,
|
5279 |
+
"loss": 0.0096,
|
5280 |
+
"step": 7530
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 4.778200253485425,
|
5284 |
+
"grad_norm": 0.2547348141670227,
|
5285 |
+
"learning_rate": 1.5652299313342773e-05,
|
5286 |
+
"loss": 0.0084,
|
5287 |
+
"step": 7540
|
5288 |
+
},
|
5289 |
+
{
|
5290 |
+
"epoch": 4.784537389100127,
|
5291 |
+
"grad_norm": 0.12518715858459473,
|
5292 |
+
"learning_rate": 1.553232954407171e-05,
|
5293 |
+
"loss": 0.01,
|
5294 |
+
"step": 7550
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 4.7908745247148286,
|
5298 |
+
"grad_norm": 0.12570436298847198,
|
5299 |
+
"learning_rate": 1.5412736707722537e-05,
|
5300 |
+
"loss": 0.0094,
|
5301 |
+
"step": 7560
|
5302 |
+
},
|
5303 |
+
{
|
5304 |
+
"epoch": 4.797211660329531,
|
5305 |
+
"grad_norm": 0.15311287343502045,
|
5306 |
+
"learning_rate": 1.5293522112143373e-05,
|
5307 |
+
"loss": 0.0057,
|
5308 |
+
"step": 7570
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 4.803548795944233,
|
5312 |
+
"grad_norm": 0.08851443231105804,
|
5313 |
+
"learning_rate": 1.517468706104589e-05,
|
5314 |
+
"loss": 0.0083,
|
5315 |
+
"step": 7580
|
5316 |
+
},
|
5317 |
+
{
|
5318 |
+
"epoch": 4.809885931558935,
|
5319 |
+
"grad_norm": 0.14807093143463135,
|
5320 |
+
"learning_rate": 1.5056232853991209e-05,
|
5321 |
+
"loss": 0.0097,
|
5322 |
+
"step": 7590
|
5323 |
+
},
|
5324 |
+
{
|
5325 |
+
"epoch": 4.816223067173637,
|
5326 |
+
"grad_norm": 0.20004534721374512,
|
5327 |
+
"learning_rate": 1.4938160786375572e-05,
|
5328 |
+
"loss": 0.0101,
|
5329 |
+
"step": 7600
|
5330 |
+
},
|
5331 |
+
{
|
5332 |
+
"epoch": 4.82256020278834,
|
5333 |
+
"grad_norm": 0.2074967622756958,
|
5334 |
+
"learning_rate": 1.4820472149416154e-05,
|
5335 |
+
"loss": 0.0073,
|
5336 |
+
"step": 7610
|
5337 |
+
},
|
5338 |
+
{
|
5339 |
+
"epoch": 4.828897338403042,
|
5340 |
+
"grad_norm": 0.1131831631064415,
|
5341 |
+
"learning_rate": 1.470316823013707e-05,
|
5342 |
+
"loss": 0.0067,
|
5343 |
+
"step": 7620
|
5344 |
+
},
|
5345 |
+
{
|
5346 |
+
"epoch": 4.835234474017744,
|
5347 |
+
"grad_norm": 0.21209746599197388,
|
5348 |
+
"learning_rate": 1.4586250311355132e-05,
|
5349 |
+
"loss": 0.014,
|
5350 |
+
"step": 7630
|
5351 |
+
},
|
5352 |
+
{
|
5353 |
+
"epoch": 4.841571609632446,
|
5354 |
+
"grad_norm": 0.14775687456130981,
|
5355 |
+
"learning_rate": 1.4469719671666043e-05,
|
5356 |
+
"loss": 0.0081,
|
5357 |
+
"step": 7640
|
5358 |
+
},
|
5359 |
+
{
|
5360 |
+
"epoch": 4.847908745247148,
|
5361 |
+
"grad_norm": 0.20488443970680237,
|
5362 |
+
"learning_rate": 1.435357758543015e-05,
|
5363 |
+
"loss": 0.0073,
|
5364 |
+
"step": 7650
|
5365 |
+
},
|
5366 |
+
{
|
5367 |
+
"epoch": 4.854245880861851,
|
5368 |
+
"grad_norm": 0.06777448952198029,
|
5369 |
+
"learning_rate": 1.4237825322758736e-05,
|
5370 |
+
"loss": 0.0131,
|
5371 |
+
"step": 7660
|
5372 |
+
},
|
5373 |
+
{
|
5374 |
+
"epoch": 4.860583016476553,
|
5375 |
+
"grad_norm": 0.13396242260932922,
|
5376 |
+
"learning_rate": 1.412246414949997e-05,
|
5377 |
+
"loss": 0.005,
|
5378 |
+
"step": 7670
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 4.866920152091255,
|
5382 |
+
"grad_norm": 0.21779701113700867,
|
5383 |
+
"learning_rate": 1.4007495327225162e-05,
|
5384 |
+
"loss": 0.0076,
|
5385 |
+
"step": 7680
|
5386 |
+
},
|
5387 |
+
{
|
5388 |
+
"epoch": 4.873257287705957,
|
5389 |
+
"grad_norm": 0.2239043265581131,
|
5390 |
+
"learning_rate": 1.389292011321498e-05,
|
5391 |
+
"loss": 0.0091,
|
5392 |
+
"step": 7690
|
5393 |
+
},
|
5394 |
+
{
|
5395 |
+
"epoch": 4.879594423320659,
|
5396 |
+
"grad_norm": 0.06960437446832657,
|
5397 |
+
"learning_rate": 1.3778739760445552e-05,
|
5398 |
+
"loss": 0.007,
|
5399 |
+
"step": 7700
|
5400 |
+
},
|
5401 |
+
{
|
5402 |
+
"epoch": 4.885931558935361,
|
5403 |
+
"grad_norm": 0.14277929067611694,
|
5404 |
+
"learning_rate": 1.3664955517574968e-05,
|
5405 |
+
"loss": 0.0078,
|
5406 |
+
"step": 7710
|
5407 |
+
},
|
5408 |
+
{
|
5409 |
+
"epoch": 4.892268694550063,
|
5410 |
+
"grad_norm": 0.17942270636558533,
|
5411 |
+
"learning_rate": 1.3551568628929434e-05,
|
5412 |
+
"loss": 0.0093,
|
5413 |
+
"step": 7720
|
5414 |
+
},
|
5415 |
+
{
|
5416 |
+
"epoch": 4.898605830164765,
|
5417 |
+
"grad_norm": 0.23902519047260284,
|
5418 |
+
"learning_rate": 1.343858033448982e-05,
|
5419 |
+
"loss": 0.0087,
|
5420 |
+
"step": 7730
|
5421 |
+
},
|
5422 |
+
{
|
5423 |
+
"epoch": 4.904942965779467,
|
5424 |
+
"grad_norm": 0.33525460958480835,
|
5425 |
+
"learning_rate": 1.3325991869878013e-05,
|
5426 |
+
"loss": 0.0064,
|
5427 |
+
"step": 7740
|
5428 |
+
},
|
5429 |
+
{
|
5430 |
+
"epoch": 4.91128010139417,
|
5431 |
+
"grad_norm": 0.11698520183563232,
|
5432 |
+
"learning_rate": 1.3213804466343421e-05,
|
5433 |
+
"loss": 0.0088,
|
5434 |
+
"step": 7750
|
5435 |
+
},
|
5436 |
+
{
|
5437 |
+
"epoch": 4.917617237008872,
|
5438 |
+
"grad_norm": 0.1842937171459198,
|
5439 |
+
"learning_rate": 1.3102019350749528e-05,
|
5440 |
+
"loss": 0.0092,
|
5441 |
+
"step": 7760
|
5442 |
+
},
|
5443 |
+
{
|
5444 |
+
"epoch": 4.923954372623574,
|
5445 |
+
"grad_norm": 0.09481093287467957,
|
5446 |
+
"learning_rate": 1.299063774556042e-05,
|
5447 |
+
"loss": 0.0059,
|
5448 |
+
"step": 7770
|
5449 |
+
},
|
5450 |
+
{
|
5451 |
+
"epoch": 4.930291508238277,
|
5452 |
+
"grad_norm": 0.17098549008369446,
|
5453 |
+
"learning_rate": 1.2879660868827508e-05,
|
5454 |
+
"loss": 0.0084,
|
5455 |
+
"step": 7780
|
5456 |
+
},
|
5457 |
+
{
|
5458 |
+
"epoch": 4.936628643852979,
|
5459 |
+
"grad_norm": 0.3938882648944855,
|
5460 |
+
"learning_rate": 1.2769089934176126e-05,
|
5461 |
+
"loss": 0.0113,
|
5462 |
+
"step": 7790
|
5463 |
+
},
|
5464 |
+
{
|
5465 |
+
"epoch": 4.942965779467681,
|
5466 |
+
"grad_norm": 0.21974030137062073,
|
5467 |
+
"learning_rate": 1.2658926150792322e-05,
|
5468 |
+
"loss": 0.0074,
|
5469 |
+
"step": 7800
|
5470 |
+
},
|
5471 |
+
{
|
5472 |
+
"epoch": 4.949302915082383,
|
5473 |
+
"grad_norm": 0.1278766393661499,
|
5474 |
+
"learning_rate": 1.2549170723409549e-05,
|
5475 |
+
"loss": 0.01,
|
5476 |
+
"step": 7810
|
5477 |
+
},
|
5478 |
+
{
|
5479 |
+
"epoch": 4.955640050697085,
|
5480 |
+
"grad_norm": 0.18058639764785767,
|
5481 |
+
"learning_rate": 1.243982485229559e-05,
|
5482 |
+
"loss": 0.0075,
|
5483 |
+
"step": 7820
|
5484 |
+
},
|
5485 |
+
{
|
5486 |
+
"epoch": 4.961977186311787,
|
5487 |
+
"grad_norm": 0.20735391974449158,
|
5488 |
+
"learning_rate": 1.233088973323937e-05,
|
5489 |
+
"loss": 0.0074,
|
5490 |
+
"step": 7830
|
5491 |
+
},
|
5492 |
+
{
|
5493 |
+
"epoch": 4.968314321926489,
|
5494 |
+
"grad_norm": 0.12043243646621704,
|
5495 |
+
"learning_rate": 1.2222366557537911e-05,
|
5496 |
+
"loss": 0.0093,
|
5497 |
+
"step": 7840
|
5498 |
+
},
|
5499 |
+
{
|
5500 |
+
"epoch": 4.974651457541191,
|
5501 |
+
"grad_norm": 0.28951793909072876,
|
5502 |
+
"learning_rate": 1.2114256511983274e-05,
|
5503 |
+
"loss": 0.007,
|
5504 |
+
"step": 7850
|
5505 |
+
},
|
5506 |
+
{
|
5507 |
+
"epoch": 4.980988593155893,
|
5508 |
+
"grad_norm": 0.1455940455198288,
|
5509 |
+
"learning_rate": 1.2006560778849578e-05,
|
5510 |
+
"loss": 0.0067,
|
5511 |
+
"step": 7860
|
5512 |
+
},
|
5513 |
+
{
|
5514 |
+
"epoch": 4.987325728770596,
|
5515 |
+
"grad_norm": 0.1419738233089447,
|
5516 |
+
"learning_rate": 1.1899280535880119e-05,
|
5517 |
+
"loss": 0.0115,
|
5518 |
+
"step": 7870
|
5519 |
+
},
|
5520 |
+
{
|
5521 |
+
"epoch": 4.993662864385298,
|
5522 |
+
"grad_norm": 0.3839736878871918,
|
5523 |
+
"learning_rate": 1.1792416956274444e-05,
|
5524 |
+
"loss": 0.0103,
|
5525 |
+
"step": 7880
|
5526 |
+
},
|
5527 |
+
{
|
5528 |
+
"epoch": 5.0,
|
5529 |
+
"grad_norm": 0.12225103378295898,
|
5530 |
+
"learning_rate": 1.1685971208675539e-05,
|
5531 |
+
"loss": 0.0083,
|
5532 |
+
"step": 7890
|
5533 |
+
},
|
5534 |
+
{
|
5535 |
+
"epoch": 5.006337135614702,
|
5536 |
+
"grad_norm": 0.22479389607906342,
|
5537 |
+
"learning_rate": 1.157994445715706e-05,
|
5538 |
+
"loss": 0.0082,
|
5539 |
+
"step": 7900
|
5540 |
+
},
|
5541 |
+
{
|
5542 |
+
"epoch": 5.012674271229404,
|
5543 |
+
"grad_norm": 0.2502928674221039,
|
5544 |
+
"learning_rate": 1.1474337861210543e-05,
|
5545 |
+
"loss": 0.0073,
|
5546 |
+
"step": 7910
|
5547 |
+
},
|
5548 |
+
{
|
5549 |
+
"epoch": 5.019011406844107,
|
5550 |
+
"grad_norm": 0.11566631495952606,
|
5551 |
+
"learning_rate": 1.1369152575732822e-05,
|
5552 |
+
"loss": 0.0058,
|
5553 |
+
"step": 7920
|
5554 |
+
},
|
5555 |
+
{
|
5556 |
+
"epoch": 5.025348542458809,
|
5557 |
+
"grad_norm": 0.11840217560529709,
|
5558 |
+
"learning_rate": 1.1264389751013326e-05,
|
5559 |
+
"loss": 0.007,
|
5560 |
+
"step": 7930
|
5561 |
+
},
|
5562 |
+
{
|
5563 |
+
"epoch": 5.031685678073511,
|
5564 |
+
"grad_norm": 0.30334770679473877,
|
5565 |
+
"learning_rate": 1.1160050532721528e-05,
|
5566 |
+
"loss": 0.0116,
|
5567 |
+
"step": 7940
|
5568 |
+
},
|
5569 |
+
{
|
5570 |
+
"epoch": 5.038022813688213,
|
5571 |
+
"grad_norm": 0.17277342081069946,
|
5572 |
+
"learning_rate": 1.1056136061894384e-05,
|
5573 |
+
"loss": 0.0069,
|
5574 |
+
"step": 7950
|
5575 |
+
},
|
5576 |
+
{
|
5577 |
+
"epoch": 5.044359949302915,
|
5578 |
+
"grad_norm": 0.09852743148803711,
|
5579 |
+
"learning_rate": 1.095264747492391e-05,
|
5580 |
+
"loss": 0.0056,
|
5581 |
+
"step": 7960
|
5582 |
+
},
|
5583 |
+
{
|
5584 |
+
"epoch": 5.050697084917617,
|
5585 |
+
"grad_norm": 0.10726805031299591,
|
5586 |
+
"learning_rate": 1.0849585903544706e-05,
|
5587 |
+
"loss": 0.0148,
|
5588 |
+
"step": 7970
|
5589 |
+
},
|
5590 |
+
{
|
5591 |
+
"epoch": 5.057034220532319,
|
5592 |
+
"grad_norm": 0.08306515961885452,
|
5593 |
+
"learning_rate": 1.0746952474821614e-05,
|
5594 |
+
"loss": 0.005,
|
5595 |
+
"step": 7980
|
5596 |
+
},
|
5597 |
+
{
|
5598 |
+
"epoch": 5.063371356147021,
|
5599 |
+
"grad_norm": 0.13456346094608307,
|
5600 |
+
"learning_rate": 1.0644748311137376e-05,
|
5601 |
+
"loss": 0.0071,
|
5602 |
+
"step": 7990
|
5603 |
+
},
|
5604 |
+
{
|
5605 |
+
"epoch": 5.069708491761724,
|
5606 |
+
"grad_norm": 0.2890625,
|
5607 |
+
"learning_rate": 1.0542974530180327e-05,
|
5608 |
+
"loss": 0.0063,
|
5609 |
+
"step": 8000
|
5610 |
+
},
|
5611 |
+
{
|
5612 |
+
"epoch": 5.076045627376426,
|
5613 |
+
"grad_norm": 0.15506230294704437,
|
5614 |
+
"learning_rate": 1.0441632244932237e-05,
|
5615 |
+
"loss": 0.0077,
|
5616 |
+
"step": 8010
|
5617 |
+
},
|
5618 |
+
{
|
5619 |
+
"epoch": 5.082382762991128,
|
5620 |
+
"grad_norm": 0.17413881421089172,
|
5621 |
+
"learning_rate": 1.0340722563656107e-05,
|
5622 |
+
"loss": 0.0066,
|
5623 |
+
"step": 8020
|
5624 |
+
},
|
5625 |
+
{
|
5626 |
+
"epoch": 5.08871989860583,
|
5627 |
+
"grad_norm": 0.15269486606121063,
|
5628 |
+
"learning_rate": 1.0240246589884044e-05,
|
5629 |
+
"loss": 0.0056,
|
5630 |
+
"step": 8030
|
5631 |
+
},
|
5632 |
+
{
|
5633 |
+
"epoch": 5.095057034220532,
|
5634 |
+
"grad_norm": 0.17267774045467377,
|
5635 |
+
"learning_rate": 1.0140205422405214e-05,
|
5636 |
+
"loss": 0.0062,
|
5637 |
+
"step": 8040
|
5638 |
+
},
|
5639 |
+
{
|
5640 |
+
"epoch": 5.101394169835235,
|
5641 |
+
"grad_norm": 0.20491677522659302,
|
5642 |
+
"learning_rate": 1.0040600155253765e-05,
|
5643 |
+
"loss": 0.0082,
|
5644 |
+
"step": 8050
|
5645 |
+
},
|
5646 |
+
{
|
5647 |
+
"epoch": 5.107731305449937,
|
5648 |
+
"grad_norm": 0.15973028540611267,
|
5649 |
+
"learning_rate": 9.941431877696955e-06,
|
5650 |
+
"loss": 0.0095,
|
5651 |
+
"step": 8060
|
5652 |
+
},
|
5653 |
+
{
|
5654 |
+
"epoch": 5.114068441064639,
|
5655 |
+
"grad_norm": 0.13956964015960693,
|
5656 |
+
"learning_rate": 9.842701674223187e-06,
|
5657 |
+
"loss": 0.008,
|
5658 |
+
"step": 8070
|
5659 |
+
},
|
5660 |
+
{
|
5661 |
+
"epoch": 5.120405576679341,
|
5662 |
+
"grad_norm": 0.29668980836868286,
|
5663 |
+
"learning_rate": 9.744410624530148e-06,
|
5664 |
+
"loss": 0.009,
|
5665 |
+
"step": 8080
|
5666 |
+
},
|
5667 |
+
{
|
5668 |
+
"epoch": 5.126742712294043,
|
5669 |
+
"grad_norm": 0.1549568921327591,
|
5670 |
+
"learning_rate": 9.646559803512994e-06,
|
5671 |
+
"loss": 0.0073,
|
5672 |
+
"step": 8090
|
5673 |
+
},
|
5674 |
+
{
|
5675 |
+
"epoch": 5.133079847908745,
|
5676 |
+
"grad_norm": 0.15987129509449005,
|
5677 |
+
"learning_rate": 9.549150281252633e-06,
|
5678 |
+
"loss": 0.0078,
|
5679 |
+
"step": 8100
|
5680 |
+
},
|
5681 |
+
{
|
5682 |
+
"epoch": 5.139416983523447,
|
5683 |
+
"grad_norm": 0.0577310174703598,
|
5684 |
+
"learning_rate": 9.452183123004e-06,
|
5685 |
+
"loss": 0.0098,
|
5686 |
+
"step": 8110
|
5687 |
+
},
|
5688 |
+
{
|
5689 |
+
"epoch": 5.145754119138149,
|
5690 |
+
"grad_norm": 0.1426042914390564,
|
5691 |
+
"learning_rate": 9.355659389184396e-06,
|
5692 |
+
"loss": 0.0078,
|
5693 |
+
"step": 8120
|
5694 |
+
},
|
5695 |
+
{
|
5696 |
+
"epoch": 5.152091254752852,
|
5697 |
+
"grad_norm": 0.13074810802936554,
|
5698 |
+
"learning_rate": 9.259580135361929e-06,
|
5699 |
+
"loss": 0.0081,
|
5700 |
+
"step": 8130
|
5701 |
+
},
|
5702 |
+
{
|
5703 |
+
"epoch": 5.158428390367554,
|
5704 |
+
"grad_norm": 0.19135090708732605,
|
5705 |
+
"learning_rate": 9.163946412243896e-06,
|
5706 |
+
"loss": 0.0098,
|
5707 |
+
"step": 8140
|
5708 |
+
},
|
5709 |
+
{
|
5710 |
+
"epoch": 5.164765525982256,
|
5711 |
+
"grad_norm": 0.15256141126155853,
|
5712 |
+
"learning_rate": 9.068759265665384e-06,
|
5713 |
+
"loss": 0.0053,
|
5714 |
+
"step": 8150
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 5.171102661596958,
|
5718 |
+
"grad_norm": 0.15117891132831573,
|
5719 |
+
"learning_rate": 8.974019736577777e-06,
|
5720 |
+
"loss": 0.0079,
|
5721 |
+
"step": 8160
|
5722 |
+
},
|
5723 |
+
{
|
5724 |
+
"epoch": 5.17743979721166,
|
5725 |
+
"grad_norm": 0.09071781486272812,
|
5726 |
+
"learning_rate": 8.879728861037384e-06,
|
5727 |
+
"loss": 0.0099,
|
5728 |
+
"step": 8170
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 5.183776932826363,
|
5732 |
+
"grad_norm": 0.24567152559757233,
|
5733 |
+
"learning_rate": 8.785887670194138e-06,
|
5734 |
+
"loss": 0.0055,
|
5735 |
+
"step": 8180
|
5736 |
+
},
|
5737 |
+
{
|
5738 |
+
"epoch": 5.190114068441065,
|
5739 |
+
"grad_norm": 0.19762946665287018,
|
5740 |
+
"learning_rate": 8.692497190280224e-06,
|
5741 |
+
"loss": 0.0092,
|
5742 |
+
"step": 8190
|
5743 |
+
},
|
5744 |
+
{
|
5745 |
+
"epoch": 5.196451204055767,
|
5746 |
+
"grad_norm": 0.24934445321559906,
|
5747 |
+
"learning_rate": 8.599558442598998e-06,
|
5748 |
+
"loss": 0.0094,
|
5749 |
+
"step": 8200
|
5750 |
+
},
|
5751 |
+
{
|
5752 |
+
"epoch": 5.202788339670469,
|
5753 |
+
"grad_norm": 0.1780957281589508,
|
5754 |
+
"learning_rate": 8.507072443513702e-06,
|
5755 |
+
"loss": 0.0083,
|
5756 |
+
"step": 8210
|
5757 |
+
},
|
5758 |
+
{
|
5759 |
+
"epoch": 5.2091254752851714,
|
5760 |
+
"grad_norm": 0.23371122777462006,
|
5761 |
+
"learning_rate": 8.415040204436426e-06,
|
5762 |
+
"loss": 0.0082,
|
5763 |
+
"step": 8220
|
5764 |
+
},
|
5765 |
+
{
|
5766 |
+
"epoch": 5.215462610899873,
|
5767 |
+
"grad_norm": 0.06640614569187164,
|
5768 |
+
"learning_rate": 8.323462731816961e-06,
|
5769 |
+
"loss": 0.0052,
|
5770 |
+
"step": 8230
|
5771 |
+
},
|
5772 |
+
{
|
5773 |
+
"epoch": 5.221799746514575,
|
5774 |
+
"grad_norm": 0.118456169962883,
|
5775 |
+
"learning_rate": 8.232341027131885e-06,
|
5776 |
+
"loss": 0.0081,
|
5777 |
+
"step": 8240
|
5778 |
+
},
|
5779 |
+
{
|
5780 |
+
"epoch": 5.228136882129277,
|
5781 |
+
"grad_norm": 0.1477072387933731,
|
5782 |
+
"learning_rate": 8.141676086873572e-06,
|
5783 |
+
"loss": 0.0081,
|
5784 |
+
"step": 8250
|
5785 |
+
},
|
5786 |
+
{
|
5787 |
+
"epoch": 5.23447401774398,
|
5788 |
+
"grad_norm": 0.1423831284046173,
|
5789 |
+
"learning_rate": 8.051468902539272e-06,
|
5790 |
+
"loss": 0.0058,
|
5791 |
+
"step": 8260
|
5792 |
+
},
|
5793 |
+
{
|
5794 |
+
"epoch": 5.240811153358682,
|
5795 |
+
"grad_norm": 0.14859850704669952,
|
5796 |
+
"learning_rate": 7.96172046062032e-06,
|
5797 |
+
"loss": 0.0094,
|
5798 |
+
"step": 8270
|
5799 |
+
},
|
5800 |
+
{
|
5801 |
+
"epoch": 5.247148288973384,
|
5802 |
+
"grad_norm": 0.22598117589950562,
|
5803 |
+
"learning_rate": 7.872431742591268e-06,
|
5804 |
+
"loss": 0.0079,
|
5805 |
+
"step": 8280
|
5806 |
+
},
|
5807 |
+
{
|
5808 |
+
"epoch": 5.253485424588086,
|
5809 |
+
"grad_norm": 0.20482905209064484,
|
5810 |
+
"learning_rate": 7.783603724899257e-06,
|
5811 |
+
"loss": 0.0079,
|
5812 |
+
"step": 8290
|
5813 |
+
},
|
5814 |
+
{
|
5815 |
+
"epoch": 5.259822560202788,
|
5816 |
+
"grad_norm": 0.21788014471530914,
|
5817 |
+
"learning_rate": 7.695237378953223e-06,
|
5818 |
+
"loss": 0.0055,
|
5819 |
+
"step": 8300
|
5820 |
+
},
|
5821 |
+
{
|
5822 |
+
"epoch": 5.266159695817491,
|
5823 |
+
"grad_norm": 0.09685339778661728,
|
5824 |
+
"learning_rate": 7.607333671113409e-06,
|
5825 |
+
"loss": 0.0038,
|
5826 |
+
"step": 8310
|
5827 |
+
},
|
5828 |
+
{
|
5829 |
+
"epoch": 5.272496831432193,
|
5830 |
+
"grad_norm": 0.175571009516716,
|
5831 |
+
"learning_rate": 7.519893562680663e-06,
|
5832 |
+
"loss": 0.0062,
|
5833 |
+
"step": 8320
|
5834 |
+
},
|
5835 |
+
{
|
5836 |
+
"epoch": 5.278833967046895,
|
5837 |
+
"grad_norm": 0.20628724992275238,
|
5838 |
+
"learning_rate": 7.432918009885997e-06,
|
5839 |
+
"loss": 0.0088,
|
5840 |
+
"step": 8330
|
5841 |
+
},
|
5842 |
+
{
|
5843 |
+
"epoch": 5.285171102661597,
|
5844 |
+
"grad_norm": 0.0737687200307846,
|
5845 |
+
"learning_rate": 7.3464079638801365e-06,
|
5846 |
+
"loss": 0.0088,
|
5847 |
+
"step": 8340
|
5848 |
+
},
|
5849 |
+
{
|
5850 |
+
"epoch": 5.2915082382762995,
|
5851 |
+
"grad_norm": 0.10191112756729126,
|
5852 |
+
"learning_rate": 7.260364370723044e-06,
|
5853 |
+
"loss": 0.0081,
|
5854 |
+
"step": 8350
|
5855 |
+
},
|
5856 |
+
{
|
5857 |
+
"epoch": 5.297845373891001,
|
5858 |
+
"grad_norm": 0.35432806611061096,
|
5859 |
+
"learning_rate": 7.174788171373731e-06,
|
5860 |
+
"loss": 0.0096,
|
5861 |
+
"step": 8360
|
5862 |
+
},
|
5863 |
+
{
|
5864 |
+
"epoch": 5.304182509505703,
|
5865 |
+
"grad_norm": 0.11743071675300598,
|
5866 |
+
"learning_rate": 7.089680301679752e-06,
|
5867 |
+
"loss": 0.0132,
|
5868 |
+
"step": 8370
|
5869 |
+
},
|
5870 |
+
{
|
5871 |
+
"epoch": 5.310519645120405,
|
5872 |
+
"grad_norm": 0.15060439705848694,
|
5873 |
+
"learning_rate": 7.005041692367154e-06,
|
5874 |
+
"loss": 0.008,
|
5875 |
+
"step": 8380
|
5876 |
+
},
|
5877 |
+
{
|
5878 |
+
"epoch": 5.316856780735108,
|
5879 |
+
"grad_norm": 0.19431732594966888,
|
5880 |
+
"learning_rate": 6.92087326903022e-06,
|
5881 |
+
"loss": 0.0062,
|
5882 |
+
"step": 8390
|
5883 |
+
},
|
5884 |
+
{
|
5885 |
+
"epoch": 5.32319391634981,
|
5886 |
+
"grad_norm": 0.10982516407966614,
|
5887 |
+
"learning_rate": 6.837175952121306e-06,
|
5888 |
+
"loss": 0.0057,
|
5889 |
+
"step": 8400
|
5890 |
+
},
|
5891 |
+
{
|
5892 |
+
"epoch": 5.329531051964512,
|
5893 |
+
"grad_norm": 0.09768649935722351,
|
5894 |
+
"learning_rate": 6.753950656940905e-06,
|
5895 |
+
"loss": 0.0047,
|
5896 |
+
"step": 8410
|
5897 |
+
},
|
5898 |
+
{
|
5899 |
+
"epoch": 5.335868187579214,
|
5900 |
+
"grad_norm": 0.2184268981218338,
|
5901 |
+
"learning_rate": 6.671198293627479e-06,
|
5902 |
+
"loss": 0.0061,
|
5903 |
+
"step": 8420
|
5904 |
+
},
|
5905 |
+
{
|
5906 |
+
"epoch": 5.342205323193916,
|
5907 |
+
"grad_norm": 0.11367691308259964,
|
5908 |
+
"learning_rate": 6.588919767147639e-06,
|
5909 |
+
"loss": 0.0048,
|
5910 |
+
"step": 8430
|
5911 |
+
},
|
5912 |
+
{
|
5913 |
+
"epoch": 5.348542458808619,
|
5914 |
+
"grad_norm": 0.11176607757806778,
|
5915 |
+
"learning_rate": 6.5071159772861436e-06,
|
5916 |
+
"loss": 0.0092,
|
5917 |
+
"step": 8440
|
5918 |
+
},
|
5919 |
+
{
|
5920 |
+
"epoch": 5.354879594423321,
|
5921 |
+
"grad_norm": 0.12881191074848175,
|
5922 |
+
"learning_rate": 6.425787818636131e-06,
|
5923 |
+
"loss": 0.0056,
|
5924 |
+
"step": 8450
|
5925 |
+
},
|
5926 |
+
{
|
5927 |
+
"epoch": 5.361216730038023,
|
5928 |
+
"grad_norm": 0.19846558570861816,
|
5929 |
+
"learning_rate": 6.344936180589351e-06,
|
5930 |
+
"loss": 0.0059,
|
5931 |
+
"step": 8460
|
5932 |
+
},
|
5933 |
+
{
|
5934 |
+
"epoch": 5.367553865652725,
|
5935 |
+
"grad_norm": 0.2730921506881714,
|
5936 |
+
"learning_rate": 6.264561947326331e-06,
|
5937 |
+
"loss": 0.0074,
|
5938 |
+
"step": 8470
|
5939 |
+
},
|
5940 |
+
{
|
5941 |
+
"epoch": 5.3738910012674275,
|
5942 |
+
"grad_norm": 0.1230594590306282,
|
5943 |
+
"learning_rate": 6.184665997806832e-06,
|
5944 |
+
"loss": 0.0076,
|
5945 |
+
"step": 8480
|
5946 |
+
},
|
5947 |
+
{
|
5948 |
+
"epoch": 5.380228136882129,
|
5949 |
+
"grad_norm": 0.12817886471748352,
|
5950 |
+
"learning_rate": 6.1052492057601275e-06,
|
5951 |
+
"loss": 0.0069,
|
5952 |
+
"step": 8490
|
5953 |
+
},
|
5954 |
+
{
|
5955 |
+
"epoch": 5.386565272496831,
|
5956 |
+
"grad_norm": 0.08666050434112549,
|
5957 |
+
"learning_rate": 6.026312439675552e-06,
|
5958 |
+
"loss": 0.0059,
|
5959 |
+
"step": 8500
|
5960 |
+
},
|
5961 |
+
{
|
5962 |
+
"epoch": 5.392902408111533,
|
5963 |
+
"grad_norm": 0.2247517853975296,
|
5964 |
+
"learning_rate": 5.947856562792925e-06,
|
5965 |
+
"loss": 0.0147,
|
5966 |
+
"step": 8510
|
5967 |
+
},
|
5968 |
+
{
|
5969 |
+
"epoch": 5.399239543726236,
|
5970 |
+
"grad_norm": 0.08660010248422623,
|
5971 |
+
"learning_rate": 5.869882433093155e-06,
|
5972 |
+
"loss": 0.0073,
|
5973 |
+
"step": 8520
|
5974 |
+
},
|
5975 |
+
{
|
5976 |
+
"epoch": 5.405576679340938,
|
5977 |
+
"grad_norm": 0.0791560560464859,
|
5978 |
+
"learning_rate": 5.79239090328883e-06,
|
5979 |
+
"loss": 0.0061,
|
5980 |
+
"step": 8530
|
5981 |
+
},
|
5982 |
+
{
|
5983 |
+
"epoch": 5.41191381495564,
|
5984 |
+
"grad_norm": 0.07316572964191437,
|
5985 |
+
"learning_rate": 5.715382820814885e-06,
|
5986 |
+
"loss": 0.0084,
|
5987 |
+
"step": 8540
|
5988 |
+
},
|
5989 |
+
{
|
5990 |
+
"epoch": 5.418250950570342,
|
5991 |
+
"grad_norm": 0.12857115268707275,
|
5992 |
+
"learning_rate": 5.6388590278194096e-06,
|
5993 |
+
"loss": 0.0068,
|
5994 |
+
"step": 8550
|
5995 |
+
},
|
5996 |
+
{
|
5997 |
+
"epoch": 5.424588086185044,
|
5998 |
+
"grad_norm": 0.21100053191184998,
|
5999 |
+
"learning_rate": 5.562820361154314e-06,
|
6000 |
+
"loss": 0.0075,
|
6001 |
+
"step": 8560
|
6002 |
+
},
|
6003 |
+
{
|
6004 |
+
"epoch": 5.430925221799747,
|
6005 |
+
"grad_norm": 0.08549866825342178,
|
6006 |
+
"learning_rate": 5.48726765236629e-06,
|
6007 |
+
"loss": 0.006,
|
6008 |
+
"step": 8570
|
6009 |
+
},
|
6010 |
+
{
|
6011 |
+
"epoch": 5.437262357414449,
|
6012 |
+
"grad_norm": 0.15550892055034637,
|
6013 |
+
"learning_rate": 5.412201727687644e-06,
|
6014 |
+
"loss": 0.0063,
|
6015 |
+
"step": 8580
|
6016 |
+
},
|
6017 |
+
{
|
6018 |
+
"epoch": 5.443599493029151,
|
6019 |
+
"grad_norm": 0.18836095929145813,
|
6020 |
+
"learning_rate": 5.337623408027293e-06,
|
6021 |
+
"loss": 0.0087,
|
6022 |
+
"step": 8590
|
6023 |
+
},
|
6024 |
+
{
|
6025 |
+
"epoch": 5.449936628643853,
|
6026 |
+
"grad_norm": 0.08456358313560486,
|
6027 |
+
"learning_rate": 5.263533508961827e-06,
|
6028 |
+
"loss": 0.006,
|
6029 |
+
"step": 8600
|
6030 |
+
},
|
6031 |
+
{
|
6032 |
+
"epoch": 5.4562737642585555,
|
6033 |
+
"grad_norm": 0.054700568318367004,
|
6034 |
+
"learning_rate": 5.1899328407264855e-06,
|
6035 |
+
"loss": 0.0053,
|
6036 |
+
"step": 8610
|
6037 |
+
},
|
6038 |
+
{
|
6039 |
+
"epoch": 5.462610899873257,
|
6040 |
+
"grad_norm": 0.13627506792545319,
|
6041 |
+
"learning_rate": 5.116822208206396e-06,
|
6042 |
+
"loss": 0.006,
|
6043 |
+
"step": 8620
|
6044 |
+
},
|
6045 |
+
{
|
6046 |
+
"epoch": 5.468948035487959,
|
6047 |
+
"grad_norm": 0.1252082735300064,
|
6048 |
+
"learning_rate": 5.044202410927706e-06,
|
6049 |
+
"loss": 0.0125,
|
6050 |
+
"step": 8630
|
6051 |
+
},
|
6052 |
+
{
|
6053 |
+
"epoch": 5.475285171102661,
|
6054 |
+
"grad_norm": 0.2162148505449295,
|
6055 |
+
"learning_rate": 4.972074243048897e-06,
|
6056 |
+
"loss": 0.0065,
|
6057 |
+
"step": 8640
|
6058 |
+
},
|
6059 |
+
{
|
6060 |
+
"epoch": 5.481622306717364,
|
6061 |
+
"grad_norm": 0.12487296015024185,
|
6062 |
+
"learning_rate": 4.900438493352055e-06,
|
6063 |
+
"loss": 0.0063,
|
6064 |
+
"step": 8650
|
6065 |
+
},
|
6066 |
+
{
|
6067 |
+
"epoch": 5.487959442332066,
|
6068 |
+
"grad_norm": 0.14202018082141876,
|
6069 |
+
"learning_rate": 4.829295945234258e-06,
|
6070 |
+
"loss": 0.0063,
|
6071 |
+
"step": 8660
|
6072 |
+
},
|
6073 |
+
{
|
6074 |
+
"epoch": 5.494296577946768,
|
6075 |
+
"grad_norm": 0.18836723268032074,
|
6076 |
+
"learning_rate": 4.758647376699032e-06,
|
6077 |
+
"loss": 0.0069,
|
6078 |
+
"step": 8670
|
6079 |
+
},
|
6080 |
+
{
|
6081 |
+
"epoch": 5.50063371356147,
|
6082 |
+
"grad_norm": 0.2227470427751541,
|
6083 |
+
"learning_rate": 4.688493560347773e-06,
|
6084 |
+
"loss": 0.0111,
|
6085 |
+
"step": 8680
|
6086 |
+
},
|
6087 |
+
{
|
6088 |
+
"epoch": 5.506970849176172,
|
6089 |
+
"grad_norm": 0.1131884977221489,
|
6090 |
+
"learning_rate": 4.618835263371396e-06,
|
6091 |
+
"loss": 0.0054,
|
6092 |
+
"step": 8690
|
6093 |
+
},
|
6094 |
+
{
|
6095 |
+
"epoch": 5.513307984790875,
|
6096 |
+
"grad_norm": 0.08110499382019043,
|
6097 |
+
"learning_rate": 4.549673247541875e-06,
|
6098 |
+
"loss": 0.0039,
|
6099 |
+
"step": 8700
|
6100 |
+
},
|
6101 |
+
{
|
6102 |
+
"epoch": 5.519645120405577,
|
6103 |
+
"grad_norm": 0.2664946913719177,
|
6104 |
+
"learning_rate": 4.48100826920394e-06,
|
6105 |
+
"loss": 0.0055,
|
6106 |
+
"step": 8710
|
6107 |
+
},
|
6108 |
+
{
|
6109 |
+
"epoch": 5.525982256020279,
|
6110 |
+
"grad_norm": 0.09621760994195938,
|
6111 |
+
"learning_rate": 4.412841079266777e-06,
|
6112 |
+
"loss": 0.0066,
|
6113 |
+
"step": 8720
|
6114 |
+
},
|
6115 |
+
{
|
6116 |
+
"epoch": 5.532319391634981,
|
6117 |
+
"grad_norm": 0.07459976524114609,
|
6118 |
+
"learning_rate": 4.3451724231958644e-06,
|
6119 |
+
"loss": 0.0103,
|
6120 |
+
"step": 8730
|
6121 |
+
},
|
6122 |
+
{
|
6123 |
+
"epoch": 5.5386565272496835,
|
6124 |
+
"grad_norm": 0.252323180437088,
|
6125 |
+
"learning_rate": 4.27800304100478e-06,
|
6126 |
+
"loss": 0.0041,
|
6127 |
+
"step": 8740
|
6128 |
+
},
|
6129 |
+
{
|
6130 |
+
"epoch": 5.544993662864385,
|
6131 |
+
"grad_norm": 0.11829400062561035,
|
6132 |
+
"learning_rate": 4.2113336672471245e-06,
|
6133 |
+
"loss": 0.0042,
|
6134 |
+
"step": 8750
|
6135 |
+
},
|
6136 |
+
{
|
6137 |
+
"epoch": 5.551330798479087,
|
6138 |
+
"grad_norm": 0.07013269513845444,
|
6139 |
+
"learning_rate": 4.145165031008508e-06,
|
6140 |
+
"loss": 0.0047,
|
6141 |
+
"step": 8760
|
6142 |
+
},
|
6143 |
+
{
|
6144 |
+
"epoch": 5.557667934093789,
|
6145 |
+
"grad_norm": 0.2011323720216751,
|
6146 |
+
"learning_rate": 4.079497855898501e-06,
|
6147 |
+
"loss": 0.0067,
|
6148 |
+
"step": 8770
|
6149 |
+
},
|
6150 |
+
{
|
6151 |
+
"epoch": 5.564005069708491,
|
6152 |
+
"grad_norm": 0.06639205664396286,
|
6153 |
+
"learning_rate": 4.01433286004283e-06,
|
6154 |
+
"loss": 0.0086,
|
6155 |
+
"step": 8780
|
6156 |
+
},
|
6157 |
+
{
|
6158 |
+
"epoch": 5.570342205323194,
|
6159 |
+
"grad_norm": 0.24140506982803345,
|
6160 |
+
"learning_rate": 3.949670756075447e-06,
|
6161 |
+
"loss": 0.0101,
|
6162 |
+
"step": 8790
|
6163 |
+
},
|
6164 |
+
{
|
6165 |
+
"epoch": 5.576679340937896,
|
6166 |
+
"grad_norm": 0.1788935363292694,
|
6167 |
+
"learning_rate": 3.885512251130763e-06,
|
6168 |
+
"loss": 0.0068,
|
6169 |
+
"step": 8800
|
6170 |
+
},
|
6171 |
+
{
|
6172 |
+
"epoch": 5.583016476552598,
|
6173 |
+
"grad_norm": 0.10148416459560394,
|
6174 |
+
"learning_rate": 3.821858046835913e-06,
|
6175 |
+
"loss": 0.011,
|
6176 |
+
"step": 8810
|
6177 |
+
},
|
6178 |
+
{
|
6179 |
+
"epoch": 5.589353612167301,
|
6180 |
+
"grad_norm": 0.25111889839172363,
|
6181 |
+
"learning_rate": 3.75870883930306e-06,
|
6182 |
+
"loss": 0.0052,
|
6183 |
+
"step": 8820
|
6184 |
+
},
|
6185 |
+
{
|
6186 |
+
"epoch": 5.595690747782003,
|
6187 |
+
"grad_norm": 0.1619209200143814,
|
6188 |
+
"learning_rate": 3.696065319121833e-06,
|
6189 |
+
"loss": 0.0061,
|
6190 |
+
"step": 8830
|
6191 |
+
},
|
6192 |
+
{
|
6193 |
+
"epoch": 5.602027883396705,
|
6194 |
+
"grad_norm": 0.1424710899591446,
|
6195 |
+
"learning_rate": 3.6339281713517303e-06,
|
6196 |
+
"loss": 0.0147,
|
6197 |
+
"step": 8840
|
6198 |
+
},
|
6199 |
+
{
|
6200 |
+
"epoch": 5.608365019011407,
|
6201 |
+
"grad_norm": 0.093357153236866,
|
6202 |
+
"learning_rate": 3.5722980755146517e-06,
|
6203 |
+
"loss": 0.01,
|
6204 |
+
"step": 8850
|
6205 |
+
},
|
6206 |
+
{
|
6207 |
+
"epoch": 5.614702154626109,
|
6208 |
+
"grad_norm": 0.09066253155469894,
|
6209 |
+
"learning_rate": 3.511175705587433e-06,
|
6210 |
+
"loss": 0.0064,
|
6211 |
+
"step": 8860
|
6212 |
+
},
|
6213 |
+
{
|
6214 |
+
"epoch": 5.6210392902408115,
|
6215 |
+
"grad_norm": 0.24886588752269745,
|
6216 |
+
"learning_rate": 3.4505617299945336e-06,
|
6217 |
+
"loss": 0.0072,
|
6218 |
+
"step": 8870
|
6219 |
+
},
|
6220 |
+
{
|
6221 |
+
"epoch": 5.6273764258555135,
|
6222 |
+
"grad_norm": 0.22231824696063995,
|
6223 |
+
"learning_rate": 3.390456811600673e-06,
|
6224 |
+
"loss": 0.0111,
|
6225 |
+
"step": 8880
|
6226 |
+
},
|
6227 |
+
{
|
6228 |
+
"epoch": 5.633713561470215,
|
6229 |
+
"grad_norm": 0.11994162201881409,
|
6230 |
+
"learning_rate": 3.3308616077036115e-06,
|
6231 |
+
"loss": 0.0088,
|
6232 |
+
"step": 8890
|
6233 |
+
},
|
6234 |
+
{
|
6235 |
+
"epoch": 5.640050697084917,
|
6236 |
+
"grad_norm": 0.11728494614362717,
|
6237 |
+
"learning_rate": 3.271776770026963e-06,
|
6238 |
+
"loss": 0.0095,
|
6239 |
+
"step": 8900
|
6240 |
+
},
|
6241 |
+
{
|
6242 |
+
"epoch": 5.64638783269962,
|
6243 |
+
"grad_norm": 0.13038089871406555,
|
6244 |
+
"learning_rate": 3.213202944713023e-06,
|
6245 |
+
"loss": 0.0053,
|
6246 |
+
"step": 8910
|
6247 |
+
},
|
6248 |
+
{
|
6249 |
+
"epoch": 5.652724968314322,
|
6250 |
+
"grad_norm": 0.09141723066568375,
|
6251 |
+
"learning_rate": 3.155140772315773e-06,
|
6252 |
+
"loss": 0.0062,
|
6253 |
+
"step": 8920
|
6254 |
+
},
|
6255 |
+
{
|
6256 |
+
"epoch": 5.659062103929024,
|
6257 |
+
"grad_norm": 0.07316479086875916,
|
6258 |
+
"learning_rate": 3.0975908877938277e-06,
|
6259 |
+
"loss": 0.0062,
|
6260 |
+
"step": 8930
|
6261 |
+
},
|
6262 |
+
{
|
6263 |
+
"epoch": 5.665399239543726,
|
6264 |
+
"grad_norm": 0.18643398582935333,
|
6265 |
+
"learning_rate": 3.040553920503503e-06,
|
6266 |
+
"loss": 0.0065,
|
6267 |
+
"step": 8940
|
6268 |
+
},
|
6269 |
+
{
|
6270 |
+
"epoch": 5.671736375158428,
|
6271 |
+
"grad_norm": 0.151958167552948,
|
6272 |
+
"learning_rate": 2.9840304941919415e-06,
|
6273 |
+
"loss": 0.0055,
|
6274 |
+
"step": 8950
|
6275 |
+
},
|
6276 |
+
{
|
6277 |
+
"epoch": 5.678073510773131,
|
6278 |
+
"grad_norm": 0.2611534297466278,
|
6279 |
+
"learning_rate": 2.928021226990263e-06,
|
6280 |
+
"loss": 0.0075,
|
6281 |
+
"step": 8960
|
6282 |
+
},
|
6283 |
+
{
|
6284 |
+
"epoch": 5.684410646387833,
|
6285 |
+
"grad_norm": 0.16426794230937958,
|
6286 |
+
"learning_rate": 2.8725267314068495e-06,
|
6287 |
+
"loss": 0.0046,
|
6288 |
+
"step": 8970
|
6289 |
+
},
|
6290 |
+
{
|
6291 |
+
"epoch": 5.690747782002535,
|
6292 |
+
"grad_norm": 0.11258076876401901,
|
6293 |
+
"learning_rate": 2.817547614320615e-06,
|
6294 |
+
"loss": 0.0095,
|
6295 |
+
"step": 8980
|
6296 |
+
},
|
6297 |
+
{
|
6298 |
+
"epoch": 5.697084917617237,
|
6299 |
+
"grad_norm": 0.15174216032028198,
|
6300 |
+
"learning_rate": 2.7630844769743757e-06,
|
6301 |
+
"loss": 0.0069,
|
6302 |
+
"step": 8990
|
6303 |
+
},
|
6304 |
+
{
|
6305 |
+
"epoch": 5.7034220532319395,
|
6306 |
+
"grad_norm": 0.15580610930919647,
|
6307 |
+
"learning_rate": 2.7091379149682685e-06,
|
6308 |
+
"loss": 0.0079,
|
6309 |
+
"step": 9000
|
6310 |
+
},
|
6311 |
+
{
|
6312 |
+
"epoch": 5.7097591888466415,
|
6313 |
+
"grad_norm": 0.12744936347007751,
|
6314 |
+
"learning_rate": 2.6557085182532582e-06,
|
6315 |
+
"loss": 0.0086,
|
6316 |
+
"step": 9010
|
6317 |
+
},
|
6318 |
+
{
|
6319 |
+
"epoch": 5.716096324461343,
|
6320 |
+
"grad_norm": 0.1542847603559494,
|
6321 |
+
"learning_rate": 2.602796871124663e-06,
|
6322 |
+
"loss": 0.0096,
|
6323 |
+
"step": 9020
|
6324 |
+
},
|
6325 |
+
{
|
6326 |
+
"epoch": 5.722433460076045,
|
6327 |
+
"grad_norm": 0.13958968222141266,
|
6328 |
+
"learning_rate": 2.5504035522157854e-06,
|
6329 |
+
"loss": 0.0089,
|
6330 |
+
"step": 9030
|
6331 |
+
},
|
6332 |
+
{
|
6333 |
+
"epoch": 5.728770595690747,
|
6334 |
+
"grad_norm": 0.20519228279590607,
|
6335 |
+
"learning_rate": 2.4985291344915674e-06,
|
6336 |
+
"loss": 0.0061,
|
6337 |
+
"step": 9040
|
6338 |
+
},
|
6339 |
+
{
|
6340 |
+
"epoch": 5.73510773130545,
|
6341 |
+
"grad_norm": 0.11365640163421631,
|
6342 |
+
"learning_rate": 2.4471741852423237e-06,
|
6343 |
+
"loss": 0.0046,
|
6344 |
+
"step": 9050
|
6345 |
+
},
|
6346 |
+
{
|
6347 |
+
"epoch": 5.741444866920152,
|
6348 |
+
"grad_norm": 0.0870782658457756,
|
6349 |
+
"learning_rate": 2.3963392660775575e-06,
|
6350 |
+
"loss": 0.0079,
|
6351 |
+
"step": 9060
|
6352 |
+
},
|
6353 |
+
{
|
6354 |
+
"epoch": 5.747782002534854,
|
6355 |
+
"grad_norm": 0.07872149348258972,
|
6356 |
+
"learning_rate": 2.3460249329197824e-06,
|
6357 |
+
"loss": 0.0055,
|
6358 |
+
"step": 9070
|
6359 |
+
},
|
6360 |
+
{
|
6361 |
+
"epoch": 5.754119138149557,
|
6362 |
+
"grad_norm": 0.08804658055305481,
|
6363 |
+
"learning_rate": 2.296231735998511e-06,
|
6364 |
+
"loss": 0.007,
|
6365 |
+
"step": 9080
|
6366 |
+
},
|
6367 |
+
{
|
6368 |
+
"epoch": 5.760456273764259,
|
6369 |
+
"grad_norm": 0.05982697755098343,
|
6370 |
+
"learning_rate": 2.2469602198441573e-06,
|
6371 |
+
"loss": 0.0044,
|
6372 |
+
"step": 9090
|
6373 |
+
},
|
6374 |
+
{
|
6375 |
+
"epoch": 5.766793409378961,
|
6376 |
+
"grad_norm": 0.10259458422660828,
|
6377 |
+
"learning_rate": 2.1982109232821178e-06,
|
6378 |
+
"loss": 0.006,
|
6379 |
+
"step": 9100
|
6380 |
+
},
|
6381 |
+
{
|
6382 |
+
"epoch": 5.773130544993663,
|
6383 |
+
"grad_norm": 0.1546311378479004,
|
6384 |
+
"learning_rate": 2.149984379426906e-06,
|
6385 |
+
"loss": 0.0067,
|
6386 |
+
"step": 9110
|
6387 |
+
},
|
6388 |
+
{
|
6389 |
+
"epoch": 5.779467680608365,
|
6390 |
+
"grad_norm": 0.14351686835289001,
|
6391 |
+
"learning_rate": 2.102281115676258e-06,
|
6392 |
+
"loss": 0.0056,
|
6393 |
+
"step": 9120
|
6394 |
+
},
|
6395 |
+
{
|
6396 |
+
"epoch": 5.7858048162230675,
|
6397 |
+
"grad_norm": 0.05316438898444176,
|
6398 |
+
"learning_rate": 2.0551016537054493e-06,
|
6399 |
+
"loss": 0.0061,
|
6400 |
+
"step": 9130
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 5.7921419518377695,
|
6404 |
+
"grad_norm": 0.0845993310213089,
|
6405 |
+
"learning_rate": 2.008446509461498e-06,
|
6406 |
+
"loss": 0.006,
|
6407 |
+
"step": 9140
|
6408 |
+
},
|
6409 |
+
{
|
6410 |
+
"epoch": 5.798479087452471,
|
6411 |
+
"grad_norm": 0.156995952129364,
|
6412 |
+
"learning_rate": 1.962316193157593e-06,
|
6413 |
+
"loss": 0.0054,
|
6414 |
+
"step": 9150
|
6415 |
+
},
|
6416 |
+
{
|
6417 |
+
"epoch": 5.804816223067173,
|
6418 |
+
"grad_norm": 0.20765309035778046,
|
6419 |
+
"learning_rate": 1.91671120926748e-06,
|
6420 |
+
"loss": 0.0049,
|
6421 |
+
"step": 9160
|
6422 |
+
},
|
6423 |
+
{
|
6424 |
+
"epoch": 5.811153358681876,
|
6425 |
+
"grad_norm": 0.15031087398529053,
|
6426 |
+
"learning_rate": 1.8716320565199618e-06,
|
6427 |
+
"loss": 0.0069,
|
6428 |
+
"step": 9170
|
6429 |
+
},
|
6430 |
+
{
|
6431 |
+
"epoch": 5.817490494296578,
|
6432 |
+
"grad_norm": 0.08898857980966568,
|
6433 |
+
"learning_rate": 1.8270792278934302e-06,
|
6434 |
+
"loss": 0.0052,
|
6435 |
+
"step": 9180
|
6436 |
+
},
|
6437 |
+
{
|
6438 |
+
"epoch": 5.82382762991128,
|
6439 |
+
"grad_norm": 0.16047555208206177,
|
6440 |
+
"learning_rate": 1.7830532106104747e-06,
|
6441 |
+
"loss": 0.0061,
|
6442 |
+
"step": 9190
|
6443 |
+
},
|
6444 |
+
{
|
6445 |
+
"epoch": 5.830164765525982,
|
6446 |
+
"grad_norm": 0.24277397990226746,
|
6447 |
+
"learning_rate": 1.7395544861325718e-06,
|
6448 |
+
"loss": 0.0061,
|
6449 |
+
"step": 9200
|
6450 |
+
},
|
6451 |
+
{
|
6452 |
+
"epoch": 5.836501901140684,
|
6453 |
+
"grad_norm": 0.1472398340702057,
|
6454 |
+
"learning_rate": 1.696583530154794e-06,
|
6455 |
+
"loss": 0.0123,
|
6456 |
+
"step": 9210
|
6457 |
+
},
|
6458 |
+
{
|
6459 |
+
"epoch": 5.842839036755387,
|
6460 |
+
"grad_norm": 0.15417790412902832,
|
6461 |
+
"learning_rate": 1.6541408126006463e-06,
|
6462 |
+
"loss": 0.0058,
|
6463 |
+
"step": 9220
|
6464 |
+
},
|
6465 |
+
{
|
6466 |
+
"epoch": 5.849176172370089,
|
6467 |
+
"grad_norm": 0.10276266187429428,
|
6468 |
+
"learning_rate": 1.6122267976168781e-06,
|
6469 |
+
"loss": 0.0076,
|
6470 |
+
"step": 9230
|
6471 |
+
},
|
6472 |
+
{
|
6473 |
+
"epoch": 5.855513307984791,
|
6474 |
+
"grad_norm": 0.22714407742023468,
|
6475 |
+
"learning_rate": 1.5708419435684462e-06,
|
6476 |
+
"loss": 0.0079,
|
6477 |
+
"step": 9240
|
6478 |
+
},
|
6479 |
+
{
|
6480 |
+
"epoch": 5.861850443599493,
|
6481 |
+
"grad_norm": 0.08724337071180344,
|
6482 |
+
"learning_rate": 1.5299867030334814e-06,
|
6483 |
+
"loss": 0.006,
|
6484 |
+
"step": 9250
|
6485 |
+
},
|
6486 |
+
{
|
6487 |
+
"epoch": 5.8681875792141955,
|
6488 |
+
"grad_norm": 0.1354474574327469,
|
6489 |
+
"learning_rate": 1.4896615227983468e-06,
|
6490 |
+
"loss": 0.0063,
|
6491 |
+
"step": 9260
|
6492 |
+
},
|
6493 |
+
{
|
6494 |
+
"epoch": 5.8745247148288975,
|
6495 |
+
"grad_norm": 0.09567315876483917,
|
6496 |
+
"learning_rate": 1.4498668438527597e-06,
|
6497 |
+
"loss": 0.0073,
|
6498 |
+
"step": 9270
|
6499 |
+
},
|
6500 |
+
{
|
6501 |
+
"epoch": 5.880861850443599,
|
6502 |
+
"grad_norm": 0.25440752506256104,
|
6503 |
+
"learning_rate": 1.4106031013849496e-06,
|
6504 |
+
"loss": 0.0059,
|
6505 |
+
"step": 9280
|
6506 |
+
},
|
6507 |
+
{
|
6508 |
+
"epoch": 5.887198986058301,
|
6509 |
+
"grad_norm": 0.046442631632089615,
|
6510 |
+
"learning_rate": 1.3718707247769135e-06,
|
6511 |
+
"loss": 0.0085,
|
6512 |
+
"step": 9290
|
6513 |
+
},
|
6514 |
+
{
|
6515 |
+
"epoch": 5.893536121673003,
|
6516 |
+
"grad_norm": 0.202633798122406,
|
6517 |
+
"learning_rate": 1.333670137599713e-06,
|
6518 |
+
"loss": 0.0078,
|
6519 |
+
"step": 9300
|
6520 |
+
},
|
6521 |
+
{
|
6522 |
+
"epoch": 5.899873257287706,
|
6523 |
+
"grad_norm": 0.11459842324256897,
|
6524 |
+
"learning_rate": 1.2960017576088446e-06,
|
6525 |
+
"loss": 0.0049,
|
6526 |
+
"step": 9310
|
6527 |
+
},
|
6528 |
+
{
|
6529 |
+
"epoch": 5.906210392902408,
|
6530 |
+
"grad_norm": 0.06873945146799088,
|
6531 |
+
"learning_rate": 1.2588659967397e-06,
|
6532 |
+
"loss": 0.0172,
|
6533 |
+
"step": 9320
|
6534 |
+
},
|
6535 |
+
{
|
6536 |
+
"epoch": 5.91254752851711,
|
6537 |
+
"grad_norm": 0.17459562420845032,
|
6538 |
+
"learning_rate": 1.222263261102985e-06,
|
6539 |
+
"loss": 0.0071,
|
6540 |
+
"step": 9330
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 5.918884664131813,
|
6544 |
+
"grad_norm": 0.15463189780712128,
|
6545 |
+
"learning_rate": 1.1861939509803687e-06,
|
6546 |
+
"loss": 0.0068,
|
6547 |
+
"step": 9340
|
6548 |
+
},
|
6549 |
+
{
|
6550 |
+
"epoch": 5.925221799746515,
|
6551 |
+
"grad_norm": 0.052368469536304474,
|
6552 |
+
"learning_rate": 1.1506584608200367e-06,
|
6553 |
+
"loss": 0.0063,
|
6554 |
+
"step": 9350
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 5.931558935361217,
|
6558 |
+
"grad_norm": 0.08080845326185226,
|
6559 |
+
"learning_rate": 1.1156571792324211e-06,
|
6560 |
+
"loss": 0.0037,
|
6561 |
+
"step": 9360
|
6562 |
+
},
|
6563 |
+
{
|
6564 |
+
"epoch": 5.937896070975919,
|
6565 |
+
"grad_norm": 0.1452123522758484,
|
6566 |
+
"learning_rate": 1.0811904889859336e-06,
|
6567 |
+
"loss": 0.0106,
|
6568 |
+
"step": 9370
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 5.944233206590621,
|
6572 |
+
"grad_norm": 0.18937888741493225,
|
6573 |
+
"learning_rate": 1.0472587670027678e-06,
|
6574 |
+
"loss": 0.0045,
|
6575 |
+
"step": 9380
|
6576 |
+
},
|
6577 |
+
{
|
6578 |
+
"epoch": 5.9505703422053235,
|
6579 |
+
"grad_norm": 0.1064092218875885,
|
6580 |
+
"learning_rate": 1.0138623843548078e-06,
|
6581 |
+
"loss": 0.01,
|
6582 |
+
"step": 9390
|
6583 |
+
},
|
6584 |
+
{
|
6585 |
+
"epoch": 5.9569074778200255,
|
6586 |
+
"grad_norm": 0.15949054062366486,
|
6587 |
+
"learning_rate": 9.810017062595322e-07,
|
6588 |
+
"loss": 0.0093,
|
6589 |
+
"step": 9400
|
6590 |
+
},
|
6591 |
+
{
|
6592 |
+
"epoch": 5.9632446134347274,
|
6593 |
+
"grad_norm": 0.11651349812746048,
|
6594 |
+
"learning_rate": 9.486770920760668e-07,
|
6595 |
+
"loss": 0.0062,
|
6596 |
+
"step": 9410
|
6597 |
+
},
|
6598 |
+
{
|
6599 |
+
"epoch": 5.969581749049429,
|
6600 |
+
"grad_norm": 0.16830092668533325,
|
6601 |
+
"learning_rate": 9.168888953011989e-07,
|
6602 |
+
"loss": 0.005,
|
6603 |
+
"step": 9420
|
6604 |
+
},
|
6605 |
+
{
|
6606 |
+
"epoch": 5.975918884664132,
|
6607 |
+
"grad_norm": 0.08310183882713318,
|
6608 |
+
"learning_rate": 8.856374635655695e-07,
|
6609 |
+
"loss": 0.0059,
|
6610 |
+
"step": 9430
|
6611 |
+
},
|
6612 |
+
{
|
6613 |
+
"epoch": 5.982256020278834,
|
6614 |
+
"grad_norm": 0.12785309553146362,
|
6615 |
+
"learning_rate": 8.549231386298151e-07,
|
6616 |
+
"loss": 0.0097,
|
6617 |
+
"step": 9440
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 5.988593155893536,
|
6621 |
+
"grad_norm": 0.2574361264705658,
|
6622 |
+
"learning_rate": 8.247462563808817e-07,
|
6623 |
+
"loss": 0.0071,
|
6624 |
+
"step": 9450
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 5.994930291508238,
|
6628 |
+
"grad_norm": 0.31402966380119324,
|
6629 |
+
"learning_rate": 7.951071468283167e-07,
|
6630 |
+
"loss": 0.0065,
|
6631 |
+
"step": 9460
|
6632 |
+
},
|
6633 |
+
{
|
6634 |
+
"epoch": 6.00126742712294,
|
6635 |
+
"grad_norm": 0.1356462836265564,
|
6636 |
+
"learning_rate": 7.66006134100672e-07,
|
6637 |
+
"loss": 0.0058,
|
6638 |
+
"step": 9470
|
6639 |
+
},
|
6640 |
+
{
|
6641 |
+
"epoch": 6.007604562737643,
|
6642 |
+
"grad_norm": 0.08312007039785385,
|
6643 |
+
"learning_rate": 7.374435364419674e-07,
|
6644 |
+
"loss": 0.004,
|
6645 |
+
"step": 9480
|
6646 |
+
},
|
6647 |
+
{
|
6648 |
+
"epoch": 6.013941698352345,
|
6649 |
+
"grad_norm": 0.25194162130355835,
|
6650 |
+
"learning_rate": 7.094196662081831e-07,
|
6651 |
+
"loss": 0.0105,
|
6652 |
+
"step": 9490
|
6653 |
+
},
|
6654 |
+
{
|
6655 |
+
"epoch": 6.020278833967047,
|
6656 |
+
"grad_norm": 0.11876359581947327,
|
6657 |
+
"learning_rate": 6.819348298638839e-07,
|
6658 |
+
"loss": 0.005,
|
6659 |
+
"step": 9500
|
6660 |
+
},
|
6661 |
+
{
|
6662 |
+
"epoch": 6.026615969581749,
|
6663 |
+
"grad_norm": 0.11712250113487244,
|
6664 |
+
"learning_rate": 6.549893279788277e-07,
|
6665 |
+
"loss": 0.0087,
|
6666 |
+
"step": 9510
|
6667 |
+
},
|
6668 |
+
{
|
6669 |
+
"epoch": 6.032953105196452,
|
6670 |
+
"grad_norm": 0.24556323885917664,
|
6671 |
+
"learning_rate": 6.285834552247128e-07,
|
6672 |
+
"loss": 0.0059,
|
6673 |
+
"step": 9520
|
6674 |
+
},
|
6675 |
+
{
|
6676 |
+
"epoch": 6.0392902408111535,
|
6677 |
+
"grad_norm": 0.12040778994560242,
|
6678 |
+
"learning_rate": 6.027175003719354e-07,
|
6679 |
+
"loss": 0.0062,
|
6680 |
+
"step": 9530
|
6681 |
+
},
|
6682 |
+
{
|
6683 |
+
"epoch": 6.0456273764258555,
|
6684 |
+
"grad_norm": 0.07460421323776245,
|
6685 |
+
"learning_rate": 5.773917462864264e-07,
|
6686 |
+
"loss": 0.0091,
|
6687 |
+
"step": 9540
|
6688 |
+
},
|
6689 |
+
{
|
6690 |
+
"epoch": 6.051964512040557,
|
6691 |
+
"grad_norm": 0.18476122617721558,
|
6692 |
+
"learning_rate": 5.526064699265753e-07,
|
6693 |
+
"loss": 0.01,
|
6694 |
+
"step": 9550
|
6695 |
+
},
|
6696 |
+
{
|
6697 |
+
"epoch": 6.05830164765526,
|
6698 |
+
"grad_norm": 0.06253950297832489,
|
6699 |
+
"learning_rate": 5.283619423401998e-07,
|
6700 |
+
"loss": 0.0076,
|
6701 |
+
"step": 9560
|
6702 |
+
},
|
6703 |
+
{
|
6704 |
+
"epoch": 6.064638783269962,
|
6705 |
+
"grad_norm": 0.07986325025558472,
|
6706 |
+
"learning_rate": 5.046584286615697e-07,
|
6707 |
+
"loss": 0.0111,
|
6708 |
+
"step": 9570
|
6709 |
+
},
|
6710 |
+
{
|
6711 |
+
"epoch": 6.070975918884664,
|
6712 |
+
"grad_norm": 0.25652700662612915,
|
6713 |
+
"learning_rate": 4.814961881085045e-07,
|
6714 |
+
"loss": 0.004,
|
6715 |
+
"step": 9580
|
6716 |
+
},
|
6717 |
+
{
|
6718 |
+
"epoch": 6.077313054499366,
|
6719 |
+
"grad_norm": 0.10732295364141464,
|
6720 |
+
"learning_rate": 4.5887547397955864e-07,
|
6721 |
+
"loss": 0.0052,
|
6722 |
+
"step": 9590
|
6723 |
+
},
|
6724 |
+
{
|
6725 |
+
"epoch": 6.083650190114068,
|
6726 |
+
"grad_norm": 0.09151678532361984,
|
6727 |
+
"learning_rate": 4.367965336512403e-07,
|
6728 |
+
"loss": 0.0109,
|
6729 |
+
"step": 9600
|
6730 |
+
},
|
6731 |
+
{
|
6732 |
+
"epoch": 6.089987325728771,
|
6733 |
+
"grad_norm": 0.057842839509248734,
|
6734 |
+
"learning_rate": 4.1525960857530243e-07,
|
6735 |
+
"loss": 0.0059,
|
6736 |
+
"step": 9610
|
6737 |
+
},
|
6738 |
+
{
|
6739 |
+
"epoch": 6.096324461343473,
|
6740 |
+
"grad_norm": 0.1317504197359085,
|
6741 |
+
"learning_rate": 3.9426493427611177e-07,
|
6742 |
+
"loss": 0.0067,
|
6743 |
+
"step": 9620
|
6744 |
+
},
|
6745 |
+
{
|
6746 |
+
"epoch": 6.102661596958175,
|
6747 |
+
"grad_norm": 0.0697597935795784,
|
6748 |
+
"learning_rate": 3.738127403480507e-07,
|
6749 |
+
"loss": 0.0064,
|
6750 |
+
"step": 9630
|
6751 |
+
},
|
6752 |
+
{
|
6753 |
+
"epoch": 6.108998732572877,
|
6754 |
+
"grad_norm": 0.3189285695552826,
|
6755 |
+
"learning_rate": 3.5390325045304706e-07,
|
6756 |
+
"loss": 0.0066,
|
6757 |
+
"step": 9640
|
6758 |
+
},
|
6759 |
+
{
|
6760 |
+
"epoch": 6.11533586818758,
|
6761 |
+
"grad_norm": 0.07586175948381424,
|
6762 |
+
"learning_rate": 3.3453668231809286e-07,
|
6763 |
+
"loss": 0.0061,
|
6764 |
+
"step": 9650
|
6765 |
+
},
|
6766 |
+
{
|
6767 |
+
"epoch": 6.1216730038022815,
|
6768 |
+
"grad_norm": 0.28144219517707825,
|
6769 |
+
"learning_rate": 3.157132477328628e-07,
|
6770 |
+
"loss": 0.0061,
|
6771 |
+
"step": 9660
|
6772 |
+
},
|
6773 |
+
{
|
6774 |
+
"epoch": 6.1280101394169835,
|
6775 |
+
"grad_norm": 0.21867600083351135,
|
6776 |
+
"learning_rate": 2.9743315254743833e-07,
|
6777 |
+
"loss": 0.016,
|
6778 |
+
"step": 9670
|
6779 |
+
},
|
6780 |
+
{
|
6781 |
+
"epoch": 6.134347275031685,
|
6782 |
+
"grad_norm": 0.08317314088344574,
|
6783 |
+
"learning_rate": 2.796965966699927e-07,
|
6784 |
+
"loss": 0.0143,
|
6785 |
+
"step": 9680
|
6786 |
+
},
|
6787 |
+
{
|
6788 |
+
"epoch": 6.140684410646388,
|
6789 |
+
"grad_norm": 0.13135367631912231,
|
6790 |
+
"learning_rate": 2.625037740646763e-07,
|
6791 |
+
"loss": 0.0058,
|
6792 |
+
"step": 9690
|
6793 |
+
},
|
6794 |
+
{
|
6795 |
+
"epoch": 6.14702154626109,
|
6796 |
+
"grad_norm": 0.06645505130290985,
|
6797 |
+
"learning_rate": 2.458548727494292e-07,
|
6798 |
+
"loss": 0.0109,
|
6799 |
+
"step": 9700
|
6800 |
+
},
|
6801 |
+
{
|
6802 |
+
"epoch": 6.153358681875792,
|
6803 |
+
"grad_norm": 0.09750212728977203,
|
6804 |
+
"learning_rate": 2.2975007479397738e-07,
|
6805 |
+
"loss": 0.0071,
|
6806 |
+
"step": 9710
|
6807 |
+
},
|
6808 |
+
{
|
6809 |
+
"epoch": 6.159695817490494,
|
6810 |
+
"grad_norm": 0.09561249613761902,
|
6811 |
+
"learning_rate": 2.1418955631781202e-07,
|
6812 |
+
"loss": 0.0076,
|
6813 |
+
"step": 9720
|
6814 |
+
},
|
6815 |
+
{
|
6816 |
+
"epoch": 6.166032953105196,
|
6817 |
+
"grad_norm": 0.1740742325782776,
|
6818 |
+
"learning_rate": 1.9917348748826335e-07,
|
6819 |
+
"loss": 0.006,
|
6820 |
+
"step": 9730
|
6821 |
+
},
|
6822 |
+
{
|
6823 |
+
"epoch": 6.172370088719899,
|
6824 |
+
"grad_norm": 0.18339525163173676,
|
6825 |
+
"learning_rate": 1.847020325186577e-07,
|
6826 |
+
"loss": 0.006,
|
6827 |
+
"step": 9740
|
6828 |
+
},
|
6829 |
+
{
|
6830 |
+
"epoch": 6.178707224334601,
|
6831 |
+
"grad_norm": 0.06436607986688614,
|
6832 |
+
"learning_rate": 1.7077534966650766e-07,
|
6833 |
+
"loss": 0.0122,
|
6834 |
+
"step": 9750
|
6835 |
+
},
|
6836 |
+
{
|
6837 |
+
"epoch": 6.185044359949303,
|
6838 |
+
"grad_norm": 0.05160180851817131,
|
6839 |
+
"learning_rate": 1.5739359123178587e-07,
|
6840 |
+
"loss": 0.008,
|
6841 |
+
"step": 9760
|
6842 |
+
},
|
6843 |
+
{
|
6844 |
+
"epoch": 6.191381495564005,
|
6845 |
+
"grad_norm": 0.0478476956486702,
|
6846 |
+
"learning_rate": 1.4455690355525964e-07,
|
6847 |
+
"loss": 0.0054,
|
6848 |
+
"step": 9770
|
6849 |
+
},
|
6850 |
+
{
|
6851 |
+
"epoch": 6.197718631178708,
|
6852 |
+
"grad_norm": 0.13458269834518433,
|
6853 |
+
"learning_rate": 1.3226542701689215e-07,
|
6854 |
+
"loss": 0.0051,
|
6855 |
+
"step": 9780
|
6856 |
+
},
|
6857 |
+
{
|
6858 |
+
"epoch": 6.2040557667934095,
|
6859 |
+
"grad_norm": 0.14054587483406067,
|
6860 |
+
"learning_rate": 1.2051929603428825e-07,
|
6861 |
+
"loss": 0.0066,
|
6862 |
+
"step": 9790
|
6863 |
+
},
|
6864 |
+
{
|
6865 |
+
"epoch": 6.2103929024081115,
|
6866 |
+
"grad_norm": 0.3035231828689575,
|
6867 |
+
"learning_rate": 1.0931863906127327e-07,
|
6868 |
+
"loss": 0.0057,
|
6869 |
+
"step": 9800
|
6870 |
+
},
|
6871 |
+
{
|
6872 |
+
"epoch": 6.216730038022813,
|
6873 |
+
"grad_norm": 0.3359600901603699,
|
6874 |
+
"learning_rate": 9.866357858642205e-08,
|
6875 |
+
"loss": 0.0088,
|
6876 |
+
"step": 9810
|
6877 |
+
},
|
6878 |
+
{
|
6879 |
+
"epoch": 6.223067173637516,
|
6880 |
+
"grad_norm": 0.13854598999023438,
|
6881 |
+
"learning_rate": 8.855423113177664e-08,
|
6882 |
+
"loss": 0.0057,
|
6883 |
+
"step": 9820
|
6884 |
+
},
|
6885 |
+
{
|
6886 |
+
"epoch": 6.229404309252218,
|
6887 |
+
"grad_norm": 0.061560165137052536,
|
6888 |
+
"learning_rate": 7.899070725153613e-08,
|
6889 |
+
"loss": 0.0047,
|
6890 |
+
"step": 9830
|
6891 |
+
},
|
6892 |
+
{
|
6893 |
+
"epoch": 6.23574144486692,
|
6894 |
+
"grad_norm": 0.1461103856563568,
|
6895 |
+
"learning_rate": 6.997311153086883e-08,
|
6896 |
+
"loss": 0.004,
|
6897 |
+
"step": 9840
|
6898 |
+
},
|
6899 |
+
{
|
6900 |
+
"epoch": 6.242078580481622,
|
6901 |
+
"grad_norm": 0.3121045231819153,
|
6902 |
+
"learning_rate": 6.150154258476315e-08,
|
6903 |
+
"loss": 0.0073,
|
6904 |
+
"step": 9850
|
6905 |
+
},
|
6906 |
+
{
|
6907 |
+
"epoch": 6.248415716096324,
|
6908 |
+
"grad_norm": 0.18830816447734833,
|
6909 |
+
"learning_rate": 5.3576093056922906e-08,
|
6910 |
+
"loss": 0.0067,
|
6911 |
+
"step": 9860
|
6912 |
+
},
|
6913 |
+
{
|
6914 |
+
"epoch": 6.254752851711027,
|
6915 |
+
"grad_norm": 0.060759589076042175,
|
6916 |
+
"learning_rate": 4.619684961881254e-08,
|
6917 |
+
"loss": 0.005,
|
6918 |
+
"step": 9870
|
6919 |
+
},
|
6920 |
+
{
|
6921 |
+
"epoch": 6.261089987325729,
|
6922 |
+
"grad_norm": 0.1718645989894867,
|
6923 |
+
"learning_rate": 3.936389296864129e-08,
|
6924 |
+
"loss": 0.0049,
|
6925 |
+
"step": 9880
|
6926 |
+
},
|
6927 |
+
{
|
6928 |
+
"epoch": 6.267427122940431,
|
6929 |
+
"grad_norm": 0.09834865480661392,
|
6930 |
+
"learning_rate": 3.3077297830541584e-08,
|
6931 |
+
"loss": 0.0066,
|
6932 |
+
"step": 9890
|
6933 |
+
},
|
6934 |
+
{
|
6935 |
+
"epoch": 6.273764258555133,
|
6936 |
+
"grad_norm": 0.11674518138170242,
|
6937 |
+
"learning_rate": 2.7337132953697554e-08,
|
6938 |
+
"loss": 0.0086,
|
6939 |
+
"step": 9900
|
6940 |
+
},
|
6941 |
+
{
|
6942 |
+
"epoch": 6.280101394169836,
|
6943 |
+
"grad_norm": 0.17682595551013947,
|
6944 |
+
"learning_rate": 2.214346111164556e-08,
|
6945 |
+
"loss": 0.0065,
|
6946 |
+
"step": 9910
|
6947 |
+
},
|
6948 |
+
{
|
6949 |
+
"epoch": 6.2864385297845375,
|
6950 |
+
"grad_norm": 0.07229286432266235,
|
6951 |
+
"learning_rate": 1.749633910153592e-08,
|
6952 |
+
"loss": 0.0038,
|
6953 |
+
"step": 9920
|
6954 |
+
},
|
6955 |
+
{
|
6956 |
+
"epoch": 6.2927756653992395,
|
6957 |
+
"grad_norm": 0.083204485476017,
|
6958 |
+
"learning_rate": 1.3395817743561134e-08,
|
6959 |
+
"loss": 0.0057,
|
6960 |
+
"step": 9930
|
6961 |
+
},
|
6962 |
+
{
|
6963 |
+
"epoch": 6.299112801013941,
|
6964 |
+
"grad_norm": 0.2328864485025406,
|
6965 |
+
"learning_rate": 9.841941880361916e-09,
|
6966 |
+
"loss": 0.0108,
|
6967 |
+
"step": 9940
|
6968 |
+
},
|
6969 |
+
{
|
6970 |
+
"epoch": 6.305449936628644,
|
6971 |
+
"grad_norm": 0.1641470342874527,
|
6972 |
+
"learning_rate": 6.834750376549792e-09,
|
6973 |
+
"loss": 0.007,
|
6974 |
+
"step": 9950
|
6975 |
+
},
|
6976 |
+
{
|
6977 |
+
"epoch": 6.311787072243346,
|
6978 |
+
"grad_norm": 0.08585844933986664,
|
6979 |
+
"learning_rate": 4.3742761183018784e-09,
|
6980 |
+
"loss": 0.012,
|
6981 |
+
"step": 9960
|
6982 |
+
},
|
6983 |
+
{
|
6984 |
+
"epoch": 6.318124207858048,
|
6985 |
+
"grad_norm": 0.10597538948059082,
|
6986 |
+
"learning_rate": 2.4605460129556445e-09,
|
6987 |
+
"loss": 0.004,
|
6988 |
+
"step": 9970
|
6989 |
+
},
|
6990 |
+
{
|
6991 |
+
"epoch": 6.32446134347275,
|
6992 |
+
"grad_norm": 0.21842822432518005,
|
6993 |
+
"learning_rate": 1.0935809887702154e-09,
|
6994 |
+
"loss": 0.006,
|
6995 |
+
"step": 9980
|
6996 |
+
},
|
6997 |
+
{
|
6998 |
+
"epoch": 6.330798479087452,
|
6999 |
+
"grad_norm": 0.0765758827328682,
|
7000 |
+
"learning_rate": 2.7339599464326627e-10,
|
7001 |
+
"loss": 0.0059,
|
7002 |
+
"step": 9990
|
7003 |
+
},
|
7004 |
+
{
|
7005 |
+
"epoch": 6.337135614702155,
|
7006 |
+
"grad_norm": 0.09744177758693695,
|
7007 |
+
"learning_rate": 0.0,
|
7008 |
+
"loss": 0.0057,
|
7009 |
+
"step": 10000
|
7010 |
+
},
|
7011 |
+
{
|
7012 |
+
"epoch": 6.337135614702155,
|
7013 |
+
"step": 10000,
|
7014 |
+
"total_flos": 3.5060140843731366e+17,
|
7015 |
+
"train_loss": 0.02414526521936059,
|
7016 |
+
"train_runtime": 5814.7906,
|
7017 |
+
"train_samples_per_second": 27.516,
|
7018 |
+
"train_steps_per_second": 1.72
|
7019 |
}
|
7020 |
],
|
7021 |
"logging_steps": 10,
|
|
|
7030 |
"should_evaluate": false,
|
7031 |
"should_log": false,
|
7032 |
"should_save": true,
|
7033 |
+
"should_training_stop": true
|
7034 |
},
|
7035 |
"attributes": {}
|
7036 |
}
|
7037 |
},
|
7038 |
+
"total_flos": 3.5060140843731366e+17,
|
7039 |
"train_batch_size": 16,
|
7040 |
"trial_name": null,
|
7041 |
"trial_params": null
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84923e4733ccef3490ee1b765c64cf4119517e18ac78a7caf63ab1bc0ad41ca9
|
3 |
+
size 5240
|