diff --git a/cvnet_small_v1_global/checkpoint-1000/model.safetensors b/cvnet_small_v1_global/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d6cd2a75496a5b2e33c799d7d410384596d0e69 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b64e1aea865196e9b19a03a0b7e3ac2eec8b15357819313767513259f8a27d +size 237860264 diff --git a/cvnet_small_v1_global/checkpoint-1000/optimizer.pt b/cvnet_small_v1_global/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd4d9a49fecf912b711c2c1032847680cf430373 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3087e6b510df04a6a96e86fb6a37ae576f3f26f7ce8e9068e3c726c8440dd3f +size 475804282 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_0.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ab31eb89973990d9b8610104399e7e5aaec5986 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f68045b77ef059786b9d730ec7bfcab3532c91c560ed23338e0d4d518b2be5e +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_1.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c53f9ece2cbb84f4662850cb2a534a064415698c --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164a39302aced8efcfa4d5b883ab7d4f208ce2d3aaf57fa0fff0dc5bc35e9b53 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_2.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5513d1ce6c2aed658bc3b07da4002acb5b6ce7dc --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74928fb7ef7944af9aedd43d3456cb7cc320ccd38e5d7989ea54cf81f2bdc4ac +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_3.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..873724ab5b9b6f006d53045897c53d31e33804d8 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7390fd7db05b7bd4f309fe3c16e3133e51c0b7bbf36881c88fd513b9ebb7b75a +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_4.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..81ff0496e02da863836522e59c26181e5043ad3f --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315c200f1cd4bb6a4a593accb8ba546796343253845919cb5b2b52089686830d +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_5.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..dde498f5924607b3543e00cedc5f8d8e7ead0775 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561c368392fbdb6e8a7e5e086de59545625d80b821b5c72a4024b1f47b1390bb +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_6.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..004d54378694fe0b6ec4833301aba9fe98fda662 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a358b4a84de8c0fefda163466d1b792e3d23a409d2f7f54fdd9b3535793d39 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/rng_state_7.pth b/cvnet_small_v1_global/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e54c9d0e00105e7c7921fd6d4d170f670ee0b5a7 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ad99f8730fc68cc1e3438467270d0c73427583fe0bffeb5bca595169156c2f +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-1000/scheduler.pt b/cvnet_small_v1_global/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff2c05006e51f3f771730c9055c6c181da2abd0 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34ba9e6f50c2313df84e91c7dfed2be6b77dec7394adba34d3bd12d40d5ba2d +size 1064 diff --git a/cvnet_small_v1_global/checkpoint-1000/trainer_state.json b/cvnet_small_v1_global/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9464ad2cb3743f8522ca37b90c705ff3c670ddde --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/trainer_state.json @@ -0,0 +1,621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3373534620899047, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7081, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6774, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6781, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6772, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6802, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6745, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6729, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.636, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5978, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5882, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5747, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5722, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5599, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5832, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5711, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5557, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5581, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5761, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5643, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5702, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5648, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.556, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5578, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.564, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5562, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.559, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5508, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5588, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5584, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5555, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5597, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5621, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5568, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5527, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5458, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5553, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5582, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5518, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5485, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5444, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5522, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5543, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.56, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5394, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5475, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5437, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5438, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5481, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5434, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5422, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5465, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5447, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5396, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.543, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5461, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5482, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5453, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5493, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5477, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5365, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5433, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5368, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5467, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5382, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5354, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5322, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_global/checkpoint-1000/training_args.bin b/cvnet_small_v1_global/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89dfdb803f04836e7afc0c6a28f9dae664c6e171 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddfb786197274aad152ee4c2cc9787279a04f3c148146f1b57e85d4fb19f6fc +size 4856 diff --git a/cvnet_small_v1_global/checkpoint-2000/model.safetensors b/cvnet_small_v1_global/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12c9675d79b5f5ba6adba9943753084a0c317647 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdd0ac1006e9b3741dea6c7d1cbaed5ad86865589792b5cd08c72bb887c128e +size 237860264 diff --git a/cvnet_small_v1_global/checkpoint-2000/optimizer.pt b/cvnet_small_v1_global/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b83476b9b88f57d0b18da882a705f7881554fae --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcf359b402ab155e141b07b029eddf98e5e63df26e4e808b51a68a0fa44c1bb +size 475804282 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_0.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..33d35d46eb49c7694ddf02abc1f806fe21ce3c03 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24545b6bb7493bdf35c20f30780f8e0793c67065421d64d7180f5a320abd8417 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_1.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6310340e83ce52050038f3421730ede96037a6ba --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0232b218d0ca1f5487f91d43e6fb49bb92e0051351056294e6e73f362f38dc4 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_2.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f2d7f68fb7fd5fa3b96e6ab8e2a6ed8ae0cc685 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dffc06c0c6d363c38ffe8a64d78f552b7bfccf1d412a5e6d8b05a20badc3ab4 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_3.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee633cf4e658e3f4b63beab3a61ea2a072f17879 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c47dc8892b7412ec8da71c7704256b96b8469c5c7f3b6490b83502e4b2ee928 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_4.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..a68c4918b64151b1245b3a97d067f09d0feb75a0 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905b45076269c4efc0bfdb46513ea882d1de5bce7ddd5bed4ce98092c7da2c0b +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_5.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..37b3a6d3dd2e51bee05f2894dcebb9a4fe902f47 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ee8e3283737d5d9165ab7bbed91fab8f8054e727830b0ac207e19ea1b998ca +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_6.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..24f763bcd01fe0ca32d12a74bd2f9a71337dd568 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7205ffddd0d11808dcecbed7a8e9b2051147bffa0bcf02b1ee5ef28b9e65162 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/rng_state_7.pth b/cvnet_small_v1_global/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a55221cfbb5c000b877e8e6b14977b7163966dc --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bde508a1939450d8ec45b79bab2568f6dd5d0ac0cf2add96faab85481697da +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-2000/scheduler.pt b/cvnet_small_v1_global/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4605de17b07049a57c10990ac0db26645b375276 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115fc5d3c71fc8b112d5c1701359950aa4e48ea559451a73de1d1d8ca1116d89 +size 1064 diff --git a/cvnet_small_v1_global/checkpoint-2000/trainer_state.json b/cvnet_small_v1_global/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97d26beb7095a7619de2410acc29982f83ce9d6b --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/trainer_state.json @@ -0,0 +1,1221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6747069241798094, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7081, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6774, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6781, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6772, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6802, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6745, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6729, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.636, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5978, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5882, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5747, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5722, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5599, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5832, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5711, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5557, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5581, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5761, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5643, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5702, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5648, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.556, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5578, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.564, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5562, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.559, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5508, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5588, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5584, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5555, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5597, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5621, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5568, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5527, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5458, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5553, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5582, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5518, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5485, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5444, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5522, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5543, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.56, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5394, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5475, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5437, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5438, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5481, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5434, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5422, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5465, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5447, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5396, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.543, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5461, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5482, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5453, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5493, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5477, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5365, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5433, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5368, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5467, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5382, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5354, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5322, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5379, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5398, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5337, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5384, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5256, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5353, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5233, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5362, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5347, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5377, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5374, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.532, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5315, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5274, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.537, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5427, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5288, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5259, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.531, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5263, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5305, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5306, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.526, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5308, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5295, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5297, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5293, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5252, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.524, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5248, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5332, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5237, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5201, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5336, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5282, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5269, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5203, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5311, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5229, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5266, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5283, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5279, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5267, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5238, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.525, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5281, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5204, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5193, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5278, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5218, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5239, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5169, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5275, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5216, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5236, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.527, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5304, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5242, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5351, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5262, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5178, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5172, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.517, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5285, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5228, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5131, + "step": 2000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_global/checkpoint-2000/training_args.bin b/cvnet_small_v1_global/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89dfdb803f04836e7afc0c6a28f9dae664c6e171 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddfb786197274aad152ee4c2cc9787279a04f3c148146f1b57e85d4fb19f6fc +size 4856 diff --git a/cvnet_small_v1_global/checkpoint-3000/model.safetensors b/cvnet_small_v1_global/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f73a5307890fffc413b81ed0da91c17f7ac5988a --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f885f65a815e27442367ce964bc2c0559b43681759c08c7f00bb28cb1d07e1 +size 237860264 diff --git a/cvnet_small_v1_global/checkpoint-3000/optimizer.pt b/cvnet_small_v1_global/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fda4745001dc9ab2673cd3d8e8068f6e46e4f801 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97471d9b30876e8c0dbc7c483bcba3c96168b58bf31900a8605c5dc276a793de +size 475804282 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_0.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb8e78a2252e7489e47adb734bb1f370f80fb558 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7431609fc9d10c784464e14fe45f5a89b22573e0769c9c21d7d0a2dead49ec58 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_1.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..14409c37cb8c37cf95a2a40f01e3bde6c0a86a8f --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081bec13a1a7ac2ad3917351c18540a7bae0cfef6e0df05d537d5c0163fce12 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_2.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e0781a6af83c0376c3bc340f290368a3cdf5f3b --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535456fe6d77127c3883ae9c325dccec963de01449e321e9c704f78c50992939 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_3.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3a155aae9463722acad62167de281b7e03ad81a --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880e7012bf446334aa49027d1791b52f192a6dc11d44fea74095b153fd34bc97 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_4.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..50505327adf88ab24ab9f90d8faf76553bc4d2a9 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40038cca0e1cc4c41ee9eb36cfd70f6e868697faa2d2d79afba994d00387b725 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_5.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..1db65e964f07df377b31f7937f3fad02cb5361f7 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ec016c5b320b9210f7edc5e4b4d309aa8c19437ccbdf2fd68012814e7b0be6 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_6.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d27f215baa695e5cea378d01affc7631aa9463d --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049da174472b1bfd728972b360f9b2b80d3b4842c4c00b3019ff285417d7b3ca +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/rng_state_7.pth b/cvnet_small_v1_global/checkpoint-3000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae156e93c6ee694319aecdfa93bbb81b80472851 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2dcd919cf3c5f19cfa3f8a17ee1d2a7803a15af43f3917c55ec05ed6ca5858d +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-3000/scheduler.pt b/cvnet_small_v1_global/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a885c8b891f90d887f6c40db2b8ae74f564dd98 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df43d46412429a4bda78060914e68357cbee658ac819784da79443fe1ce6982 +size 1064 diff --git a/cvnet_small_v1_global/checkpoint-3000/trainer_state.json b/cvnet_small_v1_global/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f461f61fc933d6244beb15f69843dd8ebe46d4d --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/trainer_state.json @@ -0,0 +1,1821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0120603862697142, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7081, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6774, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6781, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6772, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6802, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6745, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6729, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.636, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5978, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5882, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5747, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5722, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5599, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5832, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5711, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5557, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5581, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5761, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5643, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5702, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5648, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.556, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5578, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.564, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5562, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.559, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5508, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5588, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5584, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5555, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5597, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5621, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5568, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5527, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5458, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5553, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5582, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5518, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5485, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5444, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5522, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5543, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.56, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5394, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5475, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5437, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5438, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5481, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5434, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5422, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5465, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5447, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5396, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.543, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5461, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5482, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5453, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5493, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5477, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5365, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5433, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5368, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5467, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5382, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5354, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5322, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5379, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5398, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5337, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5384, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5256, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5353, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5233, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5362, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5347, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5377, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5374, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.532, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5315, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5274, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.537, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5427, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5288, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5259, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.531, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5263, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5305, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5306, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.526, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5308, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5295, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5297, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5293, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5252, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.524, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5248, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5332, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5237, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5201, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5336, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5282, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5269, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5203, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5311, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5229, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5266, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5283, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5279, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5267, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5238, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.525, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5281, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5204, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5193, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5278, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5218, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5239, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5169, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5275, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5216, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5236, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.527, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5304, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5242, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5351, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5262, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5178, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5172, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.517, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5285, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5228, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5131, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5173, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5137, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5194, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.521, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5161, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5148, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5155, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5026, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5156, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5129, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5101, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5069, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5082, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5093, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5198, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5189, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5105, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5165, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5088, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5065, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5053, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5078, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5038, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5113, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5031, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.505, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4966, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4975, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.5071, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5106, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5103, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5004, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5045, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5019, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4928, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5029, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5175, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5084, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5008, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5102, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5054, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5191, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.5051, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.5044, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4978, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.492, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4997, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4968, + "step": 3000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_global/checkpoint-3000/training_args.bin b/cvnet_small_v1_global/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89dfdb803f04836e7afc0c6a28f9dae664c6e171 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddfb786197274aad152ee4c2cc9787279a04f3c148146f1b57e85d4fb19f6fc +size 4856 diff --git a/cvnet_small_v1_global/checkpoint-4000/model.safetensors b/cvnet_small_v1_global/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..281414c4d9590f4d407fda3a5c744694b08804a5 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25bb4e965b6951e2ee3ab893a0d0ce4998c4876a8469be3e67a9e30cbf24767 +size 237860264 diff --git a/cvnet_small_v1_global/checkpoint-4000/optimizer.pt b/cvnet_small_v1_global/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..74191f5642b9dce176111b6f6884111f164673d1 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4094205d4f0d797c40d85e82a83e95c93d5a364494deadaafd79984544eee9 +size 475804282 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_0.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..27157ed6df83ce9d6c109c9b7dea4f945557c6c2 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e9d80f2c7b48e823660edbe971433389c5987502ff5b8904c6fa06a4b8fac4 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_1.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d07f16bf0d35084ca8183acbe7d1ec6d1d90a220 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6dc64ef283107cc20517b99d8a4260d5dc35fe0739df81e0627d71f753f071e +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_2.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5a4f71f56747c23a569fdab1e672a231cf7beb8 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3984ffaba3ef211236ed7149a8baca5ccfdc9f355d0c4a2f94ee586ef97610e1 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_3.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8bc5aa77c0a5136f2602b61102d9fa19e74049d1 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef672fc70b0c0d7f41d3aa0c8a5fc28c8b3a305a6b083477713cb302d8d72a0 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_4.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..23ce76a0046bef79ca5a99524908ab2f3b052e03 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0341c1736e01084f618482fe049b36f4ff83d3c592a9632213c226953a9084 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_5.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..78895c9a779442c5c0a14d8426eb7659ed5f69f5 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b1c55b3a76909bc8b26c01f698db08cb90488d67ec9d13d047aea4788fd653 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_6.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1009d8998b054def1787038b788e28e743447e38 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73db3b651070332fced69b13785993fdefdaac849cbcb23e06c08cde7fdc5d55 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/rng_state_7.pth b/cvnet_small_v1_global/checkpoint-4000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..642ce42df3671964f6b34b6543afd6eb286a0714 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f21a5acb7274574d65d51c6e1a0d8b70345cf7b9c9dd99e4b85e293546a6e40 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-4000/scheduler.pt b/cvnet_small_v1_global/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..503d2b402ef8df237c8826d9429a15068a9f8e5d --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f5b5ac4a53f35c9923ba2e114d395c8f0ca2ff8283a0adc3fb6db30ab7f070 +size 1064 diff --git a/cvnet_small_v1_global/checkpoint-4000/trainer_state.json b/cvnet_small_v1_global/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..281cf43b7355f13d85eea0684f37ac254ce06d9f --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/trainer_state.json @@ -0,0 +1,2421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3494138483596188, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7081, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6774, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6781, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6772, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6802, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6745, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6729, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.636, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5978, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5882, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5747, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5722, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5599, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5832, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5711, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5557, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5581, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5761, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5643, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5702, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5648, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.556, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5578, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.564, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5562, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.559, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5508, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5588, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5584, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5555, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5597, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5621, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5568, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5527, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5458, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5553, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5582, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5518, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5485, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5444, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5522, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5543, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.56, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5394, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5475, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5437, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5438, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5481, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5434, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5422, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5465, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5447, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5396, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.543, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5461, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5482, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5453, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5493, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5477, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5365, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5433, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5368, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5467, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5382, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5354, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5322, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5379, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5398, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5337, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5384, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5256, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5353, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5233, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5362, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5347, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5377, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5374, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.532, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5315, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5274, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.537, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5427, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5288, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5259, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.531, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5263, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5305, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5306, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.526, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5308, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5295, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5297, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5293, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5252, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.524, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5248, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5332, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5237, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5201, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5336, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5282, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5269, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5203, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5311, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5229, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5266, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5283, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5279, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5267, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5238, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.525, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5281, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5204, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5193, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5278, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5218, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5239, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5169, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5275, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5216, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5236, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.527, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5304, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5242, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5351, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5262, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5178, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5172, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.517, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5285, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5228, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5131, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5173, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5137, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5194, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.521, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5161, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5148, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5155, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5026, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5156, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5129, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5101, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5069, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5082, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5093, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5198, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5189, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5105, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5165, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5088, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5065, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5053, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5078, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5038, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5113, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5031, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.505, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4966, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4975, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.5071, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5106, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5103, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5004, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5045, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5019, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4928, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5029, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5175, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5084, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5008, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5102, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5054, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5191, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.5051, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.5044, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4978, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.492, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4997, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4968, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4963, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.5034, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.5049, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.512, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.5033, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4975, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.5091, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4988, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4971, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.5038, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4997, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.5012, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4974, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.503, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.501, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4961, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4941, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4883, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.5048, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4951, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.493, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4831, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4989, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4923, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4928, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4967, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4873, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4954, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.502, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.5034, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4933, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4932, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4954, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4863, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4959, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4893, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4831, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4878, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4887, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4916, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4947, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4935, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4935, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4843, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.493, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4921, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4813, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4873, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4897, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4902, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4904, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4834, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4917, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.49, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4849, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4799, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4888, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4926, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4955, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4795, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4894, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4818, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4807, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4857, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4981, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4911, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4773, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4755, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.481, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4815, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4854, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4811, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4809, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4726, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4906, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.48, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4757, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.481, + "step": 4000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_global/checkpoint-4000/training_args.bin b/cvnet_small_v1_global/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89dfdb803f04836e7afc0c6a28f9dae664c6e171 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddfb786197274aad152ee4c2cc9787279a04f3c148146f1b57e85d4fb19f6fc +size 4856 diff --git a/cvnet_small_v1_global/checkpoint-5000/model.safetensors b/cvnet_small_v1_global/checkpoint-5000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59a9ba0820f60821ccf988669213be6adbcd3ca2 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9495d7a3d49a9be241c1af32a062b96e69e2fd6501e7bff8a7e3249bd4785a +size 237860264 diff --git a/cvnet_small_v1_global/checkpoint-5000/optimizer.pt b/cvnet_small_v1_global/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5771daa7bdf85cf7eb24db25122e99a4cc54350d --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85aa11f57b0e71cb3040f095948ffe07c37a9c111f1418d99850c3412b5d22d6 +size 475804282 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_0.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..11e0c340c3c865d1230d06fc8147457636c36c7e --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdb01d742970be957c0ece5354beb317869a9c3ca2df86c171b56533ec8f43a +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_1.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dd478a8c9ea4164341bf243db8034288705a41e --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f451b776653c44b20c2f0aa86ce758d2f6b160a2aed1623e73fb9ce65aab45dc +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_2.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..64b4a78013a2d2da348987b2be5e0f0f9768f97d --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec45f9a6fa09681ce69490ec8693348a5cd79d4e73833ec2c3c3616ad0cfb45 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_3.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c64fb5462b0001c919706fb5b95cbb3049c2daf --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c41930d7b79073d68661afecfe85fb979ce0c66aa50931c41c30c84094e01f +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_4.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe1ec346d9089256afe9858e9a678cc5a0f9e725 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f195114778c83dcdf347276c88ba35590abcc726622eff124827e1184d650b85 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_5.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..551227058e212b8b1b4c9946a46abd488003f429 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e790930ffa2fcd8eaf8532e85b32332d84d65708e9d2dbf5498070faacf67367 +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_6.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..dff0e3a4547f927a68daf8bdea3c57bf24a69713 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcc050ca3e609d48f5075b0221433e64883cc24379379e0f87b9bfc6fc692cd +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/rng_state_7.pth b/cvnet_small_v1_global/checkpoint-5000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4f9ceecc118db03d3225ab3c794d989fceb08fc --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee5311f27f1199ba1ac2117100e5fb29127ebdb52900e88cf9b6a1d8cd6cd3a +size 15920 diff --git a/cvnet_small_v1_global/checkpoint-5000/scheduler.pt b/cvnet_small_v1_global/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb6e935fff7477b8e4bab40ab97b33a5268355e --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5efa72feef3a398c4152d1d39792b540dbc6e38d02d8ab41bc4a583b249405d +size 1064 diff --git a/cvnet_small_v1_global/checkpoint-5000/trainer_state.json b/cvnet_small_v1_global/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9cff7ded2e6525cc3c48bb515e59b378d9ef3438 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/trainer_state.json @@ -0,0 +1,3021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6867673104495235, + "eval_steps": 500, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7081, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6774, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6781, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6772, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6802, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6745, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6729, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.636, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5978, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5882, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5747, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5722, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5599, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5832, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5711, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.5557, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5581, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5761, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.5643, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5702, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.5648, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5691, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.556, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5578, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.564, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5562, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.559, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.565, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5508, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5588, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5584, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.5555, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5597, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.5621, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5568, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5527, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5458, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.5524, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5553, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5582, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.5518, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5485, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.5444, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5522, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.5543, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.56, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5394, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.5475, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5437, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5438, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5481, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5434, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.5422, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5465, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5447, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.5396, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.543, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5461, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.5482, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5453, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.5493, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5477, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5365, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.5433, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5368, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.542, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.5467, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5382, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.5354, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5322, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5379, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.5398, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5337, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5384, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.5256, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5353, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5233, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.5483, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5362, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.5347, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5377, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.5374, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.532, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5315, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.5274, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.537, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5427, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5288, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.5259, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.531, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5263, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.5305, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.5306, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.526, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5308, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5295, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5297, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.5293, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.5252, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.524, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5248, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.5332, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5237, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5201, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.5366, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5336, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5282, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.5269, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5245, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5203, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5311, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5229, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.5266, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5283, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5279, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5267, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.5238, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.5226, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.525, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.5249, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5225, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5281, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.5204, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5193, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5278, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5218, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.5239, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5231, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.5169, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5275, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5179, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.5216, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5236, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.527, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5224, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5304, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.5242, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5351, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5262, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.5178, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.5172, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.517, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5285, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5228, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.5131, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5173, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5137, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5154, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.5194, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.521, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5166, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.5161, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5181, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.5148, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5155, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.5026, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5156, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.5164, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5129, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5143, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5101, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.5069, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5082, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5206, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5099, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5125, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5093, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.5198, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5187, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.5189, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5104, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5105, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.5165, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5088, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.5065, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5053, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.5078, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5038, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5055, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5113, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.506, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.5031, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.505, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4966, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.5132, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4975, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.5108, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.514, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.5071, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5112, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5106, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5011, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.5103, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5004, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5045, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.5019, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4928, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5029, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.5175, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5114, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.5084, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5008, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5102, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.5133, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.508, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5054, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.5191, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.5051, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.5044, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.504, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4978, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.492, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4997, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4968, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4963, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.5034, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.5049, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.5021, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.512, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.5033, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4975, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.5091, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4988, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4971, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.5038, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4997, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.5012, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4974, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.503, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.501, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4961, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4941, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4883, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.5048, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4951, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.493, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4831, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4989, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4923, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4943, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4928, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4967, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4873, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4954, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.502, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.5034, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4933, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4932, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4954, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4863, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4959, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4893, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4831, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4878, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4887, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4916, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4947, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4935, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4935, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4843, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.493, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4921, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4813, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4873, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4897, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4902, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4904, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4834, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4917, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.49, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4849, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4799, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4888, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4926, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4955, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4795, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4894, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4818, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4807, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4857, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4981, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4911, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4773, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4755, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.481, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4815, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4854, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4811, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4809, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4726, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4906, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4862, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.48, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4757, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.481, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4806, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4686, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4695, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4749, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4819, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4759, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4734, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4695, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4767, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4782, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4748, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4733, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4755, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4748, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.474, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4707, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4731, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4772, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4781, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.475, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.4662, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.4705, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.4726, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.4785, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.4696, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.4759, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.4698, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.4773, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.47, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.4808, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.4681, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.4712, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.4725, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.473, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.4749, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.4767, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.4662, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.4789, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.4737, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.483, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.4721, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.4698, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.4743, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.4668, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.4668, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.4679, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.4688, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.4717, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.4715, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.4726, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.4717, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.473, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.4742, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.4753, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.4697, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.4726, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.4748, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.4657, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.474, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.4624, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.4725, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.4736, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.4671, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.4689, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.4679, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.4665, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.469, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.4668, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.4713, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.4757, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.4631, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.4631, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.4725, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.4624, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.4677, + "step": 5000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_global/checkpoint-5000/training_args.bin b/cvnet_small_v1_global/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89dfdb803f04836e7afc0c6a28f9dae664c6e171 --- /dev/null +++ b/cvnet_small_v1_global/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddfb786197274aad152ee4c2cc9787279a04f3c148146f1b57e85d4fb19f6fc +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1b8a957464af7ea593df71c21592f4abb505264 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee09c633670537ad32071c6f48f72aedcb3cb4b22de6141e667c924f9bd3abf +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..182beb17de0b88daf4ee7f28e3877259fa756fae --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5d473c72a379045a2e0f1e56c9af753f440c0392a002b4a5c1806f18fd5f73 +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..76199e8cd59370e56d78379e7f0857d098932fdd --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b275069de3c999174e0675baa7922d66052a0c898935ff704e23e3cef878fe +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..db236d0038dbd238bc789a4e77c23f1457370409 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d09eab743b5b749ebce1ebcc0acafce88f180d92e6c1574df2cd2ec6493f284 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..612080d1b966edb9c548b38c39066628aed041b5 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f10f3b4810f8be1a94c6a3f8e84e6d8d72c53f7a363a52cc624ecfd9cc5eb83 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..55551738ab573081f638dfdb48a0106c65248f31 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5240772816e916400c0a6d093b395cc427e69eb6b82ad16a4cecc2332561b8af +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cd9a50c7c25b4903ef43a29d7252dbb96151f5f --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cb1f9d9d54e20989e075d4d0a8459635eb37e623741f99efb7d2427f4993b4 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..924b34104933c59c34ca55f6744bd89d9f634e41 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607b7a381bc052b0a9b35286824f0b454ed165491e7b87a3e74ed4c3336494bc +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..c178a16483d317ef9f4c0b585ff4cfc137bdd296 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6d53d1c065b1230e113ea0f5e5404c6c649c2fcca72b82bef06b2e3f1200e1 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0baf74f7428518103c26e94815df83a9cafe655 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32bd7774791739fb7c8a3c2e4d976b9b5b1d261f4312586bdf00cd5306c5794 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff2c05006e51f3f771730c9055c6c181da2abd0 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34ba9e6f50c2313df84e91c7dfed2be6b77dec7394adba34d3bd12d40d5ba2d +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..41b306bf7753ba937b18fa386ef83f6268365346 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/trainer_state.json @@ -0,0 +1,621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3373534620899047, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-1000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd99606fde888ac987b991a1a8ea18a104bd2d4d --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36633585b0e17168614ca9692b64c7f3fea9f7946ac150d06c96c18bbb7047dd +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..152a06fe22fa993bc537189d9eeb3469d86c5fb4 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ac31606569293367a9da6568e5fc983e47a2329e0fce8a4e57cc5272b371bc +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4986a8aac36333c2e13582ab4cc0e13f51a0e3c1 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399a88ae31a12e7e3c780b5188c2dd2baf3b17cf459e26aa3c46eb48ac6b98fa +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3e8e99f59e43dbfee6761a8b163596d53d37a2b --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11c4e0a8d7af0b51cdf0c49070411aa85a8ec60ebc52f77a275788f28703d46 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..128a6430adf59dc9ae51af972ee34823bf35d745 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602981e9d45a5154a05538455b041260754c018d792f1e6e664edfc3e0572feb +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a638077e62343a306f380ba3c92cb3eb2f6c05b --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a6236572b94e610082d6971859606d884a94f9265f9ddd8545182ffc6cc636 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..d23874a128181fbca324694c9acc20fcb5e40998 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa496dbe24521e796da8d4b2a1507ea4c55cda06ae1c7336e5dd0396a4559190 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e98e7cfc462af0bc887d61d6706110ea3f76c11 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004586c0da87a64f32d6edcb395860503348b22a799755ca3fb2314708a49c7e +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..40a36a2eda59b410e2ef79790cee3180f801fee8 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbafa128291a4aa7088b96a35651dcc22e18ab4b22698d7e9ee3f40e398147d +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..467f22eb368091fd8bf28f2a71eb7a725fa85d41 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18a8aafa7fd87ce9ec0c644422f207e743a3863e733fbfbb45374297107d5af +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4605de17b07049a57c10990ac0db26645b375276 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115fc5d3c71fc8b112d5c1701359950aa4e48ea559451a73de1d1d8ca1116d89 +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fc9338585adfb20354421820e901b5d1a2479c5c --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/trainer_state.json @@ -0,0 +1,1221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6747069241798094, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6378, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6449, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6443, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6411, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.642, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6364, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.65, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.645, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6401, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6452, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6422, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6421, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6434, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6404, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.646, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6752, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6394, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6308, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 2000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-2000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed6c1a40a819a6fd1bde4e580e0d9435d4d8d9c7 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d688504fbd162544f39051f7bf7f722ab0b172635e68f4bac8af1af0d6d58300 +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01da1dab0299b3a3ddc09395f27ce82d417e13ea --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa82394884bf57dfca1b54d42fdcf921c3c1a028333ec1e680a0a04ba9f694a +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6279c89246d3ac79f9433ce1aa3c10763c6ab1de --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f066bac4e29b58cbd92ad08aebed608a49ce7ec8b8a08d89b29cd19eaa6526e1 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d7ac0de90e233d9281952b42edad13d015b1f0f --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4d6e21ee9e308a02de7f522194a6d3b8b29550b32bccb7b8f66ac8c0f18ccb +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..554425bcf5d398faa6ea7d5da15ab29d5b0e641a --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f4fb5dab010c6c7d1a6d363b3da29775b73f0d66b80ff6beb870b648827933 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e94745960de1137c6534584cb54cff62b9babf23 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67fb1b5eb9abc7454d2105b449747cd44bdcb81a729fbe025d4b657fa734669 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d2d0ccd4eda40390432c3f449afd1bfc8388f78 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b7ebffea3f19a0a27c942a6bdac48a361468d1d42ff8818c4b30b3e8efa91f +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddbc33f84b15728f6c193dc691f895ccc9806728 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4144ca487b6f9fcbc6ab7fd4b4d135a1c75f1ab342fd8e9be48da4e8ebe68b +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2178437bf7e585fe72d165f15e0b103f82981938 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270a519b5b8f8590b94caed17210ea4d103747c2f9c69c6214d89fa23be4ac65 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..deeedfae0fb52086053eeadab93961e81a68fdad --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d85449fcca7e93c9c3fbc9d9a9c4305741c156b214ad2e628d107917a899673 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a885c8b891f90d887f6c40db2b8ae74f564dd98 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df43d46412429a4bda78060914e68357cbee658ac819784da79443fe1ce6982 +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2a4cf76c1b24b1c1d3d014871cb7a41909373ec1 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/trainer_state.json @@ -0,0 +1,1821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0120603862697142, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6378, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6449, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6443, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6411, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.642, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6364, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.65, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.645, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6401, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6452, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6422, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6421, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6434, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6404, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.646, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6752, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6394, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6308, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6363, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6294, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6282, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6296, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6646, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6281, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6317, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6272, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6269, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6184, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6244, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6424, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6368, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6245, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6539, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.626, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6309, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6271, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 3000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-3000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fae19eb9f643878678aa6b2cd33bfb6e90012872 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a2818292ffcfb576e45a296de056ce7265d665ff9e40af04198f15926bcfae +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2457b4b5272963a868ab799ff9e9306764bcf46 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c901728a29230568247ae52bc6d870d69d35f0401b47e33e4c58df44f422d5a5 +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..99493da2e5370a66dbed7827e1d7ba6410e8828d --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f26c3d1768c6ae4c474def0c137b69314ae8c72e465f6f95a800af794a2a09e +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fcc291e14a12baedccc1431ea7a9430e1b8c0e72 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3ca7f799a3e1fd36dc83bd11dc3d9b99112d7064e93ea65eaa391986ef5cd6 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d99186aaa674d522b45d36579003eb151a96fa7 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccfd3472e4e4cfcd011db16885b574284403045f147713f913c42e21104c294 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf177a48bdceb75cd47590d3dc4590deb2b704ae --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ee0246ba535c4ea39226df08b91cfa3a9cb800772ae18a4e932595aafae3f3 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..453a7e71ab9e0cdde170e3e2136d3597466543b3 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c3a5050775f7ed2cb135b9675df1194aef481dc07c07e8786a990008c237d2 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f1123cc148666196af551f7e7c508b3b7d5188a --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac45cf7b32e235682cc63de0934056563e30db7b9fb88500545c0fd6933f7ae4 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a461608b4dcf872f8b375665b3b295abb61f341 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6550a2c59c5405e5668721566e21841986fa2fca1e14cac0ba7bd09a4c6c6d87 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..dac85a2304e36e9a8808904608cf8511947a8270 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9397f0988a7295194f8742f890e6c203ae61671c0c74b2e5b3c0f3ab4fe9c03b +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..503d2b402ef8df237c8826d9429a15068a9f8e5d --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f5b5ac4a53f35c9923ba2e114d395c8f0ca2ff8283a0adc3fb6db30ab7f070 +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..46deeff68f10f6f46ecb976514bcc54121117bc7 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/trainer_state.json @@ -0,0 +1,2421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3494138483596188, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6378, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6449, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6443, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6411, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.642, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6364, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.65, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.645, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6401, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6452, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6422, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6421, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6434, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6404, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.646, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6752, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6394, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6308, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6363, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6294, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6282, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6296, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6646, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6281, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6317, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6272, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6269, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6184, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6244, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6424, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6368, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6245, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6539, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.626, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6309, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6271, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6196, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6436, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6256, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6198, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6285, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6147, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6215, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6136, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6192, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.615, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6166, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6122, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.616, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-4000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-5000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc6d979a85f15f7a52c91535ff7abfae2093d9b5 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f787ac129d749961894bfafe697f66d13f573d5f38b432e59d9288f5d8e08b55 +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f510502d840d9acfc9205a602082dcdc5c90cd3 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707c1a4e5b6b8d2ccb9187696f9dbcf8bed3f25718bee672fa20cdf3d01ee54c +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..39677673ea6d33ece6c7e54d42dda570337ef321 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cafb0e9235f8047bc55b0b621a4e93d80fc892e63ee3da03af79fc7f2e23579c +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c12058346ac872ccdb8f02d78f4563b6f47ae847 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d28fb3cde459b36c79b37d7ed796adba69b956c9e9e4bf18151be2743f515ce +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1e634c602498a40b047d3b1bb9640f53ddc31d6 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be30b240d749c0c071986c83b1160d241a87f1b44562a3e04f2c33d1b0deba77 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..119a7adf2ae3effe42deccb69216298ffd0ae4c5 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d15ee6cb6f002f026de91076d83dd67712e8d59d6e35203594152ccd458c034 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..abcf417a906294784464e6ff4d87caff7743d479 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3540bcc17b69b78275876e9c411218fe0e7d9e9c456a543c715fddaf72580b12 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6b847665195bf326e14d3f49401daec9be86fd4 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5577daf3bb02d1cdf478e7569ab5f72486e526bd3f0a7dfaafd5b876c20b233c +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..de5876b2c74a579837ca196c64dadccca8f794aa --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c6b0fa46b2cd29408ac0acf133813f28d691d2c969970edd02d477c516b9de +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e39ef30cc80b584ca184856225d68fea94b5904f --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119af508b7160eed58791eb97d94873e53469668209aaa0085013a4450bcaf5f +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb6e935fff7477b8e4bab40ab97b33a5268355e --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5efa72feef3a398c4152d1d39792b540dbc6e38d02d8ab41bc4a583b249405d +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0db2e7a7962ade00795af9d37c617f44073e1c6 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/trainer_state.json @@ -0,0 +1,3021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6867673104495235, + "eval_steps": 500, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6378, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6449, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6443, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6411, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.642, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6364, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.65, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.645, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6401, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6452, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6422, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6421, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6434, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6404, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.646, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6752, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6394, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6308, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6363, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6294, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6282, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6296, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6646, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6281, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6317, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6272, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6269, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6184, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6244, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6424, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6368, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6245, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6539, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.626, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6309, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6271, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6196, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6436, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6256, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6198, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6285, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6147, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6215, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6136, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6192, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.615, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6166, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6122, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.616, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6134, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6168, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6127, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6031, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6076, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6132, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6052, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6161, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6106, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6166, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.608, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6052, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6118, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.614, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6215, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6018, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.615, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6127, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5996, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6133, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 5000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-5000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/model.safetensors b/cvnet_small_v1_noglobal/checkpoint-6000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68cebcf1d677a76c95c3f66c82899fc7725601f2 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c13daa9b98407416c717a5bfe1e85c9a851537397f95f7ef180a5edf56505aa +size 231565528 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/optimizer.pt b/cvnet_small_v1_noglobal/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9601183f85dc288cd5621621adc06c2739f0197 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f89cb3d65ea08823e01a4e530a9c6c4f3c7cdd4dd460191eafae7900256851e8 +size 463213562 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_0.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddd59c5c15b5a55e7a3987f420003a1e30c4da49 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2bce1b939178bdb91cb1f874a7a000b36cf3be74fa8948eb1676bc6975d384 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_1.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..120cf5fd288918e83403c3cf97aaa6c39448c409 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd052c2e662b89e3963ab8505c968beabf71be9e15da0b0f6052abee3c9f435 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_2.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..1492ebad21f46552c2ff152143ba94df35d21ece --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d35d0d9bd0209acb9ce919c49f55de09c5cbf806f05e2fb1298640387aef6d5 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_3.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..f83b7ba8355058c7be166f6cbf59d4539450907a --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4deb81c597430038ccd03b227752c64d8d38d92686235a61f2e148d6258ee9c +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_4.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5453ac3bb6ed37aa8c478debc89795d2db1f0dae --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd68fceaf4d06e82547852230652ef63243b906e535881a62b0ffcb13ab0eb4 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_5.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a65bc2ad9e7b7165847a88c1f97572ab2321e1b --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c84cd4836653a84a020c85b2ebfa25193e9127ef7a20cbec7ac46fab556269db +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_6.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..88ef8e5ae0ae6144d3bb7be2cab5bce5f887b80f --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4e4217f50cef231a5e408ea5f4ef762ced599fe76d8ce39bc4fc3531204ae3 +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_7.pth b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..82bffb3c13d50406bfe64ab0b3555186d63ae810 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b914c17166db505f1b1a01880e439a87ca31848337c59df04dafea9f7a9bea +size 15920 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/scheduler.pt b/cvnet_small_v1_noglobal/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a19f49862de9ff057d5dbf1579e3849486e1ddb --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c891c6464db0fd4c307d9cb1087a0312b5de2d7c04375cf97f0f59aa2882e71c +size 1064 diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/trainer_state.json b/cvnet_small_v1_noglobal/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fe3b7d98f3d882871e998d8ff752b585a3c5f4d0 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/trainer_state.json @@ -0,0 +1,3621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0241207725394283, + "eval_steps": 500, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7302, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6767, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6744, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6699, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6799, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6763, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.674, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6733, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.668, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6679, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6663, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6793, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6749, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6748, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6722, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6671, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6688, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6701, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6828, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6716, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6706, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.667, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6725, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6661, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6624, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6657, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.664, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6605, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6635, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6664, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6696, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.658, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6611, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6575, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6703, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.665, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6557, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.663, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6627, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6586, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6594, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6572, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6546, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.656, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6544, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6528, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6574, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6517, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6562, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6549, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6535, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6489, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6459, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6444, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6353, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6566, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6469, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.649, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6551, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6512, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6486, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6617, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6378, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.648, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6614, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6449, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6443, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6411, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.642, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6364, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.65, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.645, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6401, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6452, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6395, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6422, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6421, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6434, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6404, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.646, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6752, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6394, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6308, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6396, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6417, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6363, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6294, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6282, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6296, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6646, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6307, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6334, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6286, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6281, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6397, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6317, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6272, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6269, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6184, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6244, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6424, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6368, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6346, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6245, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6539, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.632, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.626, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6309, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.62, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6271, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6336, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6196, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6436, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6441, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6287, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6256, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6169, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6254, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6154, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6198, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6285, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6147, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6215, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6183, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6136, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6186, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.633, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6179, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6192, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.615, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6166, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6122, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.616, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6134, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6168, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6127, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.611, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6031, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6165, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6076, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6132, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6052, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6113, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6161, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6106, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6166, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6099, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.608, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6052, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6118, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6044, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.614, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6215, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6018, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.615, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6127, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5996, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6133, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.6041, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6036, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6039, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.6009, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.598, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6172, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6118, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.614, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.5964, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.5995, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6005, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6009, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6007, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6013, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5915, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.6006, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6034, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6236, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6127, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6068, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.6005, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.5973, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.6003, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6076, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.599, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6118, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6106, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5945, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5996, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.6057, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.6102, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5976, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5999, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5907, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.6055, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5932, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6036, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.6149, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.6261, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.6003, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5956, + "step": 6000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal/checkpoint-6000/training_args.bin b/cvnet_small_v1_noglobal/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc854e996b1e5937c51594c3f01138b37a11f27 --- /dev/null +++ b/cvnet_small_v1_noglobal/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e33b021e61b4db84dbe9910e89543e5342c7e95bc5945ad2e0f9f1fee840a78 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..368eeaefc888c407e3fbffa50f9d0a5772bf21a8 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936bbc48893a3c243c74e36640e5bbafa8749420a2404b899e8b4186c930eee4 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7790424cacc9034ed3e464bb39dfa56898d47c74 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b765d50a2fdb6ce5b4d3b395bcc2e9007df0363d89b3e7d6df4ca3d2258f59 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ab31eb89973990d9b8610104399e7e5aaec5986 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f68045b77ef059786b9d730ec7bfcab3532c91c560ed23338e0d4d518b2be5e +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c53f9ece2cbb84f4662850cb2a534a064415698c --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164a39302aced8efcfa4d5b883ab7d4f208ce2d3aaf57fa0fff0dc5bc35e9b53 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5513d1ce6c2aed658bc3b07da4002acb5b6ce7dc --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74928fb7ef7944af9aedd43d3456cb7cc320ccd38e5d7989ea54cf81f2bdc4ac +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..873724ab5b9b6f006d53045897c53d31e33804d8 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7390fd7db05b7bd4f309fe3c16e3133e51c0b7bbf36881c88fd513b9ebb7b75a +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..81ff0496e02da863836522e59c26181e5043ad3f --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315c200f1cd4bb6a4a593accb8ba546796343253845919cb5b2b52089686830d +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..dde498f5924607b3543e00cedc5f8d8e7ead0775 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561c368392fbdb6e8a7e5e086de59545625d80b821b5c72a4024b1f47b1390bb +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..004d54378694fe0b6ec4833301aba9fe98fda662 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a358b4a84de8c0fefda163466d1b792e3d23a409d2f7f54fdd9b3535793d39 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e54c9d0e00105e7c7921fd6d4d170f670ee0b5a7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ad99f8730fc68cc1e3438467270d0c73427583fe0bffeb5bca595169156c2f +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff2c05006e51f3f771730c9055c6c181da2abd0 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34ba9e6f50c2313df84e91c7dfed2be6b77dec7394adba34d3bd12d40d5ba2d +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..47414831e09a6ed28a726fb93ce021890ba59c3c --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/trainer_state.json @@ -0,0 +1,621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3373534620899047, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d35c30d20e50f06177edb17f013b47f1eb5dfb1 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a72f5940f8e43d49a57b55ad43a818ac37cd2211cd93ce0d848cc170bf3773 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..984155bc7b1807045b6c9bd81d29088056de3770 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb5526677174a85abc57b556ba718a00d467a978752bf5471b14ce804eba771 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..33d35d46eb49c7694ddf02abc1f806fe21ce3c03 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24545b6bb7493bdf35c20f30780f8e0793c67065421d64d7180f5a320abd8417 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6310340e83ce52050038f3421730ede96037a6ba --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0232b218d0ca1f5487f91d43e6fb49bb92e0051351056294e6e73f362f38dc4 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f2d7f68fb7fd5fa3b96e6ab8e2a6ed8ae0cc685 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dffc06c0c6d363c38ffe8a64d78f552b7bfccf1d412a5e6d8b05a20badc3ab4 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee633cf4e658e3f4b63beab3a61ea2a072f17879 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c47dc8892b7412ec8da71c7704256b96b8469c5c7f3b6490b83502e4b2ee928 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..a68c4918b64151b1245b3a97d067f09d0feb75a0 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905b45076269c4efc0bfdb46513ea882d1de5bce7ddd5bed4ce98092c7da2c0b +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..37b3a6d3dd2e51bee05f2894dcebb9a4fe902f47 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ee8e3283737d5d9165ab7bbed91fab8f8054e727830b0ac207e19ea1b998ca +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..24f763bcd01fe0ca32d12a74bd2f9a71337dd568 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7205ffddd0d11808dcecbed7a8e9b2051147bffa0bcf02b1ee5ef28b9e65162 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a55221cfbb5c000b877e8e6b14977b7163966dc --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bde508a1939450d8ec45b79bab2568f6dd5d0ac0cf2add96faab85481697da +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4605de17b07049a57c10990ac0db26645b375276 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115fc5d3c71fc8b112d5c1701359950aa4e48ea559451a73de1d1d8ca1116d89 +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..983cf3c63d861fff9a53997510e4eefcd5d62ff5 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/trainer_state.json @@ -0,0 +1,1221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6747069241798094, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fd944fe8cf63721714c351ea86044086138ded0 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4f3f7bc7cfe4d62fba38a407fcadae0e37b7fa1b2a681226dbfb3a3888a34c +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..51932223f52edcdc969e29f2ab99cda182dde289 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f34a7b9911f5a43a65b50192839b4bda6d94d2973a75104786e8e37bc85721 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb8e78a2252e7489e47adb734bb1f370f80fb558 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7431609fc9d10c784464e14fe45f5a89b22573e0769c9c21d7d0a2dead49ec58 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..14409c37cb8c37cf95a2a40f01e3bde6c0a86a8f --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081bec13a1a7ac2ad3917351c18540a7bae0cfef6e0df05d537d5c0163fce12 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e0781a6af83c0376c3bc340f290368a3cdf5f3b --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535456fe6d77127c3883ae9c325dccec963de01449e321e9c704f78c50992939 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3a155aae9463722acad62167de281b7e03ad81a --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880e7012bf446334aa49027d1791b52f192a6dc11d44fea74095b153fd34bc97 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..50505327adf88ab24ab9f90d8faf76553bc4d2a9 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40038cca0e1cc4c41ee9eb36cfd70f6e868697faa2d2d79afba994d00387b725 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..1db65e964f07df377b31f7937f3fad02cb5361f7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ec016c5b320b9210f7edc5e4b4d309aa8c19437ccbdf2fd68012814e7b0be6 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d27f215baa695e5cea378d01affc7631aa9463d --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049da174472b1bfd728972b360f9b2b80d3b4842c4c00b3019ff285417d7b3ca +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae156e93c6ee694319aecdfa93bbb81b80472851 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2dcd919cf3c5f19cfa3f8a17ee1d2a7803a15af43f3917c55ec05ed6ca5858d +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a885c8b891f90d887f6c40db2b8ae74f564dd98 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df43d46412429a4bda78060914e68357cbee658ac819784da79443fe1ce6982 +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2eabfca3fc8876c65cb7b8564d6949d43550114f --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/trainer_state.json @@ -0,0 +1,1821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0120603862697142, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9239f70972b9b1aee3747b256f4acf8301b2d9c5 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9060a0b79baf213a69af7c32b37dc43593dcdce701c552cd3e079c08bd20b190 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4fc1582c82d2755d01be2e645a86d5460185ae6 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd21c43f0fe0f026c757b0425a0b4eaa3f1249e9d06c6bc88194c7dca92928d0 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..27157ed6df83ce9d6c109c9b7dea4f945557c6c2 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e9d80f2c7b48e823660edbe971433389c5987502ff5b8904c6fa06a4b8fac4 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d07f16bf0d35084ca8183acbe7d1ec6d1d90a220 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6dc64ef283107cc20517b99d8a4260d5dc35fe0739df81e0627d71f753f071e +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5a4f71f56747c23a569fdab1e672a231cf7beb8 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3984ffaba3ef211236ed7149a8baca5ccfdc9f355d0c4a2f94ee586ef97610e1 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8bc5aa77c0a5136f2602b61102d9fa19e74049d1 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef672fc70b0c0d7f41d3aa0c8a5fc28c8b3a305a6b083477713cb302d8d72a0 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..23ce76a0046bef79ca5a99524908ab2f3b052e03 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0341c1736e01084f618482fe049b36f4ff83d3c592a9632213c226953a9084 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..78895c9a779442c5c0a14d8426eb7659ed5f69f5 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b1c55b3a76909bc8b26c01f698db08cb90488d67ec9d13d047aea4788fd653 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1009d8998b054def1787038b788e28e743447e38 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73db3b651070332fced69b13785993fdefdaac849cbcb23e06c08cde7fdc5d55 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..642ce42df3671964f6b34b6543afd6eb286a0714 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f21a5acb7274574d65d51c6e1a0d8b70345cf7b9c9dd99e4b85e293546a6e40 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..503d2b402ef8df237c8826d9429a15068a9f8e5d --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f5b5ac4a53f35c9923ba2e114d395c8f0ca2ff8283a0adc3fb6db30ab7f070 +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6d8026acf4abf1c04906e50fd084e1fa7123437e --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/trainer_state.json @@ -0,0 +1,2421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3494138483596188, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05a9e0f529fe0265c1899a5ac7e8d9d3b7e97164 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534383927a64da5f7d6d7c482ca72d50d0a6151259b79422d21686d1e27f15d4 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf9163571635c1f5e158ec5cc80dc97251f8f611 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a79fb3b066c830961f4271527eb5029a4bd988b5562dc93ade9cbfbd569e83 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..11e0c340c3c865d1230d06fc8147457636c36c7e --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdb01d742970be957c0ece5354beb317869a9c3ca2df86c171b56533ec8f43a +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dd478a8c9ea4164341bf243db8034288705a41e --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f451b776653c44b20c2f0aa86ce758d2f6b160a2aed1623e73fb9ce65aab45dc +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..64b4a78013a2d2da348987b2be5e0f0f9768f97d --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec45f9a6fa09681ce69490ec8693348a5cd79d4e73833ec2c3c3616ad0cfb45 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c64fb5462b0001c919706fb5b95cbb3049c2daf --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c41930d7b79073d68661afecfe85fb979ce0c66aa50931c41c30c84094e01f +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe1ec346d9089256afe9858e9a678cc5a0f9e725 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f195114778c83dcdf347276c88ba35590abcc726622eff124827e1184d650b85 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..551227058e212b8b1b4c9946a46abd488003f429 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e790930ffa2fcd8eaf8532e85b32332d84d65708e9d2dbf5498070faacf67367 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..dff0e3a4547f927a68daf8bdea3c57bf24a69713 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcc050ca3e609d48f5075b0221433e64883cc24379379e0f87b9bfc6fc692cd +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4f9ceecc118db03d3225ab3c794d989fceb08fc --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee5311f27f1199ba1ac2117100e5fb29127ebdb52900e88cf9b6a1d8cd6cd3a +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb6e935fff7477b8e4bab40ab97b33a5268355e --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5efa72feef3a398c4152d1d39792b540dbc6e38d02d8ab41bc4a583b249405d +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e30c74581a1a035afce4978c3eb859bbc14f88fc --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/trainer_state.json @@ -0,0 +1,3021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6867673104495235, + "eval_steps": 500, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6253, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6123, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6095, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6042, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6175, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6146, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6056, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6211, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5992, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6025, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6065, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6032, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.61, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 5000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdf578d811eee8f047f7d6690e96940268f42d2b --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9e2b537175fba00dba9d9f77023284a722877e58b0614c9902334258c2a531 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d977b5bcca5fd385fc1c4292d44185fd97f33a7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1e3956d7c24864bf9055686cfc0c8ab1f4b6209440413f437201740b299957 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..573734be361b33074bad7976bc01de749f5d6479 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4e74bbbd65d4c41a2a3bb1ea3fab4232793342026d4ffddf16cc632eae7a62 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..00dad64f42e12d0afa1e60caca6210040dc49131 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a697538c03af5ff8d6452dc15809a2894197ffa5681aa24edbcffc60e254a44f +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9ae0f8d5f1ebe6f385347e0bbf38ca560f444b7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fbf72c1542dcec16af36c0222d3bbd5999db9343d956cbbc8773f3f66517526 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb0bf81d6134a09c791172c3ca07fa2b29f739b5 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a68317944e9f214474713c9c1db897f06cab2c5a4c170c0c5134a6091190b5 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc85f134ea3fbc6bc5d8d8a23706f5d0fe08b3fd --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cca38c6e7daab0237ed6da434a7df41d9f170d92b83d3a756bf26c5e8ec11fd +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef8acbd251c3c21ed20e9316184f5ddb5b7c29e2 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b8da61059b60a1e04b5439ccfded49d90ee8196f887e88b6a19571fb41b8ce +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e9fed676d4b294296d8083ffa689e570dc39075 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6285187171ba9b778016af44ec47ceb282740a4dd23535d002b80bf5b2bdb97 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4afd09a3df8f4d942ac7d014c7a0b8fecf1a0423 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6473ee5bdf6c7a45259c4f355f40e16b480317db5b2b9d24efa31f9bf9d22202 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a19f49862de9ff057d5dbf1579e3849486e1ddb --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c891c6464db0fd4c307d9cb1087a0312b5de2d7c04375cf97f0f59aa2882e71c +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f0bcb3a292c748e2c12134be5b564457f31056e --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/trainer_state.json @@ -0,0 +1,3621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0241207725394283, + "eval_steps": 500, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6253, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6123, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6095, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6042, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6175, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6146, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6056, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6211, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5992, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6025, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6065, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6032, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.61, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6059, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5999, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5941, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.5972, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6047, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.5976, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.5945, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6092, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6062, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6021, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.5988, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5958, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5985, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.599, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.592, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.5925, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6125, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5889, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5973, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6028, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.5943, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.6007, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6054, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.598, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.6001, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5864, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5899, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 6000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8c00dd25bb175e9ffa589c17429fcd10ece3327 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862730b4ea6b5f5205c5f7949cb2cae6b0e1bad04fe2ceeefdb066bf683f282b +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..85afc1d33265d6500c56a1593146ee563d4db6d4 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7f4dee1f0577b85b72fa874a88c8fbcb1db4236741544bbe0613ce4cb09a55 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..45df2fc5a65b8ed8addaca82738706bcbc3d354d --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79070614809b14d7d9ec529b08959bdf21acd157981c8790f18646e727ecb7a2 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c948306a0faaf3e0501f52348767111b70b59cd7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1271e97eea72dbeacaa4e28a8ba68c9001897356682fed9d29d54f495006853d +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..57db45639fee1b370933ee6c97f112bfaac04f90 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271fc7f1ca0ec9742ec537789e746b0401ed98834a2288108d94cb14a694f803 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..22fa7388ac0e625e289ce37e6b79dd608755fcf3 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef2b67b03c9810b08b56e4947a089f8d8ab7b89b86348255ecd2cd8c779631b +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e99c104b1f483d5eda22b12f56398f6de67e26d --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936416ff61c500377528f245d1c7100ba09e42993fe5edd62b64fc08b0ae180a +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ef2b61a9a61c20ff816eb7d53d5a100b8a34071 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56af862adbec6e2b62670042d928d0eefc403b58310fa35f2528aa926c375ec7 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d351974ca9fca1dda7126f22f4ee7f42fab3fb7 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cbc2fc39c7d03cec9648447b91c124a625c578e8c93e9870d9bdb8892f049a6 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d475c0fc82bccf2e2063e998260e0a996e01701 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b8eff2583ba40500598c1be7bd1e1875b644238c72d057fe1345d8c310b5ac +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f80910e440c97c6830904084432a446f7a721cd5 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5445e128a7bcf4177cc50c7224415006e388ca1019002cdd1a4b59301d72556c +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a25e6d75451865595727c4fe69bbdb12fa1bc26 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/trainer_state.json @@ -0,0 +1,4221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.3614742346293327, + "eval_steps": 500, + "global_step": 7000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6253, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6123, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6095, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6042, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6175, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6146, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6056, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6211, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5992, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6025, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6065, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6032, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.61, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6059, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5999, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5941, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.5972, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6047, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.5976, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.5945, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6092, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6062, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6021, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.5988, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5958, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5985, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.599, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.592, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.5925, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6125, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5889, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5973, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6028, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.5943, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.6007, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6054, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.598, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.6001, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5864, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5899, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5914, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5807, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.594, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5949, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.579, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.591, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6055, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5937, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5838, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6014, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5968, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.6003, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5855, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5929, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5938, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5935, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5858, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5927, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5877, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5934, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5986, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5863, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5844, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5891, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5848, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5933, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5866, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5923, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5902, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5971, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5837, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5903, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5817, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5886, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5828, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5843, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5907, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5802, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 7000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/model.safetensors b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e0850bbb425b37c4f9346a502c70413b2bb7409 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a0f07a7b809d891043275cfe2c44f080df708c5c52942fa8a6bab8fa8cd2e5 +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/optimizer.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..86154cd7cd88e4b843a431b83bade155f3dfeb86 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76f5d5f819c3e9fe6615fe5e9f51ac9a949720c85e58d4c85886bd28b14f595 +size 463213562 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_0.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1a7000b2a2a9b7e66995194c7def83701672681 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6041fbe6d64344a7314c873a27b77bc02d1482287c212202188d36f4518a660d +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_1.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..cca238fc6d5fb18fc6b45ee6a12e0b6439381ca2 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98371ac0922b4275c5af92449fd0f7232b9afa2afae6edaeff681f991c72d33 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_2.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ea9d02214ccd6e3e63d6f099324d649a3685139 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780c0966ec7854b6c6e39f3e8fa4117f78c3e4680730e9fa98df60d0fe7eab92 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_3.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3324706fc72c3c4eb41b160a595cc8c3920f031 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c663a5160d7288107012a869bab934436414514919f633809e6413d97c7ad945 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_4.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..474dbcb768ff8961068af90d14d5a6e7761b2bda --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ecb80974b256cc4c9e5cf15046168299cd65b34a55bc197240e42ed1c885ae5 +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_5.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8a6ddebeed1dfa08a5e502a3e3253075dcc5eca --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98f2e800c82fc9025786118eb439c72f5f75939349d70c7f996ee05d9efbc0d +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_6.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba103507ac4db9853bb68e7c0c4c9864ee383c1b --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1e4bb44c061e0bde43201c6a526de0aa0db51d6e379cb474dc0df9403bef4a +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_7.pth b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b78fa1dc7fa2a0a928683c007abd9004fb520aba --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30a1cb01c4953e01f8a0855c963b5ad50f1bb2cc19a235a31b0d397d966d70c +size 15920 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/scheduler.pt b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bc1a1fc7418b56600e0a35fa744b78dec1110ac --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33657ad014a83329319798b77914fe9b3b1f8d598a6a6cf79c7c92f3a50382cc +size 1064 diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..43a9feab656ad652111b03f8e1cbe38e0d9bf80f --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/trainer_state.json @@ -0,0 +1,4821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.6988276967192375, + "eval_steps": 500, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6253, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6123, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6095, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6042, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6175, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6146, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6056, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6211, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5992, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6025, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6065, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6032, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.61, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6059, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5999, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5941, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.5972, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6047, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.5976, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.5945, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6092, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6062, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6021, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.5988, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5958, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5985, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.599, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.592, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.5925, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6125, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5889, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5973, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6028, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.5943, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.6007, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6054, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.598, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.6001, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5864, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5899, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5914, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5807, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.594, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5949, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.579, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.591, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6055, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5937, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5838, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6014, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5968, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.6003, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5855, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5929, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5938, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5935, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5858, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5927, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5877, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5934, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5986, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5863, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5844, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5891, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5848, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5933, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5866, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5923, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5902, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5971, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5837, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5903, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5817, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5886, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5828, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5843, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5907, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5802, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 7000 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 7010 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7020 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 7030 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.5893, + "step": 7040 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.5792, + "step": 7050 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.6, + "step": 7060 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7070 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.579, + "step": 7080 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.5834, + "step": 7090 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 7100 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5852, + "step": 7110 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7120 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.5831, + "step": 7130 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 7140 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.5791, + "step": 7150 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5779, + "step": 7160 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5843, + "step": 7170 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 7180 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 7190 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7200 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.5785, + "step": 7210 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 7220 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.5897, + "step": 7230 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7240 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 7250 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5853, + "step": 7260 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 7270 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.5926, + "step": 7280 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.586, + "step": 7290 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.5897, + "step": 7300 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7310 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5805, + "step": 7320 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5942, + "step": 7330 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5872, + "step": 7340 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5785, + "step": 7350 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5852, + "step": 7360 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.5964, + "step": 7370 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.584, + "step": 7380 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.5775, + "step": 7390 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.5909, + "step": 7400 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7410 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.585, + "step": 7420 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.585, + "step": 7430 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7440 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7450 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 7460 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5909, + "step": 7470 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5862, + "step": 7480 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.575, + "step": 7490 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.5819, + "step": 7500 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.5867, + "step": 7510 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 7520 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7530 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5914, + "step": 7540 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 7550 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5905, + "step": 7560 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5744, + "step": 7570 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5771, + "step": 7580 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7590 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5844, + "step": 7600 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5807, + "step": 7610 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5872, + "step": 7620 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7630 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7640 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 7650 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7660 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5815, + "step": 7670 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5828, + "step": 7680 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5835, + "step": 7690 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5798, + "step": 7700 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5886, + "step": 7710 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5966, + "step": 7720 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5878, + "step": 7730 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5855, + "step": 7740 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5816, + "step": 7750 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7760 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5893, + "step": 7770 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5839, + "step": 7780 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5791, + "step": 7790 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 7800 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5769, + "step": 7810 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5816, + "step": 7820 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5831, + "step": 7830 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 7840 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5866, + "step": 7850 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5805, + "step": 7860 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 7870 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.5713, + "step": 7880 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.5797, + "step": 7890 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5757, + "step": 7900 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 7910 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 7920 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5825, + "step": 7930 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5749, + "step": 7940 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5858, + "step": 7950 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5773, + "step": 7960 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 7970 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5758, + "step": 7980 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 7990 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 8000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/training_args.bin b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/checkpoint-8000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/cvnet_small_v1_noglobal_noshuf/model.safetensors b/cvnet_small_v1_noglobal_noshuf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76ca21d3991b619e97fc0284f2dbf1af7d0b7598 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdfe92fcbb79a012510882d98b5bda0b2937bc0cc8648dfa5ab2e2302abb3fd +size 231565528 diff --git a/cvnet_small_v1_noglobal_noshuf/trainer_state.json b/cvnet_small_v1_noglobal_noshuf/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07748219517da4290718f41ef4e4c261738846fa --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/trainer_state.json @@ -0,0 +1,5364 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9997469849034326, + "eval_steps": 500, + "global_step": 8892, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7047, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6771, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.676, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.6731, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6726, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.6734, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6718, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.6683, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.6719, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6684, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6675, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.6806, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6743, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6735, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6739, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6761, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.6714, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6707, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6651, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.6757, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6759, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6698, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.6687, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6697, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6678, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.6604, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6669, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6655, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6666, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6647, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.6612, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6607, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.6596, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.6606, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.654, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6631, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.6558, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6728, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.6649, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6569, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6582, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.6484, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6476, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.6628, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6523, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.655, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.6496, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.6542, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6445, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6511, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.6482, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6623, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.6473, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.651, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6453, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.6508, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6541, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6467, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.6405, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6533, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6416, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6461, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6439, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6507, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.6481, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6442, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.6418, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6505, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.637, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.6737, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.64, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.6583, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.6326, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6468, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.6384, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6427, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.6379, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6352, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.653, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6477, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6413, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.6374, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.641, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.6332, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6351, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.6365, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.6386, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6433, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6298, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6304, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.6361, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6303, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.6299, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6376, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.6447, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.6325, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6343, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6389, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6302, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.6315, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.6293, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6466, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6371, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.6323, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6239, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.638, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6391, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6494, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.6448, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6454, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6521, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.6653, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6432, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.6347, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6373, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.635, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.6322, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6297, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.6532, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6428, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6344, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6273, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6372, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.6398, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6491, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6318, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.6339, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6355, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6381, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6327, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.6354, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.6383, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.63, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6313, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.6393, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.644, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.6387, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6388, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6337, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.6284, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6292, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6312, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.634, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.6267, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6227, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.6319, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6425, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.6301, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.6316, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6277, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6399, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.6324, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6268, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6306, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6203, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6305, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.6385, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6288, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6283, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.6349, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6223, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.6382, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6345, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6462, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.6367, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6358, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.6338, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6255, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6199, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.6252, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6258, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.6328, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6359, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.6329, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.624, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6463, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.6446, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6264, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6209, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6141, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6249, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6259, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.6237, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.622, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6206, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.6366, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6341, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6218, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6194, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6278, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.6248, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6213, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.6362, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6377, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6333, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.6526, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6221, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6226, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6246, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6311, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.6182, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6145, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6314, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6357, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.6295, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6205, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6242, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.6233, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6235, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6207, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.6224, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.612, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6335, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.6117, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6171, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.6112, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.6238, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6266, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6204, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.6241, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6137, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.625, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6217, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6234, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.6185, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6189, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6143, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6243, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.6156, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.6131, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.621, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6202, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6229, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6081, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.6082, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6251, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.639, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.6265, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6247, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.6201, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6212, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6197, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.6094, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6228, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.6274, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.627, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6088, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6144, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6195, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6155, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6111, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.6153, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6164, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6109, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.6216, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6193, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6157, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.6135, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6178, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6222, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6119, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.6129, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6162, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.6253, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6176, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6214, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.6138, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6123, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.6159, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6187, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.6071, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6095, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6188, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.623, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6115, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.6085, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.6042, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.607, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6148, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6104, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.6091, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6116, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.6175, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6114, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.6086, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.6152, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6043, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.617, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.6151, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6146, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.6107, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.613, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.5997, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.6056, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6211, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.6124, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.6177, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.6126, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.603, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6219, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.6016, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6083, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.6128, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6101, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6173, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.6072, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.628, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6024, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.6063, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.5992, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.6025, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6022, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.6103, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6065, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.6061, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6208, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6032, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.6084, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.61, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.605, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6073, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.6069, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.6059, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5999, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.5941, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.6051, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.618, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.5972, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6047, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6096, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.6077, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.5961, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.6012, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.5976, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.6027, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.5945, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6092, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.6062, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6021, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.5988, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6163, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.5958, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.5985, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6048, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.602, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.6098, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.6045, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5969, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.5998, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.6049, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.599, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6075, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.6019, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.601, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.592, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.5925, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.6064, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.6089, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.6125, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.5889, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.5973, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.6004, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5989, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6079, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.6028, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.5953, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6108, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.6158, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.5943, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.5952, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5987, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.6007, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.6054, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.598, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.5984, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.5944, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.606, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.6001, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5975, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5864, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.5899, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.604, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5957, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5914, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.5981, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5807, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.594, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.5983, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5895, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.5949, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.579, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.591, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6055, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.6037, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.5881, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5937, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5962, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.5838, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6035, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.5993, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.6014, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5968, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.6003, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5939, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.5855, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5929, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.5938, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5935, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.6015, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.6066, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5967, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.5919, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5948, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.5858, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5927, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5877, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5934, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.6038, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.5994, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5986, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5863, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.5844, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5891, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.5848, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.6097, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.6017, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.5933, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.5866, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5979, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5871, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5923, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5902, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5971, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.5837, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5903, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.5896, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5817, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5886, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.5828, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5906, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5843, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.5907, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5802, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5883, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.5842, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5908, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5924, + "step": 7000 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.5959, + "step": 7010 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7020 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 7030 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.5893, + "step": 7040 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.5792, + "step": 7050 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.6, + "step": 7060 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7070 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.579, + "step": 7080 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.5834, + "step": 7090 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 7100 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5852, + "step": 7110 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7120 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.5831, + "step": 7130 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 7140 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.5791, + "step": 7150 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5779, + "step": 7160 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5843, + "step": 7170 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 7180 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.588, + "step": 7190 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7200 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.5785, + "step": 7210 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.59, + "step": 7220 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.5897, + "step": 7230 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7240 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5874, + "step": 7250 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5853, + "step": 7260 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.5849, + "step": 7270 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.5926, + "step": 7280 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.586, + "step": 7290 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.5897, + "step": 7300 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7310 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5805, + "step": 7320 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.5942, + "step": 7330 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5872, + "step": 7340 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5785, + "step": 7350 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.5852, + "step": 7360 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.5964, + "step": 7370 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.584, + "step": 7380 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.5775, + "step": 7390 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.5909, + "step": 7400 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7410 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.585, + "step": 7420 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.585, + "step": 7430 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7440 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.5876, + "step": 7450 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5846, + "step": 7460 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5909, + "step": 7470 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.5862, + "step": 7480 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.575, + "step": 7490 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.5819, + "step": 7500 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.5867, + "step": 7510 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5963, + "step": 7520 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5869, + "step": 7530 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.5914, + "step": 7540 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 7550 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5905, + "step": 7560 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.5744, + "step": 7570 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5771, + "step": 7580 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7590 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.5844, + "step": 7600 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5807, + "step": 7610 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5872, + "step": 7620 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7630 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7640 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 7650 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.5827, + "step": 7660 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5815, + "step": 7670 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5828, + "step": 7680 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.5835, + "step": 7690 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5798, + "step": 7700 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5886, + "step": 7710 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.5966, + "step": 7720 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5878, + "step": 7730 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5855, + "step": 7740 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.5816, + "step": 7750 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5856, + "step": 7760 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5893, + "step": 7770 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.5839, + "step": 7780 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5791, + "step": 7790 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5974, + "step": 7800 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.5769, + "step": 7810 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5816, + "step": 7820 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5831, + "step": 7830 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 7840 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5866, + "step": 7850 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5805, + "step": 7860 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 7870 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.5713, + "step": 7880 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.5797, + "step": 7890 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5757, + "step": 7900 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5954, + "step": 7910 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.5921, + "step": 7920 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5825, + "step": 7930 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5749, + "step": 7940 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.5858, + "step": 7950 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5773, + "step": 7960 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5913, + "step": 7970 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.5758, + "step": 7980 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.5898, + "step": 7990 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.5885, + "step": 8000 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.5803, + "step": 8010 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.576, + "step": 8020 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 8030 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.5784, + "step": 8040 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.5787, + "step": 8050 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.5862, + "step": 8060 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.5778, + "step": 8070 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.5894, + "step": 8080 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.5868, + "step": 8090 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.578, + "step": 8100 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.5721, + "step": 8110 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.5771, + "step": 8120 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.5795, + "step": 8130 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.5847, + "step": 8140 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.5801, + "step": 8150 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.5759, + "step": 8160 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 8170 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.5797, + "step": 8180 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.5798, + "step": 8190 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.5811, + "step": 8200 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.5724, + "step": 8210 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.577, + "step": 8220 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 8230 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.5812, + "step": 8240 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.5763, + "step": 8250 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.5789, + "step": 8260 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.578, + "step": 8270 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.5826, + "step": 8280 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.587, + "step": 8290 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.5859, + "step": 8300 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.5904, + "step": 8310 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.5706, + "step": 8320 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.5772, + "step": 8330 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.5859, + "step": 8340 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.5824, + "step": 8350 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.5798, + "step": 8360 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.5788, + "step": 8370 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.5742, + "step": 8380 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.5829, + "step": 8390 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.5823, + "step": 8400 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.5741, + "step": 8410 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.5789, + "step": 8420 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.5873, + "step": 8430 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.5781, + "step": 8440 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.5789, + "step": 8450 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.5738, + "step": 8460 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.5822, + "step": 8470 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.593, + "step": 8480 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.58, + "step": 8490 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.5768, + "step": 8500 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.5818, + "step": 8510 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.5802, + "step": 8520 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.5753, + "step": 8530 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.5794, + "step": 8540 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.574, + "step": 8550 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.5712, + "step": 8560 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.5756, + "step": 8570 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.5687, + "step": 8580 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.5673, + "step": 8590 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.5771, + "step": 8600 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.5781, + "step": 8610 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.573, + "step": 8620 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.5821, + "step": 8630 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.5797, + "step": 8640 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.5812, + "step": 8650 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.5777, + "step": 8660 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.5755, + "step": 8670 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.5988, + "step": 8680 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.5892, + "step": 8690 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.576, + "step": 8700 + }, + { + "epoch": 2.94, + "learning_rate": 5e-05, + "loss": 0.5888, + "step": 8710 + }, + { + "epoch": 2.94, + "learning_rate": 5e-05, + "loss": 0.5845, + "step": 8720 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.5799, + "step": 8730 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.5784, + "step": 8740 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.5774, + "step": 8750 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.5776, + "step": 8760 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.5793, + "step": 8770 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.5814, + "step": 8780 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.5645, + "step": 8790 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.5851, + "step": 8800 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 8810 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.5776, + "step": 8820 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.5755, + "step": 8830 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.581, + "step": 8840 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.577, + "step": 8850 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.5703, + "step": 8860 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.5934, + "step": 8870 + }, + { + "epoch": 3.0, + "learning_rate": 5e-05, + "loss": 0.5754, + "step": 8880 + }, + { + "epoch": 3.0, + "learning_rate": 5e-05, + "loss": 0.5745, + "step": 8890 + }, + { + "epoch": 3.0, + "step": 8892, + "total_flos": 0.0, + "train_loss": 0.6122167715069629, + "train_runtime": 62943.9838, + "train_samples_per_second": 36.166, + "train_steps_per_second": 0.141 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/cvnet_small_v1_noglobal_noshuf/training_args.bin b/cvnet_small_v1_noglobal_noshuf/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76c6e8e44c071d2b5c47ee143f33b3b0c3f9fe92 --- /dev/null +++ b/cvnet_small_v1_noglobal_noshuf/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee26f62127373b4e313bacaf1f87bc66c9583ad97e93c127ff53a983dbadb481 +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c001701709c6078cacadc4a004e88ad9db527f71 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715056d99912e2e190ad33a1daa3583a0f5d55c0dac206b44d830e802fb8a79e +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a01a45a7883c5540092ada819f2d409fb0bc2d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124ae6e79ebf74bb762aac0c09532b19612907250e4e1b519426e39c737eaa04 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ab31eb89973990d9b8610104399e7e5aaec5986 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f68045b77ef059786b9d730ec7bfcab3532c91c560ed23338e0d4d518b2be5e +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c53f9ece2cbb84f4662850cb2a534a064415698c --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164a39302aced8efcfa4d5b883ab7d4f208ce2d3aaf57fa0fff0dc5bc35e9b53 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5513d1ce6c2aed658bc3b07da4002acb5b6ce7dc --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74928fb7ef7944af9aedd43d3456cb7cc320ccd38e5d7989ea54cf81f2bdc4ac +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..873724ab5b9b6f006d53045897c53d31e33804d8 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7390fd7db05b7bd4f309fe3c16e3133e51c0b7bbf36881c88fd513b9ebb7b75a +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..81ff0496e02da863836522e59c26181e5043ad3f --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315c200f1cd4bb6a4a593accb8ba546796343253845919cb5b2b52089686830d +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..dde498f5924607b3543e00cedc5f8d8e7ead0775 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561c368392fbdb6e8a7e5e086de59545625d80b821b5c72a4024b1f47b1390bb +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..004d54378694fe0b6ec4833301aba9fe98fda662 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a358b4a84de8c0fefda163466d1b792e3d23a409d2f7f54fdd9b3535793d39 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e54c9d0e00105e7c7921fd6d4d170f670ee0b5a7 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ad99f8730fc68cc1e3438467270d0c73427583fe0bffeb5bca595169156c2f +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff2c05006e51f3f771730c9055c6c181da2abd0 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34ba9e6f50c2313df84e91c7dfed2be6b77dec7394adba34d3bd12d40d5ba2d +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe217494c7cf0d89b1ffc4ab4749ebe3d540935 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/trainer_state.json @@ -0,0 +1,621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3373534620899047, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-1000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bae9b8c1e7bc411250451fe91ac78bf373622eaf --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe63ae8223f8f25d3c8d974bda9bfe7efe0437b404ca376b82fad1765a6027c +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5f7e1af3406454cdacaf5b54d5b500dc2988be1 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28de628253cf7185c40364c91e74d4d8977fe1f386e6ce82b267c4edd48d0d71 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..33d35d46eb49c7694ddf02abc1f806fe21ce3c03 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24545b6bb7493bdf35c20f30780f8e0793c67065421d64d7180f5a320abd8417 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6310340e83ce52050038f3421730ede96037a6ba --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0232b218d0ca1f5487f91d43e6fb49bb92e0051351056294e6e73f362f38dc4 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f2d7f68fb7fd5fa3b96e6ab8e2a6ed8ae0cc685 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dffc06c0c6d363c38ffe8a64d78f552b7bfccf1d412a5e6d8b05a20badc3ab4 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee633cf4e658e3f4b63beab3a61ea2a072f17879 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c47dc8892b7412ec8da71c7704256b96b8469c5c7f3b6490b83502e4b2ee928 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..a68c4918b64151b1245b3a97d067f09d0feb75a0 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905b45076269c4efc0bfdb46513ea882d1de5bce7ddd5bed4ce98092c7da2c0b +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..37b3a6d3dd2e51bee05f2894dcebb9a4fe902f47 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ee8e3283737d5d9165ab7bbed91fab8f8054e727830b0ac207e19ea1b998ca +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..24f763bcd01fe0ca32d12a74bd2f9a71337dd568 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7205ffddd0d11808dcecbed7a8e9b2051147bffa0bcf02b1ee5ef28b9e65162 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a55221cfbb5c000b877e8e6b14977b7163966dc --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bde508a1939450d8ec45b79bab2568f6dd5d0ac0cf2add96faab85481697da +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4605de17b07049a57c10990ac0db26645b375276 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115fc5d3c71fc8b112d5c1701359950aa4e48ea559451a73de1d1d8ca1116d89 +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d454e2938361e4c5bc3e3ef1064eef03e069b6e --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/trainer_state.json @@ -0,0 +1,1221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6747069241798094, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-2000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5f4f5bf0e9203ec5083aacebbc30981019305b9 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c266272c4e71a36614fdd3904297971925f5e1c08abf7ad1d449815c569e8e7 +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6a7366f12e739816ab11b63f9787793c6522737 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d131f48c4ec990cfb534a7c6235ebf4ef0fd190eb8238e084de7f830d48bbce5 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb8e78a2252e7489e47adb734bb1f370f80fb558 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7431609fc9d10c784464e14fe45f5a89b22573e0769c9c21d7d0a2dead49ec58 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..14409c37cb8c37cf95a2a40f01e3bde6c0a86a8f --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081bec13a1a7ac2ad3917351c18540a7bae0cfef6e0df05d537d5c0163fce12 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e0781a6af83c0376c3bc340f290368a3cdf5f3b --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535456fe6d77127c3883ae9c325dccec963de01449e321e9c704f78c50992939 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3a155aae9463722acad62167de281b7e03ad81a --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880e7012bf446334aa49027d1791b52f192a6dc11d44fea74095b153fd34bc97 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..50505327adf88ab24ab9f90d8faf76553bc4d2a9 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40038cca0e1cc4c41ee9eb36cfd70f6e868697faa2d2d79afba994d00387b725 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..1db65e964f07df377b31f7937f3fad02cb5361f7 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ec016c5b320b9210f7edc5e4b4d309aa8c19437ccbdf2fd68012814e7b0be6 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d27f215baa695e5cea378d01affc7631aa9463d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049da174472b1bfd728972b360f9b2b80d3b4842c4c00b3019ff285417d7b3ca +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae156e93c6ee694319aecdfa93bbb81b80472851 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2dcd919cf3c5f19cfa3f8a17ee1d2a7803a15af43f3917c55ec05ed6ca5858d +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a885c8b891f90d887f6c40db2b8ae74f564dd98 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df43d46412429a4bda78060914e68357cbee658ac819784da79443fe1ce6982 +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..43d11899a95fdd698986089a18286f3486cd49b1 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/trainer_state.json @@ -0,0 +1,1821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0120603862697142, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-3000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27c66488b94c61c4700d3db227c7d26ed1da4038 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e7ed581ced44beb63e53b4f97b2c055d9fec9189ab1eaa63eeb5fe161a132c +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c62e85e9c8dac2f9c75a70da314b1b8fe801278 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae9f1d52c25734c652ac506fefebfd259b1370b7de9d99a7ca8b21931d2cf17 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..27157ed6df83ce9d6c109c9b7dea4f945557c6c2 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e9d80f2c7b48e823660edbe971433389c5987502ff5b8904c6fa06a4b8fac4 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d07f16bf0d35084ca8183acbe7d1ec6d1d90a220 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6dc64ef283107cc20517b99d8a4260d5dc35fe0739df81e0627d71f753f071e +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5a4f71f56747c23a569fdab1e672a231cf7beb8 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3984ffaba3ef211236ed7149a8baca5ccfdc9f355d0c4a2f94ee586ef97610e1 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8bc5aa77c0a5136f2602b61102d9fa19e74049d1 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef672fc70b0c0d7f41d3aa0c8a5fc28c8b3a305a6b083477713cb302d8d72a0 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..23ce76a0046bef79ca5a99524908ab2f3b052e03 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0341c1736e01084f618482fe049b36f4ff83d3c592a9632213c226953a9084 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..78895c9a779442c5c0a14d8426eb7659ed5f69f5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b1c55b3a76909bc8b26c01f698db08cb90488d67ec9d13d047aea4788fd653 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1009d8998b054def1787038b788e28e743447e38 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73db3b651070332fced69b13785993fdefdaac849cbcb23e06c08cde7fdc5d55 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..642ce42df3671964f6b34b6543afd6eb286a0714 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f21a5acb7274574d65d51c6e1a0d8b70345cf7b9c9dd99e4b85e293546a6e40 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..503d2b402ef8df237c8826d9429a15068a9f8e5d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f5b5ac4a53f35c9923ba2e114d395c8f0ca2ff8283a0adc3fb6db30ab7f070 +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f8e5123fd23c9c7fdda76be5cd9e29596b98bdda --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/trainer_state.json @@ -0,0 +1,2421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3494138483596188, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-4000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-5000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e4d431b56f098473ba1d9956afabe99f002f100 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8510009479b79e6b36a6db87f3d23d450b37bb861528b08dc9eaf81ec54a016 +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..58b2aaebfc1cd0dd851137b732872ec951ad1bd6 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b4b078e570f91d4e0b87e60ace89d2ba1af907976914ad1fb2d9a66327dd64 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..11e0c340c3c865d1230d06fc8147457636c36c7e --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdb01d742970be957c0ece5354beb317869a9c3ca2df86c171b56533ec8f43a +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dd478a8c9ea4164341bf243db8034288705a41e --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f451b776653c44b20c2f0aa86ce758d2f6b160a2aed1623e73fb9ce65aab45dc +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..64b4a78013a2d2da348987b2be5e0f0f9768f97d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec45f9a6fa09681ce69490ec8693348a5cd79d4e73833ec2c3c3616ad0cfb45 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c64fb5462b0001c919706fb5b95cbb3049c2daf --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c41930d7b79073d68661afecfe85fb979ce0c66aa50931c41c30c84094e01f +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe1ec346d9089256afe9858e9a678cc5a0f9e725 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f195114778c83dcdf347276c88ba35590abcc726622eff124827e1184d650b85 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..551227058e212b8b1b4c9946a46abd488003f429 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e790930ffa2fcd8eaf8532e85b32332d84d65708e9d2dbf5498070faacf67367 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..dff0e3a4547f927a68daf8bdea3c57bf24a69713 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcc050ca3e609d48f5075b0221433e64883cc24379379e0f87b9bfc6fc692cd +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4f9ceecc118db03d3225ab3c794d989fceb08fc --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee5311f27f1199ba1ac2117100e5fb29127ebdb52900e88cf9b6a1d8cd6cd3a +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb6e935fff7477b8e4bab40ab97b33a5268355e --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5efa72feef3a398c4152d1d39792b540dbc6e38d02d8ab41bc4a583b249405d +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d055cff65acb79830a92cfc1dbe8ea188c4e7f17 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/trainer_state.json @@ -0,0 +1,3021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6867673104495235, + "eval_steps": 500, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4218, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4204, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4219, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4174, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4196, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4189, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4179, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4187, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4186, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4155, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4111, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4119, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4075, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4016, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4036, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4014, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4002, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4041, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4055, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3981, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3986, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3912, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3863, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3913, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3886, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3874, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3859, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3846, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3785, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3882, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3813, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3835, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3794, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3788, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3812, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3694, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3654, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.377, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3701, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3682, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3643, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3737, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.369, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3674, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3569, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3675, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3589, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3591, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3549, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3611, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3554, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3564, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3604, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3574, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.36, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3511, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3529, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3565, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3501, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3475, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.353, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3524, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.35, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3497, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3439, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3552, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3406, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3531, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3459, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3476, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3421, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3451, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.342, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.3357, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.345, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.3437, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3381, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3365, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3412, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3413, + "step": 5000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-5000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-6000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..350853f9d61610045e9daaf87fd7ad0da3ef76f0 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c32ba7e1a3137f7b4fd8a18b2be6de84aa447b1968ba6041ef264ff42d0d27 +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6455d38da6c5faf35692bc8bf5a587bdec781fd --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dd1abe9f459f0ebeaf8a798e05077ee310e675fa0c08de1e5a893212723461 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..573734be361b33074bad7976bc01de749f5d6479 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4e74bbbd65d4c41a2a3bb1ea3fab4232793342026d4ffddf16cc632eae7a62 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..00dad64f42e12d0afa1e60caca6210040dc49131 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a697538c03af5ff8d6452dc15809a2894197ffa5681aa24edbcffc60e254a44f +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9ae0f8d5f1ebe6f385347e0bbf38ca560f444b7 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fbf72c1542dcec16af36c0222d3bbd5999db9343d956cbbc8773f3f66517526 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb0bf81d6134a09c791172c3ca07fa2b29f739b5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a68317944e9f214474713c9c1db897f06cab2c5a4c170c0c5134a6091190b5 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc85f134ea3fbc6bc5d8d8a23706f5d0fe08b3fd --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cca38c6e7daab0237ed6da434a7df41d9f170d92b83d3a756bf26c5e8ec11fd +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef8acbd251c3c21ed20e9316184f5ddb5b7c29e2 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b8da61059b60a1e04b5439ccfded49d90ee8196f887e88b6a19571fb41b8ce +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e9fed676d4b294296d8083ffa689e570dc39075 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6285187171ba9b778016af44ec47ceb282740a4dd23535d002b80bf5b2bdb97 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4afd09a3df8f4d942ac7d014c7a0b8fecf1a0423 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6473ee5bdf6c7a45259c4f355f40e16b480317db5b2b9d24efa31f9bf9d22202 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a19f49862de9ff057d5dbf1579e3849486e1ddb --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c891c6464db0fd4c307d9cb1087a0312b5de2d7c04375cf97f0f59aa2882e71c +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..772dc69343f211b9af0770bca95a608779beafd2 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/trainer_state.json @@ -0,0 +1,3621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0241207725394283, + "eval_steps": 500, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4218, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4204, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4219, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4174, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4196, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4189, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4179, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4187, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4186, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4155, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4111, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4119, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4075, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4016, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4036, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4014, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4002, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4041, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4055, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3981, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3986, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3912, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3863, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3913, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3886, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3874, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3859, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3846, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3785, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3882, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3813, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3835, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3794, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3788, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3812, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3694, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3654, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.377, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3701, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3682, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3643, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3737, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.369, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3674, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3569, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3675, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3589, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3591, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3549, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3611, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3554, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3564, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3604, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3574, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.36, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3511, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3529, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3565, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3501, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3475, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.353, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3524, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.35, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3497, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3439, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3552, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3406, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3531, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3459, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3476, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3421, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3451, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.342, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.3357, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.345, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.3437, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3381, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3365, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3412, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3413, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3327, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3368, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3372, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3321, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3324, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3292, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3231, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3249, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3259, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3316, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3313, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3332, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3239, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3232, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3295, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3244, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3208, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.328, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3221, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3203, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3146, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3184, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.316, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3189, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3119, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3159, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3115, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3161, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3122, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3124, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3182, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3108, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3129, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.3149, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.314, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3141, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3063, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3087, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3092, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3059, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.32, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3131, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.304, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3136, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.307, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.313, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3069, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3065, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3017, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3067, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3023, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.3007, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2994, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.301, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2986, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2997, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.302, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2956, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2912, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2939, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2978, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2914, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2915, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2919, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2928, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.29, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2983, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2855, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2859, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2875, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2858, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2838, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2831, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2828, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2819, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2736, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2767, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2724, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2785, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2725, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2783, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2706, + "step": 6000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-6000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-7000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7e3e634e93da6afa978b4d839a88edbc9ff9cab --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00fd64f51a7758482a99f6dd284b3abf14ad907110c13d332b4ca856cde59bda +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-7000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9703baf58f82245ead47a5b9c62817c2f4b0ffc --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfaf55844819163fb48978293901c4fb71e2eec1349d77ade59040cbcf927942 +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..45df2fc5a65b8ed8addaca82738706bcbc3d354d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79070614809b14d7d9ec529b08959bdf21acd157981c8790f18646e727ecb7a2 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c948306a0faaf3e0501f52348767111b70b59cd7 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1271e97eea72dbeacaa4e28a8ba68c9001897356682fed9d29d54f495006853d +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..57db45639fee1b370933ee6c97f112bfaac04f90 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271fc7f1ca0ec9742ec537789e746b0401ed98834a2288108d94cb14a694f803 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..22fa7388ac0e625e289ce37e6b79dd608755fcf3 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef2b67b03c9810b08b56e4947a089f8d8ab7b89b86348255ecd2cd8c779631b +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e99c104b1f483d5eda22b12f56398f6de67e26d --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936416ff61c500377528f245d1c7100ba09e42993fe5edd62b64fc08b0ae180a +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ef2b61a9a61c20ff816eb7d53d5a100b8a34071 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56af862adbec6e2b62670042d928d0eefc403b58310fa35f2528aa926c375ec7 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d351974ca9fca1dda7126f22f4ee7f42fab3fb7 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cbc2fc39c7d03cec9648447b91c124a625c578e8c93e9870d9bdb8892f049a6 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d475c0fc82bccf2e2063e998260e0a996e01701 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b8eff2583ba40500598c1be7bd1e1875b644238c72d057fe1345d8c310b5ac +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-7000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f80910e440c97c6830904084432a446f7a721cd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5445e128a7bcf4177cc50c7224415006e388ca1019002cdd1a4b59301d72556c +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-7000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2a1ea2647ad2df94fc8031d2e5b454166a65a4fc --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/trainer_state.json @@ -0,0 +1,4221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.3614742346293327, + "eval_steps": 500, + "global_step": 7000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4218, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4204, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4219, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4174, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4196, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4189, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4179, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4187, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4186, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4155, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4111, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4119, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4075, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4016, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4036, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4014, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4002, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4041, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4055, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3981, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3986, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3912, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3863, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3913, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3886, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3874, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3859, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3846, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3785, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3882, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3813, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3835, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3794, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3788, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3812, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3694, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3654, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.377, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3701, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3682, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3643, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3737, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.369, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3674, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3569, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3675, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3589, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3591, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3549, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3611, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3554, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3564, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3604, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3574, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.36, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3511, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3529, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3565, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3501, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3475, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.353, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3524, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.35, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3497, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3439, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3552, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3406, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3531, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3459, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3476, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3421, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3451, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.342, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.3357, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.345, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.3437, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3381, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3365, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3412, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3413, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3327, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3368, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3372, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3321, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3324, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3292, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3231, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3249, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3259, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3316, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3313, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3332, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3239, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3232, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3295, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3244, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3208, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.328, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3221, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3203, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3146, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3184, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.316, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3189, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3119, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3159, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3115, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3161, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3122, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3124, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3182, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3108, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3129, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.3149, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.314, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3141, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3063, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3087, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3092, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3059, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.32, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3131, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.304, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3136, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.307, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.313, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3069, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3065, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3017, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3067, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3023, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.3007, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2994, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.301, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2986, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2997, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.302, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2956, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2912, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2939, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2978, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2914, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2915, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2919, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2928, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.29, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2983, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2855, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2859, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2875, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2858, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2838, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2831, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2828, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2819, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2736, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2767, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2724, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2785, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2725, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2783, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2706, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2692, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2758, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2716, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2659, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2652, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2702, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2686, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2719, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2671, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2675, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2691, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2617, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2733, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2673, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2606, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2599, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.262, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2579, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2578, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2589, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2593, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2565, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2613, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2618, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2562, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2542, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.257, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2576, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2581, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2601, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2545, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2526, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.247, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2479, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2494, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2482, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2465, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2475, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2547, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.249, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2499, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2481, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2433, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.244, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2446, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2463, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2483, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2457, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2411, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2371, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2381, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2386, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2401, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2431, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2425, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2391, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2329, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2397, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2306, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2403, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2468, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2374, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2364, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2396, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2299, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2367, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2385, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2312, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2257, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.233, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2337, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2348, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2382, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2309, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.236, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2344, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2301, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2304, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2252, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2294, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.229, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.223, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.2272, + "step": 7000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-7000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/model.safetensors b/dinov2_small_v1_global_nominmax/checkpoint-8000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..092b1495115ea12fb9de2a6ff58475e1fcab2303 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22acf5d7a956fe7665e960974a50966ea3b5b48a5bda10bbc0aadaa5e7344b7f +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/optimizer.pt b/dinov2_small_v1_global_nominmax/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e401c10cf5e6d57efefd92769edd521457e2f416 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241c9885449bfbef370df8dd9c78240397fee981ffdbe46c7f3fe0ab03c7cc3b +size 456914234 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_0.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1a7000b2a2a9b7e66995194c7def83701672681 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6041fbe6d64344a7314c873a27b77bc02d1482287c212202188d36f4518a660d +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_1.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..cca238fc6d5fb18fc6b45ee6a12e0b6439381ca2 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98371ac0922b4275c5af92449fd0f7232b9afa2afae6edaeff681f991c72d33 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_2.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ea9d02214ccd6e3e63d6f099324d649a3685139 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780c0966ec7854b6c6e39f3e8fa4117f78c3e4680730e9fa98df60d0fe7eab92 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_3.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3324706fc72c3c4eb41b160a595cc8c3920f031 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c663a5160d7288107012a869bab934436414514919f633809e6413d97c7ad945 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_4.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..474dbcb768ff8961068af90d14d5a6e7761b2bda --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ecb80974b256cc4c9e5cf15046168299cd65b34a55bc197240e42ed1c885ae5 +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_5.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8a6ddebeed1dfa08a5e502a3e3253075dcc5eca --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98f2e800c82fc9025786118eb439c72f5f75939349d70c7f996ee05d9efbc0d +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_6.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba103507ac4db9853bb68e7c0c4c9864ee383c1b --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1e4bb44c061e0bde43201c6a526de0aa0db51d6e379cb474dc0df9403bef4a +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_7.pth b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b78fa1dc7fa2a0a928683c007abd9004fb520aba --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30a1cb01c4953e01f8a0855c963b5ad50f1bb2cc19a235a31b0d397d966d70c +size 15920 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/scheduler.pt b/dinov2_small_v1_global_nominmax/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bc1a1fc7418b56600e0a35fa744b78dec1110ac --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33657ad014a83329319798b77914fe9b3b1f8d598a6a6cf79c7c92f3a50382cc +size 1064 diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/trainer_state.json b/dinov2_small_v1_global_nominmax/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1793ff988fa3990f1d23ed7a5e7da66fbfce1751 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/trainer_state.json @@ -0,0 +1,4821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.6988276967192375, + "eval_steps": 500, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4218, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4204, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4219, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4174, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4196, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4189, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4179, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4187, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4186, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4155, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4111, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4119, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4075, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4016, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4036, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4014, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4002, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4041, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4055, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3981, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3986, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3912, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3863, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3913, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3886, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3874, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3859, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3846, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3785, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3882, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3813, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3835, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3794, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3788, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3812, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3694, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3654, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.377, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3701, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3682, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3643, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3737, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.369, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3674, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3569, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3675, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3589, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3591, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3549, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3611, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3554, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3564, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3604, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3574, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.36, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3511, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3529, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3565, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3501, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3475, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.353, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3524, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.35, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3497, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3439, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3552, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3406, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3531, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3459, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3476, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3421, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3451, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.342, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.3357, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.345, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.3437, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3381, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3365, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3412, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3413, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3327, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3368, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3372, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3321, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3324, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3292, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3231, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3249, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3259, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3316, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3313, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3332, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3239, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3232, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3295, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3244, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3208, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.328, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3221, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3203, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3146, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3184, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.316, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3189, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3119, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3159, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3115, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3161, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3122, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3124, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3182, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3108, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3129, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.3149, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.314, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3141, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3063, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3087, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3092, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3059, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.32, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3131, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.304, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3136, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.307, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.313, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3069, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3065, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3017, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3067, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3023, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.3007, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2994, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.301, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2986, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2997, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.302, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2956, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2912, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2939, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2978, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2914, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2915, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2919, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2928, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.29, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2983, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2855, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2859, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2875, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2858, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2838, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2831, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2828, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2819, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2736, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2767, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2724, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2785, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2725, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2783, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2706, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2692, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2758, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2716, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2659, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2652, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2702, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2686, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2719, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2671, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2675, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2691, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2617, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2733, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2673, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2606, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2599, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.262, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2579, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2578, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2589, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2593, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2565, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2613, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2618, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2562, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2542, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.257, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2576, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2581, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2601, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2545, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2526, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.247, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2479, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2494, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2482, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2465, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2475, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2547, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.249, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2499, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2481, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2433, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.244, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2446, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2463, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2483, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2457, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2411, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2371, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2381, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2386, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2401, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2431, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2425, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2391, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2329, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2397, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2306, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2403, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2468, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2374, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2364, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2396, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2299, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2367, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2385, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2312, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2257, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.233, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2337, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2348, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2382, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2309, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.236, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2344, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2301, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2304, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2252, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2294, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.229, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.223, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.2272, + "step": 7000 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.2267, + "step": 7010 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2299, + "step": 7020 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2228, + "step": 7030 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2315, + "step": 7040 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.2262, + "step": 7050 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.2291, + "step": 7060 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2242, + "step": 7070 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2215, + "step": 7080 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2272, + "step": 7090 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.2275, + "step": 7100 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.225, + "step": 7110 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.2259, + "step": 7120 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.226, + "step": 7130 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.229, + "step": 7140 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.2206, + "step": 7150 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2229, + "step": 7160 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2269, + "step": 7170 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2237, + "step": 7180 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2153, + "step": 7190 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2271, + "step": 7200 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2294, + "step": 7210 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.225, + "step": 7220 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.2245, + "step": 7230 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.226, + "step": 7240 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2226, + "step": 7250 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2276, + "step": 7260 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2223, + "step": 7270 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2191, + "step": 7280 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2263, + "step": 7290 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2208, + "step": 7300 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.2259, + "step": 7310 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.2173, + "step": 7320 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.218, + "step": 7330 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2148, + "step": 7340 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2087, + "step": 7350 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2257, + "step": 7360 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2258, + "step": 7370 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2198, + "step": 7380 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2185, + "step": 7390 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.2173, + "step": 7400 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.217, + "step": 7410 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.2164, + "step": 7420 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.2161, + "step": 7430 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.2213, + "step": 7440 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.219, + "step": 7450 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2142, + "step": 7460 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2242, + "step": 7470 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2233, + "step": 7480 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.212, + "step": 7490 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.214, + "step": 7500 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.2171, + "step": 7510 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2191, + "step": 7520 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2199, + "step": 7530 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2148, + "step": 7540 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2125, + "step": 7550 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2209, + "step": 7560 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2135, + "step": 7570 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.2147, + "step": 7580 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.2108, + "step": 7590 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.216, + "step": 7600 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.218, + "step": 7610 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.2163, + "step": 7620 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.2093, + "step": 7630 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2146, + "step": 7640 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2181, + "step": 7650 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2119, + "step": 7660 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.2105, + "step": 7670 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.212, + "step": 7680 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.2129, + "step": 7690 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.206, + "step": 7700 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.2146, + "step": 7710 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.2158, + "step": 7720 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.2044, + "step": 7730 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.2062, + "step": 7740 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.208, + "step": 7750 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2103, + "step": 7760 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2103, + "step": 7770 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2071, + "step": 7780 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2099, + "step": 7790 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2207, + "step": 7800 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2087, + "step": 7810 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2093, + "step": 7820 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2104, + "step": 7830 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2057, + "step": 7840 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2089, + "step": 7850 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2128, + "step": 7860 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2088, + "step": 7870 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.2062, + "step": 7880 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.2095, + "step": 7890 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2091, + "step": 7900 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2142, + "step": 7910 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2134, + "step": 7920 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.205, + "step": 7930 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.2071, + "step": 7940 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.2059, + "step": 7950 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2037, + "step": 7960 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2082, + "step": 7970 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2078, + "step": 7980 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.2037, + "step": 7990 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.2057, + "step": 8000 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/checkpoint-8000/training_args.bin b/dinov2_small_v1_global_nominmax/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/checkpoint-8000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856 diff --git a/dinov2_small_v1_global_nominmax/model.safetensors b/dinov2_small_v1_global_nominmax/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70961870ba7ecbb6648eb4fd7308ff5bc73b6be8 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977e19446a37f20a4cf8d9e9c07fdb9663123a6cfdb71f20690d4bbc8637b418 +size 228416552 diff --git a/dinov2_small_v1_global_nominmax/trainer_state.json b/dinov2_small_v1_global_nominmax/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6758e180ed2f9c656ff50f748cbebebf39ed3ba8 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/trainer_state.json @@ -0,0 +1,5364 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9997469849034326, + "eval_steps": 500, + "global_step": 8892, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.5854, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5466, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5426, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 0.5393, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.55, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5355, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.5387, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5299, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5183, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.5188, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5186, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5135, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 0.5221, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5163, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5063, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 0.5134, + "step": 160 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5058, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5059, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 5e-05, + "loss": 0.5077, + "step": 190 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5089, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5075, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 5e-05, + "loss": 0.5017, + "step": 220 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5171, + "step": 230 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5022, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 5e-05, + "loss": 0.5005, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5025, + "step": 260 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5046, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 5e-05, + "loss": 0.5174, + "step": 280 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.5037, + "step": 290 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4952, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 5e-05, + "loss": 0.4914, + "step": 310 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4867, + "step": 320 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.4998, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 0.482, + "step": 340 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.498, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 5e-05, + "loss": 0.4931, + "step": 370 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 380 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4859, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.4852, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.4942, + "step": 410 + }, + { + "epoch": 0.14, + "learning_rate": 5e-05, + "loss": 0.5014, + "step": 420 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4962, + "step": 440 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 450 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.5002, + "step": 460 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4979, + "step": 470 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 0.4999, + "step": 480 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4908, + "step": 490 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4884, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 5e-05, + "loss": 0.4801, + "step": 510 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.478, + "step": 520 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4838, + "step": 530 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 0.4879, + "step": 540 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4761, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4853, + "step": 560 + }, + { + "epoch": 0.19, + "learning_rate": 5e-05, + "loss": 0.4938, + "step": 570 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.477, + "step": 580 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.491, + "step": 590 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.4678, + "step": 600 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4814, + "step": 610 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4822, + "step": 620 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 0.4792, + "step": 630 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4791, + "step": 640 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4839, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4833, + "step": 670 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4821, + "step": 680 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 690 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4783, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 710 + }, + { + "epoch": 0.24, + "learning_rate": 5e-05, + "loss": 0.4771, + "step": 720 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4747, + "step": 730 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4802, + "step": 740 + }, + { + "epoch": 0.25, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 750 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 760 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4692, + "step": 770 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 0.4684, + "step": 780 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4775, + "step": 790 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4714, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 5e-05, + "loss": 0.4744, + "step": 810 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4752, + "step": 820 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4727, + "step": 830 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 0.4693, + "step": 840 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4699, + "step": 860 + }, + { + "epoch": 0.29, + "learning_rate": 5e-05, + "loss": 0.4694, + "step": 870 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4745, + "step": 880 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 890 + }, + { + "epoch": 0.3, + "learning_rate": 5e-05, + "loss": 0.4754, + "step": 900 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4796, + "step": 910 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4708, + "step": 920 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 930 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4768, + "step": 940 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.4709, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 5e-05, + "loss": 0.466, + "step": 960 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4706, + "step": 970 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4756, + "step": 980 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 0.4687, + "step": 990 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4702, + "step": 1010 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 0.4739, + "step": 1020 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.472, + "step": 1030 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.4632, + "step": 1040 + }, + { + "epoch": 0.35, + "learning_rate": 5e-05, + "loss": 0.458, + "step": 1050 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.463, + "step": 1060 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4655, + "step": 1070 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1080 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1090 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4778, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 0.4746, + "step": 1110 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4738, + "step": 1120 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4682, + "step": 1130 + }, + { + "epoch": 0.38, + "learning_rate": 5e-05, + "loss": 0.4627, + "step": 1140 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4608, + "step": 1150 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1160 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 0.4718, + "step": 1170 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1180 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1190 + }, + { + "epoch": 0.4, + "learning_rate": 5e-05, + "loss": 0.4612, + "step": 1200 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1210 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4711, + "step": 1220 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 0.4645, + "step": 1230 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1240 + }, + { + "epoch": 0.42, + "learning_rate": 5e-05, + "loss": 0.4615, + "step": 1250 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1260 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1270 + }, + { + "epoch": 0.43, + "learning_rate": 5e-05, + "loss": 0.4621, + "step": 1280 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4575, + "step": 1290 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4602, + "step": 1300 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1310 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4672, + "step": 1320 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4613, + "step": 1330 + }, + { + "epoch": 0.45, + "learning_rate": 5e-05, + "loss": 0.4674, + "step": 1340 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4673, + "step": 1350 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 1360 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 0.4685, + "step": 1370 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1380 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4643, + "step": 1390 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 0.4637, + "step": 1400 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1410 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4595, + "step": 1420 + }, + { + "epoch": 0.48, + "learning_rate": 5e-05, + "loss": 0.4584, + "step": 1430 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4644, + "step": 1440 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4667, + "step": 1450 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 0.4593, + "step": 1460 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4585, + "step": 1470 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4634, + "step": 1480 + }, + { + "epoch": 0.5, + "learning_rate": 5e-05, + "loss": 0.4538, + "step": 1490 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1500 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4559, + "step": 1510 + }, + { + "epoch": 0.51, + "learning_rate": 5e-05, + "loss": 0.4535, + "step": 1520 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 1530 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4623, + "step": 1540 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 0.4605, + "step": 1550 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 1560 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1570 + }, + { + "epoch": 0.53, + "learning_rate": 5e-05, + "loss": 0.4569, + "step": 1580 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4611, + "step": 1590 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4649, + "step": 1600 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1610 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4572, + "step": 1620 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1630 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1640 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 1650 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1660 + }, + { + "epoch": 0.56, + "learning_rate": 5e-05, + "loss": 0.4578, + "step": 1670 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4617, + "step": 1680 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 1690 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 0.4646, + "step": 1700 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.457, + "step": 1710 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 1720 + }, + { + "epoch": 0.58, + "learning_rate": 5e-05, + "loss": 0.4629, + "step": 1730 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.46, + "step": 1740 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4598, + "step": 1750 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 0.4564, + "step": 1760 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.455, + "step": 1770 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 1780 + }, + { + "epoch": 0.6, + "learning_rate": 5e-05, + "loss": 0.4591, + "step": 1790 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 1800 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 1810 + }, + { + "epoch": 0.61, + "learning_rate": 5e-05, + "loss": 0.4543, + "step": 1820 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 1830 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4536, + "step": 1840 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 1850 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4547, + "step": 1860 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4669, + "step": 1870 + }, + { + "epoch": 0.63, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 1880 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.451, + "step": 1890 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 1900 + }, + { + "epoch": 0.64, + "learning_rate": 5e-05, + "loss": 0.4534, + "step": 1910 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4537, + "step": 1920 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4485, + "step": 1930 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 0.4576, + "step": 1940 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4518, + "step": 1950 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4601, + "step": 1960 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 1970 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 1980 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 1990 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2000 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4607, + "step": 2010 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4609, + "step": 2020 + }, + { + "epoch": 0.68, + "learning_rate": 5e-05, + "loss": 0.4574, + "step": 2030 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4606, + "step": 2040 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4523, + "step": 2050 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2060 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.456, + "step": 2070 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 0.4526, + "step": 2080 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4517, + "step": 2090 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2100 + }, + { + "epoch": 0.71, + "learning_rate": 5e-05, + "loss": 0.4546, + "step": 2110 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4565, + "step": 2120 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.4589, + "step": 2130 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 0.449, + "step": 2140 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2150 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4562, + "step": 2160 + }, + { + "epoch": 0.73, + "learning_rate": 5e-05, + "loss": 0.4498, + "step": 2170 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2180 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4529, + "step": 2190 + }, + { + "epoch": 0.74, + "learning_rate": 5e-05, + "loss": 0.4539, + "step": 2200 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2210 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4531, + "step": 2220 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 0.4513, + "step": 2230 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4519, + "step": 2240 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4482, + "step": 2250 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.4555, + "step": 2260 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2270 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4514, + "step": 2280 + }, + { + "epoch": 0.77, + "learning_rate": 5e-05, + "loss": 0.4474, + "step": 2290 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4561, + "step": 2300 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4563, + "step": 2310 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 0.4509, + "step": 2320 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4488, + "step": 2330 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4476, + "step": 2340 + }, + { + "epoch": 0.79, + "learning_rate": 5e-05, + "loss": 0.4524, + "step": 2350 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4468, + "step": 2360 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4515, + "step": 2370 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 0.4581, + "step": 2380 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4502, + "step": 2390 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4472, + "step": 2400 + }, + { + "epoch": 0.81, + "learning_rate": 5e-05, + "loss": 0.4428, + "step": 2410 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2420 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.443, + "step": 2430 + }, + { + "epoch": 0.82, + "learning_rate": 5e-05, + "loss": 0.4549, + "step": 2440 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4554, + "step": 2450 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4481, + "step": 2460 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2470 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4492, + "step": 2480 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4505, + "step": 2490 + }, + { + "epoch": 0.84, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2500 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2510 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.438, + "step": 2520 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 0.4503, + "step": 2530 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2540 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2550 + }, + { + "epoch": 0.86, + "learning_rate": 5e-05, + "loss": 0.4464, + "step": 2560 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4504, + "step": 2570 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2580 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.4461, + "step": 2590 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2600 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 2610 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 0.4479, + "step": 2620 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4432, + "step": 2630 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2640 + }, + { + "epoch": 0.89, + "learning_rate": 5e-05, + "loss": 0.4458, + "step": 2650 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4478, + "step": 2660 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4507, + "step": 2670 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 0.4471, + "step": 2680 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2690 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4463, + "step": 2700 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2710 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4506, + "step": 2720 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2730 + }, + { + "epoch": 0.92, + "learning_rate": 5e-05, + "loss": 0.4511, + "step": 2740 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4393, + "step": 2750 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 2760 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 2770 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4451, + "step": 2780 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4483, + "step": 2790 + }, + { + "epoch": 0.94, + "learning_rate": 5e-05, + "loss": 0.4469, + "step": 2800 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 2810 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.446, + "step": 2820 + }, + { + "epoch": 0.95, + "learning_rate": 5e-05, + "loss": 0.4443, + "step": 2830 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4368, + "step": 2840 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4442, + "step": 2850 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 0.4427, + "step": 2860 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4489, + "step": 2870 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4371, + "step": 2880 + }, + { + "epoch": 0.97, + "learning_rate": 5e-05, + "loss": 0.4452, + "step": 2890 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4407, + "step": 2900 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.4473, + "step": 2910 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4337, + "step": 2920 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4409, + "step": 2930 + }, + { + "epoch": 0.99, + "learning_rate": 5e-05, + "loss": 0.4332, + "step": 2940 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4437, + "step": 2950 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 2960 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.4416, + "step": 2970 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 2980 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 2990 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 5e-05, + "loss": 0.4413, + "step": 3030 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3040 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.445, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 0.4453, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4402, + "step": 3070 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 1.04, + "learning_rate": 5e-05, + "loss": 0.4434, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4457, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4475, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 5e-05, + "loss": 0.4401, + "step": 3120 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4385, + "step": 3130 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4414, + "step": 3140 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 0.4396, + "step": 3150 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4355, + "step": 3160 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4389, + "step": 3170 + }, + { + "epoch": 1.07, + "learning_rate": 5e-05, + "loss": 0.4384, + "step": 3180 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4382, + "step": 3190 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4408, + "step": 3200 + }, + { + "epoch": 1.08, + "learning_rate": 5e-05, + "loss": 0.4398, + "step": 3210 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3220 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4343, + "step": 3230 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.4352, + "step": 3240 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4345, + "step": 3250 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4363, + "step": 3260 + }, + { + "epoch": 1.1, + "learning_rate": 5e-05, + "loss": 0.4342, + "step": 3270 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4308, + "step": 3280 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.4361, + "step": 3290 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3300 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3310 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3320 + }, + { + "epoch": 1.12, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3330 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4388, + "step": 3340 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4387, + "step": 3350 + }, + { + "epoch": 1.13, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3360 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3370 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4424, + "step": 3380 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 0.4372, + "step": 3390 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3400 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.437, + "step": 3410 + }, + { + "epoch": 1.15, + "learning_rate": 5e-05, + "loss": 0.4449, + "step": 3420 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3430 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4426, + "step": 3440 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 0.4415, + "step": 3450 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4366, + "step": 3460 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4324, + "step": 3470 + }, + { + "epoch": 1.17, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3480 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4318, + "step": 3490 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4405, + "step": 3500 + }, + { + "epoch": 1.18, + "learning_rate": 5e-05, + "loss": 0.4374, + "step": 3510 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4359, + "step": 3520 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4351, + "step": 3530 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 0.4397, + "step": 3540 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4354, + "step": 3550 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4327, + "step": 3560 + }, + { + "epoch": 1.2, + "learning_rate": 5e-05, + "loss": 0.4358, + "step": 3570 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4291, + "step": 3580 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4304, + "step": 3590 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 0.4277, + "step": 3600 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4319, + "step": 3610 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3620 + }, + { + "epoch": 1.22, + "learning_rate": 5e-05, + "loss": 0.4392, + "step": 3630 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4288, + "step": 3640 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4305, + "step": 3650 + }, + { + "epoch": 1.23, + "learning_rate": 5e-05, + "loss": 0.4253, + "step": 3660 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4295, + "step": 3670 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4298, + "step": 3680 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3690 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4278, + "step": 3700 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 1.25, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3720 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3730 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 0.4347, + "step": 3740 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.435, + "step": 3750 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.425, + "step": 3760 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3770 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4357, + "step": 3780 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4349, + "step": 3790 + }, + { + "epoch": 1.28, + "learning_rate": 5e-05, + "loss": 0.4322, + "step": 3800 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4338, + "step": 3810 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4307, + "step": 3820 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3830 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4283, + "step": 3840 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4275, + "step": 3850 + }, + { + "epoch": 1.3, + "learning_rate": 5e-05, + "loss": 0.4367, + "step": 3860 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3870 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.4258, + "step": 3880 + }, + { + "epoch": 1.31, + "learning_rate": 5e-05, + "loss": 0.431, + "step": 3890 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4325, + "step": 3900 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 3910 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 0.4299, + "step": 3920 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4268, + "step": 3930 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4313, + "step": 3940 + }, + { + "epoch": 1.33, + "learning_rate": 5e-05, + "loss": 0.4263, + "step": 3950 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4335, + "step": 3960 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.4314, + "step": 3970 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 0.419, + "step": 3980 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4254, + "step": 3990 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4323, + "step": 4000 + }, + { + "epoch": 1.35, + "learning_rate": 5e-05, + "loss": 0.4218, + "step": 4010 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4204, + "step": 4020 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4219, + "step": 4030 + }, + { + "epoch": 1.36, + "learning_rate": 5e-05, + "loss": 0.4174, + "step": 4040 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4050 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4060 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 0.4196, + "step": 4070 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4195, + "step": 4080 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4189, + "step": 4090 + }, + { + "epoch": 1.38, + "learning_rate": 5e-05, + "loss": 0.4179, + "step": 4100 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4187, + "step": 4110 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4217, + "step": 4120 + }, + { + "epoch": 1.39, + "learning_rate": 5e-05, + "loss": 0.4186, + "step": 4130 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4155, + "step": 4140 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4111, + "step": 4150 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 0.4119, + "step": 4160 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4075, + "step": 4170 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4121, + "step": 4180 + }, + { + "epoch": 1.41, + "learning_rate": 5e-05, + "loss": 0.4016, + "step": 4190 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4036, + "step": 4210 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 0.4014, + "step": 4220 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4002, + "step": 4230 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.4041, + "step": 4240 + }, + { + "epoch": 1.43, + "learning_rate": 5e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.4055, + "step": 4260 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3981, + "step": 4270 + }, + { + "epoch": 1.44, + "learning_rate": 5e-05, + "loss": 0.3986, + "step": 4280 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3912, + "step": 4290 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3863, + "step": 4300 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 0.3913, + "step": 4310 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4320 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3886, + "step": 4330 + }, + { + "epoch": 1.46, + "learning_rate": 5e-05, + "loss": 0.3874, + "step": 4340 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3859, + "step": 4350 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3846, + "step": 4360 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 0.3785, + "step": 4370 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3858, + "step": 4380 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3882, + "step": 4390 + }, + { + "epoch": 1.48, + "learning_rate": 5e-05, + "loss": 0.3813, + "step": 4400 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3835, + "step": 4410 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4420 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 0.3794, + "step": 4430 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3788, + "step": 4440 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4450 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 0.3812, + "step": 4460 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3694, + "step": 4470 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3775, + "step": 4480 + }, + { + "epoch": 1.51, + "learning_rate": 5e-05, + "loss": 0.3654, + "step": 4490 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.377, + "step": 4500 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3701, + "step": 4510 + }, + { + "epoch": 1.52, + "learning_rate": 5e-05, + "loss": 0.3682, + "step": 4520 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3643, + "step": 4530 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3737, + "step": 4540 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 0.3724, + "step": 4550 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.369, + "step": 4560 + }, + { + "epoch": 1.54, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4570 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3674, + "step": 4580 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3569, + "step": 4590 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 0.3675, + "step": 4600 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3634, + "step": 4610 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4620 + }, + { + "epoch": 1.56, + "learning_rate": 5e-05, + "loss": 0.3589, + "step": 4630 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3591, + "step": 4640 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3549, + "step": 4650 + }, + { + "epoch": 1.57, + "learning_rate": 5e-05, + "loss": 0.3611, + "step": 4660 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.362, + "step": 4670 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3554, + "step": 4680 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 0.3564, + "step": 4690 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3604, + "step": 4700 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.3574, + "step": 4710 + }, + { + "epoch": 1.59, + "learning_rate": 5e-05, + "loss": 0.36, + "step": 4720 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3511, + "step": 4730 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3529, + "step": 4740 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 0.3565, + "step": 4750 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3501, + "step": 4760 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.3475, + "step": 4770 + }, + { + "epoch": 1.61, + "learning_rate": 5e-05, + "loss": 0.353, + "step": 4780 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3524, + "step": 4790 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.35, + "step": 4800 + }, + { + "epoch": 1.62, + "learning_rate": 5e-05, + "loss": 0.3497, + "step": 4810 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3439, + "step": 4820 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3552, + "step": 4830 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 0.3406, + "step": 4840 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3531, + "step": 4850 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3459, + "step": 4860 + }, + { + "epoch": 1.64, + "learning_rate": 5e-05, + "loss": 0.3476, + "step": 4870 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3421, + "step": 4880 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.3451, + "step": 4890 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 0.342, + "step": 4900 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.3357, + "step": 4910 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4920 + }, + { + "epoch": 1.66, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4930 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.345, + "step": 4940 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.3437, + "step": 4950 + }, + { + "epoch": 1.67, + "learning_rate": 5e-05, + "loss": 0.339, + "step": 4960 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3381, + "step": 4970 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3365, + "step": 4980 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 0.3412, + "step": 4990 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3413, + "step": 5000 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3327, + "step": 5010 + }, + { + "epoch": 1.69, + "learning_rate": 5e-05, + "loss": 0.3368, + "step": 5020 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5030 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5040 + }, + { + "epoch": 1.7, + "learning_rate": 5e-05, + "loss": 0.3372, + "step": 5050 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3321, + "step": 5060 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3324, + "step": 5070 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 0.3292, + "step": 5080 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3231, + "step": 5090 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3289, + "step": 5100 + }, + { + "epoch": 1.72, + "learning_rate": 5e-05, + "loss": 0.3249, + "step": 5110 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3259, + "step": 5120 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3316, + "step": 5130 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 0.3353, + "step": 5140 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3313, + "step": 5150 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3332, + "step": 5160 + }, + { + "epoch": 1.74, + "learning_rate": 5e-05, + "loss": 0.3239, + "step": 5170 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3232, + "step": 5180 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3295, + "step": 5190 + }, + { + "epoch": 1.75, + "learning_rate": 5e-05, + "loss": 0.3244, + "step": 5200 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3208, + "step": 5210 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5220 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 0.328, + "step": 5230 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3221, + "step": 5240 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3203, + "step": 5250 + }, + { + "epoch": 1.77, + "learning_rate": 5e-05, + "loss": 0.3146, + "step": 5260 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3184, + "step": 5270 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.316, + "step": 5280 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 0.3189, + "step": 5290 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3119, + "step": 5300 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3159, + "step": 5310 + }, + { + "epoch": 1.79, + "learning_rate": 5e-05, + "loss": 0.3115, + "step": 5320 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3161, + "step": 5330 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3122, + "step": 5340 + }, + { + "epoch": 1.8, + "learning_rate": 5e-05, + "loss": 0.3124, + "step": 5350 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3182, + "step": 5360 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3108, + "step": 5370 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 0.3129, + "step": 5380 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.3149, + "step": 5390 + }, + { + "epoch": 1.82, + "learning_rate": 5e-05, + "loss": 0.314, + "step": 5400 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3141, + "step": 5410 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3063, + "step": 5420 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 0.3087, + "step": 5430 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3092, + "step": 5440 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.3059, + "step": 5450 + }, + { + "epoch": 1.84, + "learning_rate": 5e-05, + "loss": 0.32, + "step": 5460 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3131, + "step": 5470 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.304, + "step": 5480 + }, + { + "epoch": 1.85, + "learning_rate": 5e-05, + "loss": 0.3136, + "step": 5490 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.307, + "step": 5500 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.313, + "step": 5510 + }, + { + "epoch": 1.86, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5520 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3069, + "step": 5530 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3065, + "step": 5540 + }, + { + "epoch": 1.87, + "learning_rate": 5e-05, + "loss": 0.3017, + "step": 5550 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3067, + "step": 5560 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3079, + "step": 5570 + }, + { + "epoch": 1.88, + "learning_rate": 5e-05, + "loss": 0.3023, + "step": 5580 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.3007, + "step": 5590 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5600 + }, + { + "epoch": 1.89, + "learning_rate": 5e-05, + "loss": 0.2994, + "step": 5610 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.301, + "step": 5620 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2986, + "step": 5630 + }, + { + "epoch": 1.9, + "learning_rate": 5e-05, + "loss": 0.2997, + "step": 5640 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.302, + "step": 5650 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5660 + }, + { + "epoch": 1.91, + "learning_rate": 5e-05, + "loss": 0.2956, + "step": 5670 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2912, + "step": 5680 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2929, + "step": 5690 + }, + { + "epoch": 1.92, + "learning_rate": 5e-05, + "loss": 0.2951, + "step": 5700 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2939, + "step": 5710 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2978, + "step": 5720 + }, + { + "epoch": 1.93, + "learning_rate": 5e-05, + "loss": 0.2914, + "step": 5730 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2915, + "step": 5740 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2919, + "step": 5750 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5760 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2928, + "step": 5770 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.29, + "step": 5780 + }, + { + "epoch": 1.95, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5790 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2983, + "step": 5800 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2883, + "step": 5810 + }, + { + "epoch": 1.96, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5820 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2855, + "step": 5830 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2859, + "step": 5840 + }, + { + "epoch": 1.97, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5850 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2875, + "step": 5860 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2848, + "step": 5870 + }, + { + "epoch": 1.98, + "learning_rate": 5e-05, + "loss": 0.2858, + "step": 5880 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2838, + "step": 5890 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2844, + "step": 5900 + }, + { + "epoch": 1.99, + "learning_rate": 5e-05, + "loss": 0.2831, + "step": 5910 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2828, + "step": 5920 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2819, + "step": 5930 + }, + { + "epoch": 2.0, + "learning_rate": 5e-05, + "loss": 0.2736, + "step": 5940 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2767, + "step": 5950 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2724, + "step": 5960 + }, + { + "epoch": 2.01, + "learning_rate": 5e-05, + "loss": 0.2785, + "step": 5970 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2725, + "step": 5980 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2783, + "step": 5990 + }, + { + "epoch": 2.02, + "learning_rate": 5e-05, + "loss": 0.2706, + "step": 6000 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2692, + "step": 6010 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2758, + "step": 6020 + }, + { + "epoch": 2.03, + "learning_rate": 5e-05, + "loss": 0.2716, + "step": 6030 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2659, + "step": 6040 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2652, + "step": 6050 + }, + { + "epoch": 2.04, + "learning_rate": 5e-05, + "loss": 0.2702, + "step": 6060 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2686, + "step": 6070 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2719, + "step": 6080 + }, + { + "epoch": 2.05, + "learning_rate": 5e-05, + "loss": 0.2671, + "step": 6090 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2675, + "step": 6100 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2691, + "step": 6110 + }, + { + "epoch": 2.06, + "learning_rate": 5e-05, + "loss": 0.2617, + "step": 6120 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2733, + "step": 6130 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2673, + "step": 6140 + }, + { + "epoch": 2.07, + "learning_rate": 5e-05, + "loss": 0.2606, + "step": 6150 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2599, + "step": 6160 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.262, + "step": 6170 + }, + { + "epoch": 2.08, + "learning_rate": 5e-05, + "loss": 0.2579, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2578, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 6210 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2589, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 5e-05, + "loss": 0.2593, + "step": 6230 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2565, + "step": 6240 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6250 + }, + { + "epoch": 2.11, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6260 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2613, + "step": 6270 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2618, + "step": 6280 + }, + { + "epoch": 2.12, + "learning_rate": 5e-05, + "loss": 0.2562, + "step": 6290 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2542, + "step": 6300 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.2555, + "step": 6310 + }, + { + "epoch": 2.13, + "learning_rate": 5e-05, + "loss": 0.257, + "step": 6320 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2576, + "step": 6330 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2581, + "step": 6340 + }, + { + "epoch": 2.14, + "learning_rate": 5e-05, + "loss": 0.2601, + "step": 6350 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2545, + "step": 6360 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.2526, + "step": 6370 + }, + { + "epoch": 2.15, + "learning_rate": 5e-05, + "loss": 0.247, + "step": 6380 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6390 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2479, + "step": 6400 + }, + { + "epoch": 2.16, + "learning_rate": 5e-05, + "loss": 0.2494, + "step": 6410 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2482, + "step": 6420 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2465, + "step": 6430 + }, + { + "epoch": 2.17, + "learning_rate": 5e-05, + "loss": 0.2475, + "step": 6440 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2547, + "step": 6450 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2504, + "step": 6460 + }, + { + "epoch": 2.18, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6470 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.249, + "step": 6480 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2499, + "step": 6490 + }, + { + "epoch": 2.19, + "learning_rate": 5e-05, + "loss": 0.2536, + "step": 6500 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2481, + "step": 6510 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.2433, + "step": 6520 + }, + { + "epoch": 2.2, + "learning_rate": 5e-05, + "loss": 0.244, + "step": 6530 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2446, + "step": 6540 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2463, + "step": 6550 + }, + { + "epoch": 2.21, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6560 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6570 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2483, + "step": 6580 + }, + { + "epoch": 2.22, + "learning_rate": 5e-05, + "loss": 0.2457, + "step": 6590 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2411, + "step": 6600 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2371, + "step": 6610 + }, + { + "epoch": 2.23, + "learning_rate": 5e-05, + "loss": 0.2381, + "step": 6620 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2418, + "step": 6630 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2386, + "step": 6640 + }, + { + "epoch": 2.24, + "learning_rate": 5e-05, + "loss": 0.2401, + "step": 6650 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2431, + "step": 6660 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2425, + "step": 6670 + }, + { + "epoch": 2.25, + "learning_rate": 5e-05, + "loss": 0.2391, + "step": 6680 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2329, + "step": 6690 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2397, + "step": 6700 + }, + { + "epoch": 2.26, + "learning_rate": 5e-05, + "loss": 0.2306, + "step": 6710 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2403, + "step": 6720 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2468, + "step": 6730 + }, + { + "epoch": 2.27, + "learning_rate": 5e-05, + "loss": 0.2374, + "step": 6740 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2364, + "step": 6750 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2396, + "step": 6760 + }, + { + "epoch": 2.28, + "learning_rate": 5e-05, + "loss": 0.2299, + "step": 6770 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2406, + "step": 6780 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6790 + }, + { + "epoch": 2.29, + "learning_rate": 5e-05, + "loss": 0.2367, + "step": 6800 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2385, + "step": 6810 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6820 + }, + { + "epoch": 2.3, + "learning_rate": 5e-05, + "loss": 0.2312, + "step": 6830 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2257, + "step": 6840 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.233, + "step": 6850 + }, + { + "epoch": 2.31, + "learning_rate": 5e-05, + "loss": 0.2337, + "step": 6860 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2348, + "step": 6870 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2382, + "step": 6880 + }, + { + "epoch": 2.32, + "learning_rate": 5e-05, + "loss": 0.2309, + "step": 6890 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.236, + "step": 6900 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2344, + "step": 6910 + }, + { + "epoch": 2.33, + "learning_rate": 5e-05, + "loss": 0.2301, + "step": 6920 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2304, + "step": 6930 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2324, + "step": 6940 + }, + { + "epoch": 2.34, + "learning_rate": 5e-05, + "loss": 0.2252, + "step": 6950 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2334, + "step": 6960 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.2294, + "step": 6970 + }, + { + "epoch": 2.35, + "learning_rate": 5e-05, + "loss": 0.229, + "step": 6980 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.223, + "step": 6990 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.2272, + "step": 7000 + }, + { + "epoch": 2.36, + "learning_rate": 5e-05, + "loss": 0.2267, + "step": 7010 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2299, + "step": 7020 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2228, + "step": 7030 + }, + { + "epoch": 2.37, + "learning_rate": 5e-05, + "loss": 0.2315, + "step": 7040 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.2262, + "step": 7050 + }, + { + "epoch": 2.38, + "learning_rate": 5e-05, + "loss": 0.2291, + "step": 7060 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2242, + "step": 7070 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2215, + "step": 7080 + }, + { + "epoch": 2.39, + "learning_rate": 5e-05, + "loss": 0.2272, + "step": 7090 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.2275, + "step": 7100 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.225, + "step": 7110 + }, + { + "epoch": 2.4, + "learning_rate": 5e-05, + "loss": 0.2259, + "step": 7120 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.226, + "step": 7130 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.229, + "step": 7140 + }, + { + "epoch": 2.41, + "learning_rate": 5e-05, + "loss": 0.2206, + "step": 7150 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2229, + "step": 7160 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2269, + "step": 7170 + }, + { + "epoch": 2.42, + "learning_rate": 5e-05, + "loss": 0.2237, + "step": 7180 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2153, + "step": 7190 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2271, + "step": 7200 + }, + { + "epoch": 2.43, + "learning_rate": 5e-05, + "loss": 0.2294, + "step": 7210 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.225, + "step": 7220 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.2245, + "step": 7230 + }, + { + "epoch": 2.44, + "learning_rate": 5e-05, + "loss": 0.226, + "step": 7240 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2226, + "step": 7250 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2276, + "step": 7260 + }, + { + "epoch": 2.45, + "learning_rate": 5e-05, + "loss": 0.2223, + "step": 7270 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2191, + "step": 7280 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2263, + "step": 7290 + }, + { + "epoch": 2.46, + "learning_rate": 5e-05, + "loss": 0.2208, + "step": 7300 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.2259, + "step": 7310 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.2173, + "step": 7320 + }, + { + "epoch": 2.47, + "learning_rate": 5e-05, + "loss": 0.218, + "step": 7330 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2148, + "step": 7340 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2087, + "step": 7350 + }, + { + "epoch": 2.48, + "learning_rate": 5e-05, + "loss": 0.2257, + "step": 7360 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2258, + "step": 7370 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2198, + "step": 7380 + }, + { + "epoch": 2.49, + "learning_rate": 5e-05, + "loss": 0.2185, + "step": 7390 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.2173, + "step": 7400 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.217, + "step": 7410 + }, + { + "epoch": 2.5, + "learning_rate": 5e-05, + "loss": 0.2164, + "step": 7420 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.2161, + "step": 7430 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.2213, + "step": 7440 + }, + { + "epoch": 2.51, + "learning_rate": 5e-05, + "loss": 0.219, + "step": 7450 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2142, + "step": 7460 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2242, + "step": 7470 + }, + { + "epoch": 2.52, + "learning_rate": 5e-05, + "loss": 0.2233, + "step": 7480 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.212, + "step": 7490 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.214, + "step": 7500 + }, + { + "epoch": 2.53, + "learning_rate": 5e-05, + "loss": 0.2171, + "step": 7510 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2191, + "step": 7520 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2199, + "step": 7530 + }, + { + "epoch": 2.54, + "learning_rate": 5e-05, + "loss": 0.2148, + "step": 7540 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2125, + "step": 7550 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2209, + "step": 7560 + }, + { + "epoch": 2.55, + "learning_rate": 5e-05, + "loss": 0.2135, + "step": 7570 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.2147, + "step": 7580 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.2108, + "step": 7590 + }, + { + "epoch": 2.56, + "learning_rate": 5e-05, + "loss": 0.216, + "step": 7600 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.218, + "step": 7610 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.2163, + "step": 7620 + }, + { + "epoch": 2.57, + "learning_rate": 5e-05, + "loss": 0.2093, + "step": 7630 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2146, + "step": 7640 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2181, + "step": 7650 + }, + { + "epoch": 2.58, + "learning_rate": 5e-05, + "loss": 0.2119, + "step": 7660 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.2105, + "step": 7670 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.212, + "step": 7680 + }, + { + "epoch": 2.59, + "learning_rate": 5e-05, + "loss": 0.2129, + "step": 7690 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.206, + "step": 7700 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.2146, + "step": 7710 + }, + { + "epoch": 2.6, + "learning_rate": 5e-05, + "loss": 0.2158, + "step": 7720 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.2044, + "step": 7730 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.2062, + "step": 7740 + }, + { + "epoch": 2.61, + "learning_rate": 5e-05, + "loss": 0.208, + "step": 7750 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2103, + "step": 7760 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2103, + "step": 7770 + }, + { + "epoch": 2.62, + "learning_rate": 5e-05, + "loss": 0.2071, + "step": 7780 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2099, + "step": 7790 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2207, + "step": 7800 + }, + { + "epoch": 2.63, + "learning_rate": 5e-05, + "loss": 0.2087, + "step": 7810 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2093, + "step": 7820 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2104, + "step": 7830 + }, + { + "epoch": 2.64, + "learning_rate": 5e-05, + "loss": 0.2057, + "step": 7840 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2089, + "step": 7850 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2128, + "step": 7860 + }, + { + "epoch": 2.65, + "learning_rate": 5e-05, + "loss": 0.2088, + "step": 7870 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.2062, + "step": 7880 + }, + { + "epoch": 2.66, + "learning_rate": 5e-05, + "loss": 0.2095, + "step": 7890 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2091, + "step": 7900 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2142, + "step": 7910 + }, + { + "epoch": 2.67, + "learning_rate": 5e-05, + "loss": 0.2134, + "step": 7920 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.205, + "step": 7930 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.2071, + "step": 7940 + }, + { + "epoch": 2.68, + "learning_rate": 5e-05, + "loss": 0.2059, + "step": 7950 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2037, + "step": 7960 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2082, + "step": 7970 + }, + { + "epoch": 2.69, + "learning_rate": 5e-05, + "loss": 0.2078, + "step": 7980 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.2037, + "step": 7990 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.2057, + "step": 8000 + }, + { + "epoch": 2.7, + "learning_rate": 5e-05, + "loss": 0.2052, + "step": 8010 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.2039, + "step": 8020 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.2052, + "step": 8030 + }, + { + "epoch": 2.71, + "learning_rate": 5e-05, + "loss": 0.2029, + "step": 8040 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.2056, + "step": 8050 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.2059, + "step": 8060 + }, + { + "epoch": 2.72, + "learning_rate": 5e-05, + "loss": 0.2013, + "step": 8070 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.2026, + "step": 8080 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.2042, + "step": 8090 + }, + { + "epoch": 2.73, + "learning_rate": 5e-05, + "loss": 0.2093, + "step": 8100 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.2028, + "step": 8110 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.1982, + "step": 8120 + }, + { + "epoch": 2.74, + "learning_rate": 5e-05, + "loss": 0.2014, + "step": 8130 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.211, + "step": 8140 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.2018, + "step": 8150 + }, + { + "epoch": 2.75, + "learning_rate": 5e-05, + "loss": 0.2021, + "step": 8160 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.2102, + "step": 8170 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.1996, + "step": 8180 + }, + { + "epoch": 2.76, + "learning_rate": 5e-05, + "loss": 0.2055, + "step": 8190 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.2025, + "step": 8200 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.1975, + "step": 8210 + }, + { + "epoch": 2.77, + "learning_rate": 5e-05, + "loss": 0.2049, + "step": 8220 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.2004, + "step": 8230 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.1995, + "step": 8240 + }, + { + "epoch": 2.78, + "learning_rate": 5e-05, + "loss": 0.1995, + "step": 8250 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.1942, + "step": 8260 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.2027, + "step": 8270 + }, + { + "epoch": 2.79, + "learning_rate": 5e-05, + "loss": 0.199, + "step": 8280 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.2005, + "step": 8290 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.1987, + "step": 8300 + }, + { + "epoch": 2.8, + "learning_rate": 5e-05, + "loss": 0.2051, + "step": 8310 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.1996, + "step": 8320 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.1976, + "step": 8330 + }, + { + "epoch": 2.81, + "learning_rate": 5e-05, + "loss": 0.1999, + "step": 8340 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.205, + "step": 8350 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.1972, + "step": 8360 + }, + { + "epoch": 2.82, + "learning_rate": 5e-05, + "loss": 0.1985, + "step": 8370 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.1995, + "step": 8380 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.2038, + "step": 8390 + }, + { + "epoch": 2.83, + "learning_rate": 5e-05, + "loss": 0.1942, + "step": 8400 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.1976, + "step": 8410 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.1942, + "step": 8420 + }, + { + "epoch": 2.84, + "learning_rate": 5e-05, + "loss": 0.1911, + "step": 8430 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.1996, + "step": 8440 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.1974, + "step": 8450 + }, + { + "epoch": 2.85, + "learning_rate": 5e-05, + "loss": 0.1984, + "step": 8460 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.196, + "step": 8470 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.1954, + "step": 8480 + }, + { + "epoch": 2.86, + "learning_rate": 5e-05, + "loss": 0.1941, + "step": 8490 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.1949, + "step": 8500 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.1975, + "step": 8510 + }, + { + "epoch": 2.87, + "learning_rate": 5e-05, + "loss": 0.2007, + "step": 8520 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.2015, + "step": 8530 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.1943, + "step": 8540 + }, + { + "epoch": 2.88, + "learning_rate": 5e-05, + "loss": 0.203, + "step": 8550 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.1982, + "step": 8560 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.1948, + "step": 8570 + }, + { + "epoch": 2.89, + "learning_rate": 5e-05, + "loss": 0.1997, + "step": 8580 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.1941, + "step": 8590 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.1965, + "step": 8600 + }, + { + "epoch": 2.9, + "learning_rate": 5e-05, + "loss": 0.194, + "step": 8610 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.1951, + "step": 8620 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.1984, + "step": 8630 + }, + { + "epoch": 2.91, + "learning_rate": 5e-05, + "loss": 0.1924, + "step": 8640 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.19, + "step": 8650 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.1898, + "step": 8660 + }, + { + "epoch": 2.92, + "learning_rate": 5e-05, + "loss": 0.1942, + "step": 8670 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.189, + "step": 8680 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.1978, + "step": 8690 + }, + { + "epoch": 2.93, + "learning_rate": 5e-05, + "loss": 0.1945, + "step": 8700 + }, + { + "epoch": 2.94, + "learning_rate": 5e-05, + "loss": 0.1995, + "step": 8710 + }, + { + "epoch": 2.94, + "learning_rate": 5e-05, + "loss": 0.1984, + "step": 8720 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.1943, + "step": 8730 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.1954, + "step": 8740 + }, + { + "epoch": 2.95, + "learning_rate": 5e-05, + "loss": 0.1882, + "step": 8750 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.1931, + "step": 8760 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.1929, + "step": 8770 + }, + { + "epoch": 2.96, + "learning_rate": 5e-05, + "loss": 0.1903, + "step": 8780 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.193, + "step": 8790 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.192, + "step": 8800 + }, + { + "epoch": 2.97, + "learning_rate": 5e-05, + "loss": 0.19, + "step": 8810 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.194, + "step": 8820 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.1953, + "step": 8830 + }, + { + "epoch": 2.98, + "learning_rate": 5e-05, + "loss": 0.1914, + "step": 8840 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.1873, + "step": 8850 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.1902, + "step": 8860 + }, + { + "epoch": 2.99, + "learning_rate": 5e-05, + "loss": 0.1924, + "step": 8870 + }, + { + "epoch": 3.0, + "learning_rate": 5e-05, + "loss": 0.1854, + "step": 8880 + }, + { + "epoch": 3.0, + "learning_rate": 5e-05, + "loss": 0.1891, + "step": 8890 + }, + { + "epoch": 3.0, + "step": 8892, + "total_flos": 0.0, + "train_loss": 0.35511165388763033, + "train_runtime": 61372.9616, + "train_samples_per_second": 37.092, + "train_steps_per_second": 0.145 + } + ], + "logging_steps": 10, + "max_steps": 8892, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/dinov2_small_v1_global_nominmax/training_args.bin b/dinov2_small_v1_global_nominmax/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4990ad6e2d6dc5c39192293644d1530e9bbcd5 --- /dev/null +++ b/dinov2_small_v1_global_nominmax/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898baa792356a9e4502a27fce84c053686ecb4d412bc621b4bb97d064e9b460c +size 4856