Training in progress, epoch 14
Browse files
- last-checkpoint/{global_step903149 → global_step972622}/mp_rank_00_model_states.pt +1 -1
- last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/latest +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Feb25_11-43-56_user-SYS-5049A-TR/events.out.tfevents.1677293058.user-SYS-5049A-TR.2588949.0 +2 -2
last-checkpoint/{global_step903149 → global_step972622}/mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59134503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc1c2cce3b3af75623cba9f5df822766660a8c3f01abfd04663e1df416c43e36
|
3 |
size 59134503
|
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118216675
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44c58fc587759a2a4e43df4c5560c2f7d84c31461ef58d22185fb17ddf310c87
|
3 |
size 118216675
|
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118217955
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe4e0426042e179b5cbacf399cf0597d8f3a976fb1cdc27af58866c5a37777ba
|
3 |
size 118217955
|
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118221091
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48176933c74e77fe7d60dcfbbd2f733b900fe7c30a30eabe01cea136147c61f7
|
3 |
size 118221091
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step972622
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7001eec29f4d2136c46e26be3a5900949768d4888951fa6848a5b3a40d9219e7
|
3 |
size 59121639
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67600fb789bd2bc45e01af81feba5312f17c5332d7f246f79cbc4ad4e7728b84
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cb34c8b86ab7a9e81c7226c57c89780cb074eb36acc2a8a60ac6ef0cba422c2
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2841365304cb73e3e817735a9f2b5ebf5a7a7a250da4285ca528595f5790d2a
|
3 |
size 14503
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -10959,11 +10959,854 @@
|
|
10959 |
"eval_samples_per_second": 760.419,
|
10960 |
"eval_steps_per_second": 31.685,
|
10961 |
"step": 903149
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10962 |
}
|
10963 |
],
|
10964 |
"max_steps": 972622,
|
10965 |
"num_train_epochs": 14,
|
10966 |
-
"total_flos":
|
10967 |
"trial_name": null,
|
10968 |
"trial_params": null
|
10969 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 14.0,
|
5 |
+
"global_step": 972622,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
10959 |
"eval_samples_per_second": 760.419,
|
10960 |
"eval_steps_per_second": 31.685,
|
10961 |
"step": 903149
|
10962 |
+
},
|
10963 |
+
{
|
10964 |
+
"epoch": 13.01,
|
10965 |
+
"learning_rate": 7.366016024592505e-06,
|
10966 |
+
"loss": 1.4168,
|
10967 |
+
"step": 903500
|
10968 |
+
},
|
10969 |
+
{
|
10970 |
+
"epoch": 13.01,
|
10971 |
+
"learning_rate": 7.314296989806781e-06,
|
10972 |
+
"loss": 1.4189,
|
10973 |
+
"step": 904000
|
10974 |
+
},
|
10975 |
+
{
|
10976 |
+
"epoch": 13.02,
|
10977 |
+
"learning_rate": 7.2623702480540456e-06,
|
10978 |
+
"loss": 1.412,
|
10979 |
+
"step": 904500
|
10980 |
+
},
|
10981 |
+
{
|
10982 |
+
"epoch": 13.03,
|
10983 |
+
"learning_rate": 7.210651213268322e-06,
|
10984 |
+
"loss": 1.4188,
|
10985 |
+
"step": 905000
|
10986 |
+
},
|
10987 |
+
{
|
10988 |
+
"epoch": 13.03,
|
10989 |
+
"learning_rate": 7.1587244715155865e-06,
|
10990 |
+
"loss": 1.4157,
|
10991 |
+
"step": 905500
|
10992 |
+
},
|
10993 |
+
{
|
10994 |
+
"epoch": 13.04,
|
10995 |
+
"learning_rate": 7.107005436729862e-06,
|
10996 |
+
"loss": 1.4185,
|
10997 |
+
"step": 906000
|
10998 |
+
},
|
10999 |
+
{
|
11000 |
+
"epoch": 13.05,
|
11001 |
+
"learning_rate": 7.055078694977127e-06,
|
11002 |
+
"loss": 1.4196,
|
11003 |
+
"step": 906500
|
11004 |
+
},
|
11005 |
+
{
|
11006 |
+
"epoch": 13.06,
|
11007 |
+
"learning_rate": 7.003359660191403e-06,
|
11008 |
+
"loss": 1.4161,
|
11009 |
+
"step": 907000
|
11010 |
+
},
|
11011 |
+
{
|
11012 |
+
"epoch": 13.06,
|
11013 |
+
"learning_rate": 6.951432918438668e-06,
|
11014 |
+
"loss": 1.4175,
|
11015 |
+
"step": 907500
|
11016 |
+
},
|
11017 |
+
{
|
11018 |
+
"epoch": 13.07,
|
11019 |
+
"learning_rate": 6.899713883652943e-06,
|
11020 |
+
"loss": 1.4168,
|
11021 |
+
"step": 908000
|
11022 |
+
},
|
11023 |
+
{
|
11024 |
+
"epoch": 13.08,
|
11025 |
+
"learning_rate": 6.8477871419002086e-06,
|
11026 |
+
"loss": 1.4149,
|
11027 |
+
"step": 908500
|
11028 |
+
},
|
11029 |
+
{
|
11030 |
+
"epoch": 13.08,
|
11031 |
+
"learning_rate": 6.796068107114484e-06,
|
11032 |
+
"loss": 1.4165,
|
11033 |
+
"step": 909000
|
11034 |
+
},
|
11035 |
+
{
|
11036 |
+
"epoch": 13.09,
|
11037 |
+
"learning_rate": 6.744141365361748e-06,
|
11038 |
+
"loss": 1.4169,
|
11039 |
+
"step": 909500
|
11040 |
+
},
|
11041 |
+
{
|
11042 |
+
"epoch": 13.1,
|
11043 |
+
"learning_rate": 6.692422330576023e-06,
|
11044 |
+
"loss": 1.4174,
|
11045 |
+
"step": 910000
|
11046 |
+
},
|
11047 |
+
{
|
11048 |
+
"epoch": 13.11,
|
11049 |
+
"learning_rate": 6.640495588823288e-06,
|
11050 |
+
"loss": 1.4155,
|
11051 |
+
"step": 910500
|
11052 |
+
},
|
11053 |
+
{
|
11054 |
+
"epoch": 13.11,
|
11055 |
+
"learning_rate": 6.588776554037564e-06,
|
11056 |
+
"loss": 1.4191,
|
11057 |
+
"step": 911000
|
11058 |
+
},
|
11059 |
+
{
|
11060 |
+
"epoch": 13.12,
|
11061 |
+
"learning_rate": 6.536849812284829e-06,
|
11062 |
+
"loss": 1.4193,
|
11063 |
+
"step": 911500
|
11064 |
+
},
|
11065 |
+
{
|
11066 |
+
"epoch": 13.13,
|
11067 |
+
"learning_rate": 6.485130777499104e-06,
|
11068 |
+
"loss": 1.4152,
|
11069 |
+
"step": 912000
|
11070 |
+
},
|
11071 |
+
{
|
11072 |
+
"epoch": 13.13,
|
11073 |
+
"learning_rate": 6.433204035746369e-06,
|
11074 |
+
"loss": 1.4168,
|
11075 |
+
"step": 912500
|
11076 |
+
},
|
11077 |
+
{
|
11078 |
+
"epoch": 13.14,
|
11079 |
+
"learning_rate": 6.381485000960645e-06,
|
11080 |
+
"loss": 1.4202,
|
11081 |
+
"step": 913000
|
11082 |
+
},
|
11083 |
+
{
|
11084 |
+
"epoch": 13.15,
|
11085 |
+
"learning_rate": 6.32955825920791e-06,
|
11086 |
+
"loss": 1.4164,
|
11087 |
+
"step": 913500
|
11088 |
+
},
|
11089 |
+
{
|
11090 |
+
"epoch": 13.16,
|
11091 |
+
"learning_rate": 6.277839224422186e-06,
|
11092 |
+
"loss": 1.4209,
|
11093 |
+
"step": 914000
|
11094 |
+
},
|
11095 |
+
{
|
11096 |
+
"epoch": 13.16,
|
11097 |
+
"learning_rate": 6.22591248266945e-06,
|
11098 |
+
"loss": 1.4127,
|
11099 |
+
"step": 914500
|
11100 |
+
},
|
11101 |
+
{
|
11102 |
+
"epoch": 13.17,
|
11103 |
+
"learning_rate": 6.174193447883726e-06,
|
11104 |
+
"loss": 1.4145,
|
11105 |
+
"step": 915000
|
11106 |
+
},
|
11107 |
+
{
|
11108 |
+
"epoch": 13.18,
|
11109 |
+
"learning_rate": 6.12226670613099e-06,
|
11110 |
+
"loss": 1.4168,
|
11111 |
+
"step": 915500
|
11112 |
+
},
|
11113 |
+
{
|
11114 |
+
"epoch": 13.18,
|
11115 |
+
"learning_rate": 6.070547671345266e-06,
|
11116 |
+
"loss": 1.418,
|
11117 |
+
"step": 916000
|
11118 |
+
},
|
11119 |
+
{
|
11120 |
+
"epoch": 13.19,
|
11121 |
+
"learning_rate": 6.018620929592531e-06,
|
11122 |
+
"loss": 1.4194,
|
11123 |
+
"step": 916500
|
11124 |
+
},
|
11125 |
+
{
|
11126 |
+
"epoch": 13.2,
|
11127 |
+
"learning_rate": 5.966901894806807e-06,
|
11128 |
+
"loss": 1.418,
|
11129 |
+
"step": 917000
|
11130 |
+
},
|
11131 |
+
{
|
11132 |
+
"epoch": 13.21,
|
11133 |
+
"learning_rate": 5.914975153054072e-06,
|
11134 |
+
"loss": 1.4147,
|
11135 |
+
"step": 917500
|
11136 |
+
},
|
11137 |
+
{
|
11138 |
+
"epoch": 13.21,
|
11139 |
+
"learning_rate": 5.863256118268347e-06,
|
11140 |
+
"loss": 1.4186,
|
11141 |
+
"step": 918000
|
11142 |
+
},
|
11143 |
+
{
|
11144 |
+
"epoch": 13.22,
|
11145 |
+
"learning_rate": 5.811329376515611e-06,
|
11146 |
+
"loss": 1.4159,
|
11147 |
+
"step": 918500
|
11148 |
+
},
|
11149 |
+
{
|
11150 |
+
"epoch": 13.23,
|
11151 |
+
"learning_rate": 5.7596103417298875e-06,
|
11152 |
+
"loss": 1.4174,
|
11153 |
+
"step": 919000
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 13.24,
|
11157 |
+
"learning_rate": 5.707683599977152e-06,
|
11158 |
+
"loss": 1.4152,
|
11159 |
+
"step": 919500
|
11160 |
+
},
|
11161 |
+
{
|
11162 |
+
"epoch": 13.24,
|
11163 |
+
"learning_rate": 5.655964565191428e-06,
|
11164 |
+
"loss": 1.4149,
|
11165 |
+
"step": 920000
|
11166 |
+
},
|
11167 |
+
{
|
11168 |
+
"epoch": 13.25,
|
11169 |
+
"learning_rate": 5.604037823438693e-06,
|
11170 |
+
"loss": 1.4179,
|
11171 |
+
"step": 920500
|
11172 |
+
},
|
11173 |
+
{
|
11174 |
+
"epoch": 13.26,
|
11175 |
+
"learning_rate": 5.5523187886529685e-06,
|
11176 |
+
"loss": 1.4167,
|
11177 |
+
"step": 921000
|
11178 |
+
},
|
11179 |
+
{
|
11180 |
+
"epoch": 13.26,
|
11181 |
+
"learning_rate": 5.5003920469002334e-06,
|
11182 |
+
"loss": 1.4163,
|
11183 |
+
"step": 921500
|
11184 |
+
},
|
11185 |
+
{
|
11186 |
+
"epoch": 13.27,
|
11187 |
+
"learning_rate": 5.4486730121145095e-06,
|
11188 |
+
"loss": 1.4185,
|
11189 |
+
"step": 922000
|
11190 |
+
},
|
11191 |
+
{
|
11192 |
+
"epoch": 13.28,
|
11193 |
+
"learning_rate": 5.3967462703617736e-06,
|
11194 |
+
"loss": 1.4181,
|
11195 |
+
"step": 922500
|
11196 |
+
},
|
11197 |
+
{
|
11198 |
+
"epoch": 13.29,
|
11199 |
+
"learning_rate": 5.34502723557605e-06,
|
11200 |
+
"loss": 1.4162,
|
11201 |
+
"step": 923000
|
11202 |
+
},
|
11203 |
+
{
|
11204 |
+
"epoch": 13.29,
|
11205 |
+
"learning_rate": 5.2931004938233145e-06,
|
11206 |
+
"loss": 1.4138,
|
11207 |
+
"step": 923500
|
11208 |
+
},
|
11209 |
+
{
|
11210 |
+
"epoch": 13.3,
|
11211 |
+
"learning_rate": 5.24138145903759e-06,
|
11212 |
+
"loss": 1.4129,
|
11213 |
+
"step": 924000
|
11214 |
+
},
|
11215 |
+
{
|
11216 |
+
"epoch": 13.31,
|
11217 |
+
"learning_rate": 5.189454717284855e-06,
|
11218 |
+
"loss": 1.4189,
|
11219 |
+
"step": 924500
|
11220 |
+
},
|
11221 |
+
{
|
11222 |
+
"epoch": 13.31,
|
11223 |
+
"learning_rate": 5.137735682499131e-06,
|
11224 |
+
"loss": 1.415,
|
11225 |
+
"step": 925000
|
11226 |
+
},
|
11227 |
+
{
|
11228 |
+
"epoch": 13.32,
|
11229 |
+
"learning_rate": 5.085808940746395e-06,
|
11230 |
+
"loss": 1.4165,
|
11231 |
+
"step": 925500
|
11232 |
+
},
|
11233 |
+
{
|
11234 |
+
"epoch": 13.33,
|
11235 |
+
"learning_rate": 5.034089905960672e-06,
|
11236 |
+
"loss": 1.4164,
|
11237 |
+
"step": 926000
|
11238 |
+
},
|
11239 |
+
{
|
11240 |
+
"epoch": 13.34,
|
11241 |
+
"learning_rate": 4.982163164207936e-06,
|
11242 |
+
"loss": 1.414,
|
11243 |
+
"step": 926500
|
11244 |
+
},
|
11245 |
+
{
|
11246 |
+
"epoch": 13.34,
|
11247 |
+
"learning_rate": 4.930444129422211e-06,
|
11248 |
+
"loss": 1.4157,
|
11249 |
+
"step": 927000
|
11250 |
+
},
|
11251 |
+
{
|
11252 |
+
"epoch": 13.35,
|
11253 |
+
"learning_rate": 4.878517387669476e-06,
|
11254 |
+
"loss": 1.4119,
|
11255 |
+
"step": 927500
|
11256 |
+
},
|
11257 |
+
{
|
11258 |
+
"epoch": 13.36,
|
11259 |
+
"learning_rate": 4.826798352883752e-06,
|
11260 |
+
"loss": 1.4178,
|
11261 |
+
"step": 928000
|
11262 |
+
},
|
11263 |
+
{
|
11264 |
+
"epoch": 13.36,
|
11265 |
+
"learning_rate": 4.774871611131017e-06,
|
11266 |
+
"loss": 1.415,
|
11267 |
+
"step": 928500
|
11268 |
+
},
|
11269 |
+
{
|
11270 |
+
"epoch": 13.37,
|
11271 |
+
"learning_rate": 4.723152576345293e-06,
|
11272 |
+
"loss": 1.4165,
|
11273 |
+
"step": 929000
|
11274 |
+
},
|
11275 |
+
{
|
11276 |
+
"epoch": 13.38,
|
11277 |
+
"learning_rate": 4.671225834592557e-06,
|
11278 |
+
"loss": 1.4168,
|
11279 |
+
"step": 929500
|
11280 |
+
},
|
11281 |
+
{
|
11282 |
+
"epoch": 13.39,
|
11283 |
+
"learning_rate": 4.619506799806832e-06,
|
11284 |
+
"loss": 1.4159,
|
11285 |
+
"step": 930000
|
11286 |
+
},
|
11287 |
+
{
|
11288 |
+
"epoch": 13.39,
|
11289 |
+
"learning_rate": 4.567580058054097e-06,
|
11290 |
+
"loss": 1.4164,
|
11291 |
+
"step": 930500
|
11292 |
+
},
|
11293 |
+
{
|
11294 |
+
"epoch": 13.4,
|
11295 |
+
"learning_rate": 4.515861023268373e-06,
|
11296 |
+
"loss": 1.4166,
|
11297 |
+
"step": 931000
|
11298 |
+
},
|
11299 |
+
{
|
11300 |
+
"epoch": 13.41,
|
11301 |
+
"learning_rate": 4.463934281515638e-06,
|
11302 |
+
"loss": 1.4157,
|
11303 |
+
"step": 931500
|
11304 |
+
},
|
11305 |
+
{
|
11306 |
+
"epoch": 13.42,
|
11307 |
+
"learning_rate": 4.412215246729914e-06,
|
11308 |
+
"loss": 1.4137,
|
11309 |
+
"step": 932000
|
11310 |
+
},
|
11311 |
+
{
|
11312 |
+
"epoch": 13.42,
|
11313 |
+
"learning_rate": 4.360288504977179e-06,
|
11314 |
+
"loss": 1.4153,
|
11315 |
+
"step": 932500
|
11316 |
+
},
|
11317 |
+
{
|
11318 |
+
"epoch": 13.43,
|
11319 |
+
"learning_rate": 4.308569470191454e-06,
|
11320 |
+
"loss": 1.4146,
|
11321 |
+
"step": 933000
|
11322 |
+
},
|
11323 |
+
{
|
11324 |
+
"epoch": 13.44,
|
11325 |
+
"learning_rate": 4.256642728438719e-06,
|
11326 |
+
"loss": 1.4174,
|
11327 |
+
"step": 933500
|
11328 |
+
},
|
11329 |
+
{
|
11330 |
+
"epoch": 13.44,
|
11331 |
+
"learning_rate": 4.204923693652994e-06,
|
11332 |
+
"loss": 1.4103,
|
11333 |
+
"step": 934000
|
11334 |
+
},
|
11335 |
+
{
|
11336 |
+
"epoch": 13.45,
|
11337 |
+
"learning_rate": 4.152996951900259e-06,
|
11338 |
+
"loss": 1.4174,
|
11339 |
+
"step": 934500
|
11340 |
+
},
|
11341 |
+
{
|
11342 |
+
"epoch": 13.46,
|
11343 |
+
"learning_rate": 4.101277917114535e-06,
|
11344 |
+
"loss": 1.4162,
|
11345 |
+
"step": 935000
|
11346 |
+
},
|
11347 |
+
{
|
11348 |
+
"epoch": 13.47,
|
11349 |
+
"learning_rate": 4.0493511753618e-06,
|
11350 |
+
"loss": 1.4147,
|
11351 |
+
"step": 935500
|
11352 |
+
},
|
11353 |
+
{
|
11354 |
+
"epoch": 13.47,
|
11355 |
+
"learning_rate": 3.997632140576075e-06,
|
11356 |
+
"loss": 1.414,
|
11357 |
+
"step": 936000
|
11358 |
+
},
|
11359 |
+
{
|
11360 |
+
"epoch": 13.48,
|
11361 |
+
"learning_rate": 3.94570539882334e-06,
|
11362 |
+
"loss": 1.415,
|
11363 |
+
"step": 936500
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 13.49,
|
11367 |
+
"learning_rate": 3.893986364037616e-06,
|
11368 |
+
"loss": 1.4149,
|
11369 |
+
"step": 937000
|
11370 |
+
},
|
11371 |
+
{
|
11372 |
+
"epoch": 13.49,
|
11373 |
+
"learning_rate": 3.84205962228488e-06,
|
11374 |
+
"loss": 1.4135,
|
11375 |
+
"step": 937500
|
11376 |
+
},
|
11377 |
+
{
|
11378 |
+
"epoch": 13.5,
|
11379 |
+
"learning_rate": 3.790340587499156e-06,
|
11380 |
+
"loss": 1.416,
|
11381 |
+
"step": 938000
|
11382 |
+
},
|
11383 |
+
{
|
11384 |
+
"epoch": 13.51,
|
11385 |
+
"learning_rate": 3.7384138457464213e-06,
|
11386 |
+
"loss": 1.4152,
|
11387 |
+
"step": 938500
|
11388 |
+
},
|
11389 |
+
{
|
11390 |
+
"epoch": 13.52,
|
11391 |
+
"learning_rate": 3.686694810960697e-06,
|
11392 |
+
"loss": 1.4164,
|
11393 |
+
"step": 939000
|
11394 |
+
},
|
11395 |
+
{
|
11396 |
+
"epoch": 13.52,
|
11397 |
+
"learning_rate": 3.634768069207962e-06,
|
11398 |
+
"loss": 1.4184,
|
11399 |
+
"step": 939500
|
11400 |
+
},
|
11401 |
+
{
|
11402 |
+
"epoch": 13.53,
|
11403 |
+
"learning_rate": 3.5830490344222375e-06,
|
11404 |
+
"loss": 1.4105,
|
11405 |
+
"step": 940000
|
11406 |
+
},
|
11407 |
+
{
|
11408 |
+
"epoch": 13.54,
|
11409 |
+
"learning_rate": 3.5311222926695015e-06,
|
11410 |
+
"loss": 1.4128,
|
11411 |
+
"step": 940500
|
11412 |
+
},
|
11413 |
+
{
|
11414 |
+
"epoch": 13.54,
|
11415 |
+
"learning_rate": 3.479403257883778e-06,
|
11416 |
+
"loss": 1.4138,
|
11417 |
+
"step": 941000
|
11418 |
+
},
|
11419 |
+
{
|
11420 |
+
"epoch": 13.55,
|
11421 |
+
"learning_rate": 3.4274765161310425e-06,
|
11422 |
+
"loss": 1.4149,
|
11423 |
+
"step": 941500
|
11424 |
+
},
|
11425 |
+
{
|
11426 |
+
"epoch": 13.56,
|
11427 |
+
"learning_rate": 3.375757481345318e-06,
|
11428 |
+
"loss": 1.4165,
|
11429 |
+
"step": 942000
|
11430 |
+
},
|
11431 |
+
{
|
11432 |
+
"epoch": 13.57,
|
11433 |
+
"learning_rate": 3.323830739592583e-06,
|
11434 |
+
"loss": 1.4187,
|
11435 |
+
"step": 942500
|
11436 |
+
},
|
11437 |
+
{
|
11438 |
+
"epoch": 13.57,
|
11439 |
+
"learning_rate": 3.2721117048068587e-06,
|
11440 |
+
"loss": 1.4189,
|
11441 |
+
"step": 943000
|
11442 |
+
},
|
11443 |
+
{
|
11444 |
+
"epoch": 13.58,
|
11445 |
+
"learning_rate": 3.2201849630541236e-06,
|
11446 |
+
"loss": 1.4156,
|
11447 |
+
"step": 943500
|
11448 |
+
},
|
11449 |
+
{
|
11450 |
+
"epoch": 13.59,
|
11451 |
+
"learning_rate": 3.1684659282683992e-06,
|
11452 |
+
"loss": 1.4144,
|
11453 |
+
"step": 944000
|
11454 |
+
},
|
11455 |
+
{
|
11456 |
+
"epoch": 13.6,
|
11457 |
+
"learning_rate": 3.116539186515664e-06,
|
11458 |
+
"loss": 1.4165,
|
11459 |
+
"step": 944500
|
11460 |
+
},
|
11461 |
+
{
|
11462 |
+
"epoch": 13.6,
|
11463 |
+
"learning_rate": 3.0648201517299398e-06,
|
11464 |
+
"loss": 1.4129,
|
11465 |
+
"step": 945000
|
11466 |
+
},
|
11467 |
+
{
|
11468 |
+
"epoch": 13.61,
|
11469 |
+
"learning_rate": 3.0128934099772042e-06,
|
11470 |
+
"loss": 1.414,
|
11471 |
+
"step": 945500
|
11472 |
+
},
|
11473 |
+
{
|
11474 |
+
"epoch": 13.62,
|
11475 |
+
"learning_rate": 2.96117437519148e-06,
|
11476 |
+
"loss": 1.4149,
|
11477 |
+
"step": 946000
|
11478 |
+
},
|
11479 |
+
{
|
11480 |
+
"epoch": 13.62,
|
11481 |
+
"learning_rate": 2.9092476334387448e-06,
|
11482 |
+
"loss": 1.416,
|
11483 |
+
"step": 946500
|
11484 |
+
},
|
11485 |
+
{
|
11486 |
+
"epoch": 13.63,
|
11487 |
+
"learning_rate": 2.8575285986530204e-06,
|
11488 |
+
"loss": 1.4189,
|
11489 |
+
"step": 947000
|
11490 |
+
},
|
11491 |
+
{
|
11492 |
+
"epoch": 13.64,
|
11493 |
+
"learning_rate": 2.8056018569002853e-06,
|
11494 |
+
"loss": 1.4189,
|
11495 |
+
"step": 947500
|
11496 |
+
},
|
11497 |
+
{
|
11498 |
+
"epoch": 13.65,
|
11499 |
+
"learning_rate": 2.753882822114561e-06,
|
11500 |
+
"loss": 1.4135,
|
11501 |
+
"step": 948000
|
11502 |
+
},
|
11503 |
+
{
|
11504 |
+
"epoch": 13.65,
|
11505 |
+
"learning_rate": 2.701956080361826e-06,
|
11506 |
+
"loss": 1.4142,
|
11507 |
+
"step": 948500
|
11508 |
+
},
|
11509 |
+
{
|
11510 |
+
"epoch": 13.66,
|
11511 |
+
"learning_rate": 2.6502370455761015e-06,
|
11512 |
+
"loss": 1.4147,
|
11513 |
+
"step": 949000
|
11514 |
+
},
|
11515 |
+
{
|
11516 |
+
"epoch": 13.67,
|
11517 |
+
"learning_rate": 2.598310303823366e-06,
|
11518 |
+
"loss": 1.4146,
|
11519 |
+
"step": 949500
|
11520 |
+
},
|
11521 |
+
{
|
11522 |
+
"epoch": 13.67,
|
11523 |
+
"learning_rate": 2.5465912690376416e-06,
|
11524 |
+
"loss": 1.418,
|
11525 |
+
"step": 950000
|
11526 |
+
},
|
11527 |
+
{
|
11528 |
+
"epoch": 13.68,
|
11529 |
+
"learning_rate": 2.494664527284907e-06,
|
11530 |
+
"loss": 1.4145,
|
11531 |
+
"step": 950500
|
11532 |
+
},
|
11533 |
+
{
|
11534 |
+
"epoch": 13.69,
|
11535 |
+
"learning_rate": 2.4429454924991826e-06,
|
11536 |
+
"loss": 1.4111,
|
11537 |
+
"step": 951000
|
11538 |
+
},
|
11539 |
+
{
|
11540 |
+
"epoch": 13.7,
|
11541 |
+
"learning_rate": 2.391018750746447e-06,
|
11542 |
+
"loss": 1.4181,
|
11543 |
+
"step": 951500
|
11544 |
+
},
|
11545 |
+
{
|
11546 |
+
"epoch": 13.7,
|
11547 |
+
"learning_rate": 2.3392997159607227e-06,
|
11548 |
+
"loss": 1.4129,
|
11549 |
+
"step": 952000
|
11550 |
+
},
|
11551 |
+
{
|
11552 |
+
"epoch": 13.71,
|
11553 |
+
"learning_rate": 2.287372974207987e-06,
|
11554 |
+
"loss": 1.4153,
|
11555 |
+
"step": 952500
|
11556 |
+
},
|
11557 |
+
{
|
11558 |
+
"epoch": 13.72,
|
11559 |
+
"learning_rate": 2.2356539394222632e-06,
|
11560 |
+
"loss": 1.4131,
|
11561 |
+
"step": 953000
|
11562 |
+
},
|
11563 |
+
{
|
11564 |
+
"epoch": 13.72,
|
11565 |
+
"learning_rate": 2.183727197669528e-06,
|
11566 |
+
"loss": 1.4121,
|
11567 |
+
"step": 953500
|
11568 |
+
},
|
11569 |
+
{
|
11570 |
+
"epoch": 13.73,
|
11571 |
+
"learning_rate": 2.1320081628838038e-06,
|
11572 |
+
"loss": 1.4153,
|
11573 |
+
"step": 954000
|
11574 |
+
},
|
11575 |
+
{
|
11576 |
+
"epoch": 13.74,
|
11577 |
+
"learning_rate": 2.0800814211310682e-06,
|
11578 |
+
"loss": 1.4129,
|
11579 |
+
"step": 954500
|
11580 |
+
},
|
11581 |
+
{
|
11582 |
+
"epoch": 13.75,
|
11583 |
+
"learning_rate": 2.0283623863453443e-06,
|
11584 |
+
"loss": 1.4121,
|
11585 |
+
"step": 955000
|
11586 |
+
},
|
11587 |
+
{
|
11588 |
+
"epoch": 13.75,
|
11589 |
+
"learning_rate": 1.9764356445926088e-06,
|
11590 |
+
"loss": 1.4184,
|
11591 |
+
"step": 955500
|
11592 |
+
},
|
11593 |
+
{
|
11594 |
+
"epoch": 13.76,
|
11595 |
+
"learning_rate": 1.9247166098068844e-06,
|
11596 |
+
"loss": 1.4101,
|
11597 |
+
"step": 956000
|
11598 |
+
},
|
11599 |
+
{
|
11600 |
+
"epoch": 13.77,
|
11601 |
+
"learning_rate": 1.872789868054149e-06,
|
11602 |
+
"loss": 1.4121,
|
11603 |
+
"step": 956500
|
11604 |
+
},
|
11605 |
+
{
|
11606 |
+
"epoch": 13.78,
|
11607 |
+
"learning_rate": 1.8210708332684252e-06,
|
11608 |
+
"loss": 1.4141,
|
11609 |
+
"step": 957000
|
11610 |
+
},
|
11611 |
+
{
|
11612 |
+
"epoch": 13.78,
|
11613 |
+
"learning_rate": 1.7691440915156898e-06,
|
11614 |
+
"loss": 1.4124,
|
11615 |
+
"step": 957500
|
11616 |
+
},
|
11617 |
+
{
|
11618 |
+
"epoch": 13.79,
|
11619 |
+
"learning_rate": 1.7174250567299655e-06,
|
11620 |
+
"loss": 1.4142,
|
11621 |
+
"step": 958000
|
11622 |
+
},
|
11623 |
+
{
|
11624 |
+
"epoch": 13.8,
|
11625 |
+
"learning_rate": 1.6654983149772302e-06,
|
11626 |
+
"loss": 1.4151,
|
11627 |
+
"step": 958500
|
11628 |
+
},
|
11629 |
+
{
|
11630 |
+
"epoch": 13.8,
|
11631 |
+
"learning_rate": 1.6137792801915058e-06,
|
11632 |
+
"loss": 1.4158,
|
11633 |
+
"step": 959000
|
11634 |
+
},
|
11635 |
+
{
|
11636 |
+
"epoch": 13.81,
|
11637 |
+
"learning_rate": 1.5618525384387705e-06,
|
11638 |
+
"loss": 1.4143,
|
11639 |
+
"step": 959500
|
11640 |
+
},
|
11641 |
+
{
|
11642 |
+
"epoch": 13.82,
|
11643 |
+
"learning_rate": 1.5101335036530463e-06,
|
11644 |
+
"loss": 1.4114,
|
11645 |
+
"step": 960000
|
11646 |
+
},
|
11647 |
+
{
|
11648 |
+
"epoch": 13.83,
|
11649 |
+
"learning_rate": 1.458206761900311e-06,
|
11650 |
+
"loss": 1.412,
|
11651 |
+
"step": 960500
|
11652 |
+
},
|
11653 |
+
{
|
11654 |
+
"epoch": 13.83,
|
11655 |
+
"learning_rate": 1.4064877271145869e-06,
|
11656 |
+
"loss": 1.4097,
|
11657 |
+
"step": 961000
|
11658 |
+
},
|
11659 |
+
{
|
11660 |
+
"epoch": 13.84,
|
11661 |
+
"learning_rate": 1.3545609853618516e-06,
|
11662 |
+
"loss": 1.415,
|
11663 |
+
"step": 961500
|
11664 |
+
},
|
11665 |
+
{
|
11666 |
+
"epoch": 13.85,
|
11667 |
+
"learning_rate": 1.3028419505761274e-06,
|
11668 |
+
"loss": 1.417,
|
11669 |
+
"step": 962000
|
11670 |
+
},
|
11671 |
+
{
|
11672 |
+
"epoch": 13.85,
|
11673 |
+
"learning_rate": 1.250915208823392e-06,
|
11674 |
+
"loss": 1.4113,
|
11675 |
+
"step": 962500
|
11676 |
+
},
|
11677 |
+
{
|
11678 |
+
"epoch": 13.86,
|
11679 |
+
"learning_rate": 1.1991961740376677e-06,
|
11680 |
+
"loss": 1.4159,
|
11681 |
+
"step": 963000
|
11682 |
+
},
|
11683 |
+
{
|
11684 |
+
"epoch": 13.87,
|
11685 |
+
"learning_rate": 1.1472694322849324e-06,
|
11686 |
+
"loss": 1.4118,
|
11687 |
+
"step": 963500
|
11688 |
+
},
|
11689 |
+
{
|
11690 |
+
"epoch": 13.88,
|
11691 |
+
"learning_rate": 1.0955503974992083e-06,
|
11692 |
+
"loss": 1.4143,
|
11693 |
+
"step": 964000
|
11694 |
+
},
|
11695 |
+
{
|
11696 |
+
"epoch": 13.88,
|
11697 |
+
"learning_rate": 1.043623655746473e-06,
|
11698 |
+
"loss": 1.415,
|
11699 |
+
"step": 964500
|
11700 |
+
},
|
11701 |
+
{
|
11702 |
+
"epoch": 13.89,
|
11703 |
+
"learning_rate": 9.919046209607486e-07,
|
11704 |
+
"loss": 1.4183,
|
11705 |
+
"step": 965000
|
11706 |
+
},
|
11707 |
+
{
|
11708 |
+
"epoch": 13.9,
|
11709 |
+
"learning_rate": 9.399778792080134e-07,
|
11710 |
+
"loss": 1.413,
|
11711 |
+
"step": 965500
|
11712 |
+
},
|
11713 |
+
{
|
11714 |
+
"epoch": 13.9,
|
11715 |
+
"learning_rate": 8.88258844422289e-07,
|
11716 |
+
"loss": 1.4165,
|
11717 |
+
"step": 966000
|
11718 |
+
},
|
11719 |
+
{
|
11720 |
+
"epoch": 13.91,
|
11721 |
+
"learning_rate": 8.363321026695538e-07,
|
11722 |
+
"loss": 1.4129,
|
11723 |
+
"step": 966500
|
11724 |
+
},
|
11725 |
+
{
|
11726 |
+
"epoch": 13.92,
|
11727 |
+
"learning_rate": 7.846130678838295e-07,
|
11728 |
+
"loss": 1.4152,
|
11729 |
+
"step": 967000
|
11730 |
+
},
|
11731 |
+
{
|
11732 |
+
"epoch": 13.93,
|
11733 |
+
"learning_rate": 7.326863261310943e-07,
|
11734 |
+
"loss": 1.4115,
|
11735 |
+
"step": 967500
|
11736 |
+
},
|
11737 |
+
{
|
11738 |
+
"epoch": 13.93,
|
11739 |
+
"learning_rate": 6.8096729134537e-07,
|
11740 |
+
"loss": 1.4165,
|
11741 |
+
"step": 968000
|
11742 |
+
},
|
11743 |
+
{
|
11744 |
+
"epoch": 13.94,
|
11745 |
+
"learning_rate": 6.290405495926348e-07,
|
11746 |
+
"loss": 1.4132,
|
11747 |
+
"step": 968500
|
11748 |
+
},
|
11749 |
+
{
|
11750 |
+
"epoch": 13.95,
|
11751 |
+
"learning_rate": 5.773215148069104e-07,
|
11752 |
+
"loss": 1.4156,
|
11753 |
+
"step": 969000
|
11754 |
+
},
|
11755 |
+
{
|
11756 |
+
"epoch": 13.96,
|
11757 |
+
"learning_rate": 5.253947730541751e-07,
|
11758 |
+
"loss": 1.4149,
|
11759 |
+
"step": 969500
|
11760 |
+
},
|
11761 |
+
{
|
11762 |
+
"epoch": 13.96,
|
11763 |
+
"learning_rate": 4.736757382684509e-07,
|
11764 |
+
"loss": 1.416,
|
11765 |
+
"step": 970000
|
11766 |
+
},
|
11767 |
+
{
|
11768 |
+
"epoch": 13.97,
|
11769 |
+
"learning_rate": 4.217489965157156e-07,
|
11770 |
+
"loss": 1.4144,
|
11771 |
+
"step": 970500
|
11772 |
+
},
|
11773 |
+
{
|
11774 |
+
"epoch": 13.98,
|
11775 |
+
"learning_rate": 3.700299617299913e-07,
|
11776 |
+
"loss": 1.4145,
|
11777 |
+
"step": 971000
|
11778 |
+
},
|
11779 |
+
{
|
11780 |
+
"epoch": 13.98,
|
11781 |
+
"learning_rate": 3.181032199772561e-07,
|
11782 |
+
"loss": 1.4134,
|
11783 |
+
"step": 971500
|
11784 |
+
},
|
11785 |
+
{
|
11786 |
+
"epoch": 13.99,
|
11787 |
+
"learning_rate": 2.663841851915318e-07,
|
11788 |
+
"loss": 1.4129,
|
11789 |
+
"step": 972000
|
11790 |
+
},
|
11791 |
+
{
|
11792 |
+
"epoch": 14.0,
|
11793 |
+
"learning_rate": 2.1445744343879657e-07,
|
11794 |
+
"loss": 1.4118,
|
11795 |
+
"step": 972500
|
11796 |
+
},
|
11797 |
+
{
|
11798 |
+
"epoch": 14.0,
|
11799 |
+
"eval_accuracy": 0.7232865042773621,
|
11800 |
+
"eval_loss": 1.2724609375,
|
11801 |
+
"eval_runtime": 708.7868,
|
11802 |
+
"eval_samples_per_second": 760.365,
|
11803 |
+
"eval_steps_per_second": 31.682,
|
11804 |
+
"step": 972622
|
11805 |
}
|
11806 |
],
|
11807 |
"max_steps": 972622,
|
11808 |
"num_train_epochs": 14,
|
11809 |
+
"total_flos": 7.235068192639418e+18,
|
11810 |
"trial_name": null,
|
11811 |
"trial_params": null
|
11812 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59121639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7001eec29f4d2136c46e26be3a5900949768d4888951fa6848a5b3a40d9219e7
|
3 |
size 59121639
|
runs/Feb25_11-43-56_user-SYS-5049A-TR/events.out.tfevents.1677293058.user-SYS-5049A-TR.2588949.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33ef3952fb4af1a89a9072d21727a1d90a4fe619580c32a2f7fe2665eb16f951
|
3 |
+
size 319167
|