Training in progress, step 5040, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 295488936
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2878ca186bfef3cce49ceaeb1b14a21f38a01c4b9b0ca7d8293f2ef04ec4f279
|
3 |
size 295488936
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 150487412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c977421cd4d1446425ef50b917af56a1e510a6d0dd97f67a30c9cbcdaa5bc9d
|
3 |
size 150487412
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0390e6f5919a7dede5c4b10e6ca69080410c973c54dff99d225ab4d891b78151
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac3156db05f675ed073eccf0318853daf91582237d8a27d125ff8a6598249c53
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.9663305282592773,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4950",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -34929,6 +34929,636 @@
|
|
34929 |
"eval_samples_per_second": 27.95,
|
34930 |
"eval_steps_per_second": 13.975,
|
34931 |
"step": 4950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34932 |
}
|
34933 |
],
|
34934 |
"logging_steps": 1,
|
@@ -34952,12 +35582,12 @@
|
|
34952 |
"should_evaluate": false,
|
34953 |
"should_log": false,
|
34954 |
"should_save": true,
|
34955 |
-
"should_training_stop":
|
34956 |
},
|
34957 |
"attributes": {}
|
34958 |
}
|
34959 |
},
|
34960 |
-
"total_flos": 3.
|
34961 |
"train_batch_size": 2,
|
34962 |
"trial_name": null,
|
34963 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.9663305282592773,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4950",
|
4 |
+
"epoch": 0.3772949300993768,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 5040,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
34929 |
"eval_samples_per_second": 27.95,
|
34930 |
"eval_steps_per_second": 13.975,
|
34931 |
"step": 4950
|
34932 |
+
},
|
34933 |
+
{
|
34934 |
+
"epoch": 0.3706323807384949,
|
34935 |
+
"grad_norm": 1.2411137819290161,
|
34936 |
+
"learning_rate": 1.544549968628295e-07,
|
34937 |
+
"loss": 2.1719,
|
34938 |
+
"step": 4951
|
34939 |
+
},
|
34940 |
+
{
|
34941 |
+
"epoch": 0.3707072408436734,
|
34942 |
+
"grad_norm": 1.2201836109161377,
|
34943 |
+
"learning_rate": 1.5100446638173228e-07,
|
34944 |
+
"loss": 1.5187,
|
34945 |
+
"step": 4952
|
34946 |
+
},
|
34947 |
+
{
|
34948 |
+
"epoch": 0.37078210094885183,
|
34949 |
+
"grad_norm": 1.0959951877593994,
|
34950 |
+
"learning_rate": 1.4759288590058263e-07,
|
34951 |
+
"loss": 1.2851,
|
34952 |
+
"step": 4953
|
34953 |
+
},
|
34954 |
+
{
|
34955 |
+
"epoch": 0.37085696105403027,
|
34956 |
+
"grad_norm": 1.2576202154159546,
|
34957 |
+
"learning_rate": 1.4422025675020488e-07,
|
34958 |
+
"loss": 1.8899,
|
34959 |
+
"step": 4954
|
34960 |
+
},
|
34961 |
+
{
|
34962 |
+
"epoch": 0.37093182115920875,
|
34963 |
+
"grad_norm": 1.198002576828003,
|
34964 |
+
"learning_rate": 1.4088658024622448e-07,
|
34965 |
+
"loss": 2.171,
|
34966 |
+
"step": 4955
|
34967 |
+
},
|
34968 |
+
{
|
34969 |
+
"epoch": 0.3710066812643872,
|
34970 |
+
"grad_norm": 1.283689022064209,
|
34971 |
+
"learning_rate": 1.375918576890678e-07,
|
34972 |
+
"loss": 2.2446,
|
34973 |
+
"step": 4956
|
34974 |
+
},
|
34975 |
+
{
|
34976 |
+
"epoch": 0.3710815413695656,
|
34977 |
+
"grad_norm": 1.3751139640808105,
|
34978 |
+
"learning_rate": 1.3433609036397342e-07,
|
34979 |
+
"loss": 1.8505,
|
34980 |
+
"step": 4957
|
34981 |
+
},
|
34982 |
+
{
|
34983 |
+
"epoch": 0.37115640147474405,
|
34984 |
+
"grad_norm": 1.064444661140442,
|
34985 |
+
"learning_rate": 1.3111927954098102e-07,
|
34986 |
+
"loss": 1.7843,
|
34987 |
+
"step": 4958
|
34988 |
+
},
|
34989 |
+
{
|
34990 |
+
"epoch": 0.37123126157992253,
|
34991 |
+
"grad_norm": 1.0398433208465576,
|
34992 |
+
"learning_rate": 1.2794142647492013e-07,
|
34993 |
+
"loss": 1.9175,
|
34994 |
+
"step": 4959
|
34995 |
+
},
|
34996 |
+
{
|
34997 |
+
"epoch": 0.37130612168510096,
|
34998 |
+
"grad_norm": 1.3033219575881958,
|
34999 |
+
"learning_rate": 1.248025324054658e-07,
|
35000 |
+
"loss": 2.3778,
|
35001 |
+
"step": 4960
|
35002 |
+
},
|
35003 |
+
{
|
35004 |
+
"epoch": 0.3713809817902794,
|
35005 |
+
"grad_norm": 1.0516605377197266,
|
35006 |
+
"learning_rate": 1.2170259855703858e-07,
|
35007 |
+
"loss": 2.3922,
|
35008 |
+
"step": 4961
|
35009 |
+
},
|
35010 |
+
{
|
35011 |
+
"epoch": 0.3714558418954579,
|
35012 |
+
"grad_norm": 1.3150949478149414,
|
35013 |
+
"learning_rate": 1.186416261389045e-07,
|
35014 |
+
"loss": 2.2298,
|
35015 |
+
"step": 4962
|
35016 |
+
},
|
35017 |
+
{
|
35018 |
+
"epoch": 0.3715307020006363,
|
35019 |
+
"grad_norm": 1.3337782621383667,
|
35020 |
+
"learning_rate": 1.1561961634510843e-07,
|
35021 |
+
"loss": 2.1364,
|
35022 |
+
"step": 4963
|
35023 |
+
},
|
35024 |
+
{
|
35025 |
+
"epoch": 0.37160556210581475,
|
35026 |
+
"grad_norm": 1.1191245317459106,
|
35027 |
+
"learning_rate": 1.1263657035449627e-07,
|
35028 |
+
"loss": 1.8137,
|
35029 |
+
"step": 4964
|
35030 |
+
},
|
35031 |
+
{
|
35032 |
+
"epoch": 0.37168042221099323,
|
35033 |
+
"grad_norm": 1.032953143119812,
|
35034 |
+
"learning_rate": 1.0969248933073717e-07,
|
35035 |
+
"loss": 1.5611,
|
35036 |
+
"step": 4965
|
35037 |
+
},
|
35038 |
+
{
|
35039 |
+
"epoch": 0.37175528231617166,
|
35040 |
+
"grad_norm": 1.4030370712280273,
|
35041 |
+
"learning_rate": 1.0678737442227915e-07,
|
35042 |
+
"loss": 1.856,
|
35043 |
+
"step": 4966
|
35044 |
+
},
|
35045 |
+
{
|
35046 |
+
"epoch": 0.3718301424213501,
|
35047 |
+
"grad_norm": 1.1899338960647583,
|
35048 |
+
"learning_rate": 1.0392122676237126e-07,
|
35049 |
+
"loss": 1.4475,
|
35050 |
+
"step": 4967
|
35051 |
+
},
|
35052 |
+
{
|
35053 |
+
"epoch": 0.3719050025265285,
|
35054 |
+
"grad_norm": 1.318682312965393,
|
35055 |
+
"learning_rate": 1.0109404746907469e-07,
|
35056 |
+
"loss": 2.1785,
|
35057 |
+
"step": 4968
|
35058 |
+
},
|
35059 |
+
{
|
35060 |
+
"epoch": 0.371979862631707,
|
35061 |
+
"grad_norm": 1.1499639749526978,
|
35062 |
+
"learning_rate": 9.830583764522949e-08,
|
35063 |
+
"loss": 1.7568,
|
35064 |
+
"step": 4969
|
35065 |
+
},
|
35066 |
+
{
|
35067 |
+
"epoch": 0.37205472273688545,
|
35068 |
+
"grad_norm": 1.2958184480667114,
|
35069 |
+
"learning_rate": 9.555659837849895e-08,
|
35070 |
+
"loss": 1.8185,
|
35071 |
+
"step": 4970
|
35072 |
+
},
|
35073 |
+
{
|
35074 |
+
"epoch": 0.3721295828420639,
|
35075 |
+
"grad_norm": 1.2534544467926025,
|
35076 |
+
"learning_rate": 9.28463307413141e-08,
|
35077 |
+
"loss": 2.1521,
|
35078 |
+
"step": 4971
|
35079 |
+
},
|
35080 |
+
{
|
35081 |
+
"epoch": 0.37220444294724236,
|
35082 |
+
"grad_norm": 1.4329583644866943,
|
35083 |
+
"learning_rate": 9.017503579094033e-08,
|
35084 |
+
"loss": 2.2751,
|
35085 |
+
"step": 4972
|
35086 |
+
},
|
35087 |
+
{
|
35088 |
+
"epoch": 0.3722793030524208,
|
35089 |
+
"grad_norm": 1.1662907600402832,
|
35090 |
+
"learning_rate": 8.754271456941077e-08,
|
35091 |
+
"loss": 1.9949,
|
35092 |
+
"step": 4973
|
35093 |
+
},
|
35094 |
+
{
|
35095 |
+
"epoch": 0.3723541631575992,
|
35096 |
+
"grad_norm": 1.1763770580291748,
|
35097 |
+
"learning_rate": 8.494936810355958e-08,
|
35098 |
+
"loss": 2.207,
|
35099 |
+
"step": 4974
|
35100 |
+
},
|
35101 |
+
{
|
35102 |
+
"epoch": 0.37242902326277766,
|
35103 |
+
"grad_norm": 1.3006685972213745,
|
35104 |
+
"learning_rate": 8.23949974050331e-08,
|
35105 |
+
"loss": 1.8742,
|
35106 |
+
"step": 4975
|
35107 |
+
},
|
35108 |
+
{
|
35109 |
+
"epoch": 0.37250388336795615,
|
35110 |
+
"grad_norm": 1.51710844039917,
|
35111 |
+
"learning_rate": 7.987960347025647e-08,
|
35112 |
+
"loss": 2.1904,
|
35113 |
+
"step": 4976
|
35114 |
+
},
|
35115 |
+
{
|
35116 |
+
"epoch": 0.3725787434731346,
|
35117 |
+
"grad_norm": 1.4190149307250977,
|
35118 |
+
"learning_rate": 7.740318728045593e-08,
|
35119 |
+
"loss": 2.177,
|
35120 |
+
"step": 4977
|
35121 |
+
},
|
35122 |
+
{
|
35123 |
+
"epoch": 0.372653603578313,
|
35124 |
+
"grad_norm": 1.3400824069976807,
|
35125 |
+
"learning_rate": 7.496574980166982e-08,
|
35126 |
+
"loss": 1.7044,
|
35127 |
+
"step": 4978
|
35128 |
+
},
|
35129 |
+
{
|
35130 |
+
"epoch": 0.3727284636834915,
|
35131 |
+
"grad_norm": 1.1763372421264648,
|
35132 |
+
"learning_rate": 7.256729198469314e-08,
|
35133 |
+
"loss": 1.7179,
|
35134 |
+
"step": 4979
|
35135 |
+
},
|
35136 |
+
{
|
35137 |
+
"epoch": 0.3728033237886699,
|
35138 |
+
"grad_norm": 1.2219486236572266,
|
35139 |
+
"learning_rate": 7.020781476515525e-08,
|
35140 |
+
"loss": 1.726,
|
35141 |
+
"step": 4980
|
35142 |
+
},
|
35143 |
+
{
|
35144 |
+
"epoch": 0.37287818389384836,
|
35145 |
+
"grad_norm": 1.2846757173538208,
|
35146 |
+
"learning_rate": 6.788731906345325e-08,
|
35147 |
+
"loss": 2.4991,
|
35148 |
+
"step": 4981
|
35149 |
+
},
|
35150 |
+
{
|
35151 |
+
"epoch": 0.37295304399902685,
|
35152 |
+
"grad_norm": 1.2243740558624268,
|
35153 |
+
"learning_rate": 6.560580578479636e-08,
|
35154 |
+
"loss": 1.6773,
|
35155 |
+
"step": 4982
|
35156 |
+
},
|
35157 |
+
{
|
35158 |
+
"epoch": 0.3730279041042053,
|
35159 |
+
"grad_norm": 1.3469129800796509,
|
35160 |
+
"learning_rate": 6.336327581916157e-08,
|
35161 |
+
"loss": 1.9793,
|
35162 |
+
"step": 4983
|
35163 |
+
},
|
35164 |
+
{
|
35165 |
+
"epoch": 0.3731027642093837,
|
35166 |
+
"grad_norm": 1.2938308715820312,
|
35167 |
+
"learning_rate": 6.115973004134912e-08,
|
35168 |
+
"loss": 2.1317,
|
35169 |
+
"step": 4984
|
35170 |
+
},
|
35171 |
+
{
|
35172 |
+
"epoch": 0.37317762431456214,
|
35173 |
+
"grad_norm": 1.362736463546753,
|
35174 |
+
"learning_rate": 5.899516931093807e-08,
|
35175 |
+
"loss": 2.272,
|
35176 |
+
"step": 4985
|
35177 |
+
},
|
35178 |
+
{
|
35179 |
+
"epoch": 0.3732524844197406,
|
35180 |
+
"grad_norm": 1.1034882068634033,
|
35181 |
+
"learning_rate": 5.686959447229745e-08,
|
35182 |
+
"loss": 1.6906,
|
35183 |
+
"step": 4986
|
35184 |
+
},
|
35185 |
+
{
|
35186 |
+
"epoch": 0.37332734452491906,
|
35187 |
+
"grad_norm": 1.1260019540786743,
|
35188 |
+
"learning_rate": 5.478300635458622e-08,
|
35189 |
+
"loss": 2.0264,
|
35190 |
+
"step": 4987
|
35191 |
+
},
|
35192 |
+
{
|
35193 |
+
"epoch": 0.3734022046300975,
|
35194 |
+
"grad_norm": 1.3279393911361694,
|
35195 |
+
"learning_rate": 5.273540577176439e-08,
|
35196 |
+
"loss": 1.6167,
|
35197 |
+
"step": 4988
|
35198 |
+
},
|
35199 |
+
{
|
35200 |
+
"epoch": 0.373477064735276,
|
35201 |
+
"grad_norm": 1.314508318901062,
|
35202 |
+
"learning_rate": 5.0726793522570814e-08,
|
35203 |
+
"loss": 1.5518,
|
35204 |
+
"step": 4989
|
35205 |
+
},
|
35206 |
+
{
|
35207 |
+
"epoch": 0.3735519248404544,
|
35208 |
+
"grad_norm": 1.2558382749557495,
|
35209 |
+
"learning_rate": 4.8757170390556495e-08,
|
35210 |
+
"loss": 1.9004,
|
35211 |
+
"step": 4990
|
35212 |
+
},
|
35213 |
+
{
|
35214 |
+
"epoch": 0.37362678494563284,
|
35215 |
+
"grad_norm": 1.2909865379333496,
|
35216 |
+
"learning_rate": 4.682653714404017e-08,
|
35217 |
+
"loss": 1.7055,
|
35218 |
+
"step": 4991
|
35219 |
+
},
|
35220 |
+
{
|
35221 |
+
"epoch": 0.37370164505081127,
|
35222 |
+
"grad_norm": 1.1747167110443115,
|
35223 |
+
"learning_rate": 4.493489453614164e-08,
|
35224 |
+
"loss": 2.4907,
|
35225 |
+
"step": 4992
|
35226 |
+
},
|
35227 |
+
{
|
35228 |
+
"epoch": 0.37377650515598976,
|
35229 |
+
"grad_norm": 1.4869656562805176,
|
35230 |
+
"learning_rate": 4.3082243304770617e-08,
|
35231 |
+
"loss": 2.4387,
|
35232 |
+
"step": 4993
|
35233 |
+
},
|
35234 |
+
{
|
35235 |
+
"epoch": 0.3738513652611682,
|
35236 |
+
"grad_norm": 1.2070189714431763,
|
35237 |
+
"learning_rate": 4.12685841726268e-08,
|
35238 |
+
"loss": 1.8671,
|
35239 |
+
"step": 4994
|
35240 |
+
},
|
35241 |
+
{
|
35242 |
+
"epoch": 0.3739262253663466,
|
35243 |
+
"grad_norm": 1.234904408454895,
|
35244 |
+
"learning_rate": 3.94939178471998e-08,
|
35245 |
+
"loss": 1.9867,
|
35246 |
+
"step": 4995
|
35247 |
+
},
|
35248 |
+
{
|
35249 |
+
"epoch": 0.3740010854715251,
|
35250 |
+
"grad_norm": 1.1703590154647827,
|
35251 |
+
"learning_rate": 3.775824502076919e-08,
|
35252 |
+
"loss": 2.0854,
|
35253 |
+
"step": 4996
|
35254 |
+
},
|
35255 |
+
{
|
35256 |
+
"epoch": 0.37407594557670354,
|
35257 |
+
"grad_norm": 1.4773565530776978,
|
35258 |
+
"learning_rate": 3.6061566370393376e-08,
|
35259 |
+
"loss": 2.1288,
|
35260 |
+
"step": 4997
|
35261 |
+
},
|
35262 |
+
{
|
35263 |
+
"epoch": 0.37415080568188197,
|
35264 |
+
"grad_norm": 1.5366137027740479,
|
35265 |
+
"learning_rate": 3.4403882557942915e-08,
|
35266 |
+
"loss": 2.2701,
|
35267 |
+
"step": 4998
|
35268 |
+
},
|
35269 |
+
{
|
35270 |
+
"epoch": 0.37422566578706046,
|
35271 |
+
"grad_norm": 1.0518525838851929,
|
35272 |
+
"learning_rate": 3.2785194230045004e-08,
|
35273 |
+
"loss": 1.7945,
|
35274 |
+
"step": 4999
|
35275 |
+
},
|
35276 |
+
{
|
35277 |
+
"epoch": 0.3743005258922389,
|
35278 |
+
"grad_norm": 1.3006261587142944,
|
35279 |
+
"learning_rate": 3.120550201815009e-08,
|
35280 |
+
"loss": 1.9247,
|
35281 |
+
"step": 5000
|
35282 |
+
},
|
35283 |
+
{
|
35284 |
+
"epoch": 0.3743753859974173,
|
35285 |
+
"grad_norm": 1.5011438131332397,
|
35286 |
+
"learning_rate": 2.9664806538465262e-08,
|
35287 |
+
"loss": 2.2498,
|
35288 |
+
"step": 5001
|
35289 |
+
},
|
35290 |
+
{
|
35291 |
+
"epoch": 0.37445024610259575,
|
35292 |
+
"grad_norm": 1.3992769718170166,
|
35293 |
+
"learning_rate": 2.816310839199865e-08,
|
35294 |
+
"loss": 1.8977,
|
35295 |
+
"step": 5002
|
35296 |
+
},
|
35297 |
+
{
|
35298 |
+
"epoch": 0.37452510620777424,
|
35299 |
+
"grad_norm": 1.1042829751968384,
|
35300 |
+
"learning_rate": 2.6700408164548328e-08,
|
35301 |
+
"loss": 1.2875,
|
35302 |
+
"step": 5003
|
35303 |
+
},
|
35304 |
+
{
|
35305 |
+
"epoch": 0.37459996631295267,
|
35306 |
+
"grad_norm": 1.28590726852417,
|
35307 |
+
"learning_rate": 2.5276706426713425e-08,
|
35308 |
+
"loss": 1.9616,
|
35309 |
+
"step": 5004
|
35310 |
+
},
|
35311 |
+
{
|
35312 |
+
"epoch": 0.3746748264181311,
|
35313 |
+
"grad_norm": 1.1898860931396484,
|
35314 |
+
"learning_rate": 2.3892003733838598e-08,
|
35315 |
+
"loss": 1.7639,
|
35316 |
+
"step": 5005
|
35317 |
+
},
|
35318 |
+
{
|
35319 |
+
"epoch": 0.3747496865233096,
|
35320 |
+
"grad_norm": 1.2147506475448608,
|
35321 |
+
"learning_rate": 2.2546300626091753e-08,
|
35322 |
+
"loss": 1.849,
|
35323 |
+
"step": 5006
|
35324 |
+
},
|
35325 |
+
{
|
35326 |
+
"epoch": 0.374824546628488,
|
35327 |
+
"grad_norm": 1.2294347286224365,
|
35328 |
+
"learning_rate": 2.123959762843075e-08,
|
35329 |
+
"loss": 2.1053,
|
35330 |
+
"step": 5007
|
35331 |
+
},
|
35332 |
+
{
|
35333 |
+
"epoch": 0.37489940673366645,
|
35334 |
+
"grad_norm": 1.2910950183868408,
|
35335 |
+
"learning_rate": 1.997189525055898e-08,
|
35336 |
+
"loss": 2.2129,
|
35337 |
+
"step": 5008
|
35338 |
+
},
|
35339 |
+
{
|
35340 |
+
"epoch": 0.3749742668388449,
|
35341 |
+
"grad_norm": 1.4780951738357544,
|
35342 |
+
"learning_rate": 1.874319398702529e-08,
|
35343 |
+
"loss": 1.9618,
|
35344 |
+
"step": 5009
|
35345 |
+
},
|
35346 |
+
{
|
35347 |
+
"epoch": 0.37504912694402337,
|
35348 |
+
"grad_norm": 1.4901782274246216,
|
35349 |
+
"learning_rate": 1.755349431710185e-08,
|
35350 |
+
"loss": 2.0023,
|
35351 |
+
"step": 5010
|
35352 |
+
},
|
35353 |
+
{
|
35354 |
+
"epoch": 0.3751239870492018,
|
35355 |
+
"grad_norm": 1.3150880336761475,
|
35356 |
+
"learning_rate": 1.6402796704895196e-08,
|
35357 |
+
"loss": 1.5646,
|
35358 |
+
"step": 5011
|
35359 |
+
},
|
35360 |
+
{
|
35361 |
+
"epoch": 0.37519884715438023,
|
35362 |
+
"grad_norm": 1.3421481847763062,
|
35363 |
+
"learning_rate": 1.52911015992796e-08,
|
35364 |
+
"loss": 2.0128,
|
35365 |
+
"step": 5012
|
35366 |
+
},
|
35367 |
+
{
|
35368 |
+
"epoch": 0.3752737072595587,
|
35369 |
+
"grad_norm": 1.2243260145187378,
|
35370 |
+
"learning_rate": 1.4218409433908175e-08,
|
35371 |
+
"loss": 2.037,
|
35372 |
+
"step": 5013
|
35373 |
+
},
|
35374 |
+
{
|
35375 |
+
"epoch": 0.37534856736473715,
|
35376 |
+
"grad_norm": 1.2326892614364624,
|
35377 |
+
"learning_rate": 1.3184720627235081e-08,
|
35378 |
+
"loss": 1.8788,
|
35379 |
+
"step": 5014
|
35380 |
+
},
|
35381 |
+
{
|
35382 |
+
"epoch": 0.3754234274699156,
|
35383 |
+
"grad_norm": 1.4980422258377075,
|
35384 |
+
"learning_rate": 1.2190035582471115e-08,
|
35385 |
+
"loss": 2.3279,
|
35386 |
+
"step": 5015
|
35387 |
+
},
|
35388 |
+
{
|
35389 |
+
"epoch": 0.37549828757509407,
|
35390 |
+
"grad_norm": 1.1286108493804932,
|
35391 |
+
"learning_rate": 1.123435468766143e-08,
|
35392 |
+
"loss": 1.798,
|
35393 |
+
"step": 5016
|
35394 |
+
},
|
35395 |
+
{
|
35396 |
+
"epoch": 0.3755731476802725,
|
35397 |
+
"grad_norm": 1.1455106735229492,
|
35398 |
+
"learning_rate": 1.031767831558561e-08,
|
35399 |
+
"loss": 1.6733,
|
35400 |
+
"step": 5017
|
35401 |
+
},
|
35402 |
+
{
|
35403 |
+
"epoch": 0.37564800778545093,
|
35404 |
+
"grad_norm": 1.0452263355255127,
|
35405 |
+
"learning_rate": 9.44000682383539e-09,
|
35406 |
+
"loss": 1.8773,
|
35407 |
+
"step": 5018
|
35408 |
+
},
|
35409 |
+
{
|
35410 |
+
"epoch": 0.37572286789062936,
|
35411 |
+
"grad_norm": 1.4218225479125977,
|
35412 |
+
"learning_rate": 8.601340554781346e-09,
|
35413 |
+
"loss": 2.0528,
|
35414 |
+
"step": 5019
|
35415 |
+
},
|
35416 |
+
{
|
35417 |
+
"epoch": 0.37579772799580785,
|
35418 |
+
"grad_norm": 1.6470973491668701,
|
35419 |
+
"learning_rate": 7.801679835572895e-09,
|
35420 |
+
"loss": 1.7786,
|
35421 |
+
"step": 5020
|
35422 |
+
},
|
35423 |
+
{
|
35424 |
+
"epoch": 0.3758725881009863,
|
35425 |
+
"grad_norm": 1.2469727993011475,
|
35426 |
+
"learning_rate": 7.041024978160504e-09,
|
35427 |
+
"loss": 2.051,
|
35428 |
+
"step": 5021
|
35429 |
+
},
|
35430 |
+
{
|
35431 |
+
"epoch": 0.3759474482061647,
|
35432 |
+
"grad_norm": 1.224454641342163,
|
35433 |
+
"learning_rate": 6.319376279262379e-09,
|
35434 |
+
"loss": 2.2662,
|
35435 |
+
"step": 5022
|
35436 |
+
},
|
35437 |
+
{
|
35438 |
+
"epoch": 0.3760223083113432,
|
35439 |
+
"grad_norm": 1.1871840953826904,
|
35440 |
+
"learning_rate": 5.636734020375567e-09,
|
35441 |
+
"loss": 1.5023,
|
35442 |
+
"step": 5023
|
35443 |
+
},
|
35444 |
+
{
|
35445 |
+
"epoch": 0.37609716841652163,
|
35446 |
+
"grad_norm": 1.135377049446106,
|
35447 |
+
"learning_rate": 4.993098467798163e-09,
|
35448 |
+
"loss": 1.3566,
|
35449 |
+
"step": 5024
|
35450 |
+
},
|
35451 |
+
{
|
35452 |
+
"epoch": 0.37617202852170006,
|
35453 |
+
"grad_norm": 1.3303520679473877,
|
35454 |
+
"learning_rate": 4.388469872618206e-09,
|
35455 |
+
"loss": 1.3843,
|
35456 |
+
"step": 5025
|
35457 |
+
},
|
35458 |
+
{
|
35459 |
+
"epoch": 0.3762468886268785,
|
35460 |
+
"grad_norm": 1.462297797203064,
|
35461 |
+
"learning_rate": 3.822848470669272e-09,
|
35462 |
+
"loss": 2.2968,
|
35463 |
+
"step": 5026
|
35464 |
+
},
|
35465 |
+
{
|
35466 |
+
"epoch": 0.376321748732057,
|
35467 |
+
"grad_norm": 1.2747176885604858,
|
35468 |
+
"learning_rate": 3.296234482619287e-09,
|
35469 |
+
"loss": 1.7962,
|
35470 |
+
"step": 5027
|
35471 |
+
},
|
35472 |
+
{
|
35473 |
+
"epoch": 0.3763966088372354,
|
35474 |
+
"grad_norm": 1.284083604812622,
|
35475 |
+
"learning_rate": 2.8086281138706148e-09,
|
35476 |
+
"loss": 2.1965,
|
35477 |
+
"step": 5028
|
35478 |
+
},
|
35479 |
+
{
|
35480 |
+
"epoch": 0.37647146894241384,
|
35481 |
+
"grad_norm": 1.3260568380355835,
|
35482 |
+
"learning_rate": 2.3600295546599704e-09,
|
35483 |
+
"loss": 1.9976,
|
35484 |
+
"step": 5029
|
35485 |
+
},
|
35486 |
+
{
|
35487 |
+
"epoch": 0.37654632904759233,
|
35488 |
+
"grad_norm": 1.0794789791107178,
|
35489 |
+
"learning_rate": 1.950438979958502e-09,
|
35490 |
+
"loss": 1.7483,
|
35491 |
+
"step": 5030
|
35492 |
+
},
|
35493 |
+
{
|
35494 |
+
"epoch": 0.37662118915277076,
|
35495 |
+
"grad_norm": 1.2783838510513306,
|
35496 |
+
"learning_rate": 1.5798565495495076e-09,
|
35497 |
+
"loss": 1.5004,
|
35498 |
+
"step": 5031
|
35499 |
+
},
|
35500 |
+
{
|
35501 |
+
"epoch": 0.3766960492579492,
|
35502 |
+
"grad_norm": 1.3007869720458984,
|
35503 |
+
"learning_rate": 1.2482824079951271e-09,
|
35504 |
+
"loss": 2.0624,
|
35505 |
+
"step": 5032
|
35506 |
+
},
|
35507 |
+
{
|
35508 |
+
"epoch": 0.3767709093631277,
|
35509 |
+
"grad_norm": 1.372740387916565,
|
35510 |
+
"learning_rate": 9.55716684636343e-10,
|
35511 |
+
"loss": 1.866,
|
35512 |
+
"step": 5033
|
35513 |
+
},
|
35514 |
+
{
|
35515 |
+
"epoch": 0.3768457694683061,
|
35516 |
+
"grad_norm": 1.371090054512024,
|
35517 |
+
"learning_rate": 7.021594936040821e-10,
|
35518 |
+
"loss": 1.9814,
|
35519 |
+
"step": 5034
|
35520 |
+
},
|
35521 |
+
{
|
35522 |
+
"epoch": 0.37692062957348454,
|
35523 |
+
"grad_norm": 1.1047563552856445,
|
35524 |
+
"learning_rate": 4.876109338081137e-10,
|
35525 |
+
"loss": 1.2821,
|
35526 |
+
"step": 5035
|
35527 |
+
},
|
35528 |
+
{
|
35529 |
+
"epoch": 0.376995489678663,
|
35530 |
+
"grad_norm": 1.2861955165863037,
|
35531 |
+
"learning_rate": 3.1207108893704927e-10,
|
35532 |
+
"loss": 1.6591,
|
35533 |
+
"step": 5036
|
35534 |
+
},
|
35535 |
+
{
|
35536 |
+
"epoch": 0.37707034978384146,
|
35537 |
+
"grad_norm": 1.1517539024353027,
|
35538 |
+
"learning_rate": 1.755400274694452e-10,
|
35539 |
+
"loss": 1.8742,
|
35540 |
+
"step": 5037
|
35541 |
+
},
|
35542 |
+
{
|
35543 |
+
"epoch": 0.3771452098890199,
|
35544 |
+
"grad_norm": 1.1889746189117432,
|
35545 |
+
"learning_rate": 7.80178026738021e-11,
|
35546 |
+
"loss": 1.9279,
|
35547 |
+
"step": 5038
|
35548 |
+
},
|
35549 |
+
{
|
35550 |
+
"epoch": 0.3772200699941983,
|
35551 |
+
"grad_norm": 1.262064814567566,
|
35552 |
+
"learning_rate": 1.950445256415634e-11,
|
35553 |
+
"loss": 1.9269,
|
35554 |
+
"step": 5039
|
35555 |
+
},
|
35556 |
+
{
|
35557 |
+
"epoch": 0.3772949300993768,
|
35558 |
+
"grad_norm": 1.4935128688812256,
|
35559 |
+
"learning_rate": 0.0,
|
35560 |
+
"loss": 1.9802,
|
35561 |
+
"step": 5040
|
35562 |
}
|
35563 |
],
|
35564 |
"logging_steps": 1,
|
|
|
35582 |
"should_evaluate": false,
|
35583 |
"should_log": false,
|
35584 |
"should_save": true,
|
35585 |
+
"should_training_stop": true
|
35586 |
},
|
35587 |
"attributes": {}
|
35588 |
}
|
35589 |
},
|
35590 |
+
"total_flos": 3.431234782573363e+17,
|
35591 |
"train_batch_size": 2,
|
35592 |
"trial_name": null,
|
35593 |
"trial_params": null
|