Training in progress, step 2639, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6326305b02c5345bbc9a927c7664228a65ff343e0847e6f540371df214898245
 size 671149168
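These checkpoint artifacts are stored through Git LFS, so the diff above compares pointer files rather than the ~640 MB adapter weights themselves: only the sha256 oid changes between commits, while the byte size stays at 671149168. A minimal sketch of reading such a pointer, assuming the file is checked out as pointer text (three "key value" lines, as above) rather than already smudged into the real binary:

# Minimal sketch, assuming the path below holds LFS pointer text.
def parse_lfs_pointer(path):
    """Return version, sha256 oid, and byte size from a Git LFS pointer file."""
    fields = {}
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return {
        "version": fields["version"],
        # .removeprefix needs Python 3.9+
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

info = parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(info["oid"], info["size"])  # 6326305b... 671149168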
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:39cff31ff9cdaa3d7bf98887213ed1fe7065b9b469b83a1f2874185b37641eff
 size 341314644
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c886e41b7a0f834b74bd37a49b30c0918f520a0ee05607af869fc7415f419c1e
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:457691edb449186d661b3ffd85f29c9eb7a6ed830917fa95f78e4941adeac57c
 size 1064
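optimizer.pt, scheduler.pt, and rng_state.pth are what make this checkpoint resumable: they capture the optimizer moments, the learning-rate schedule position, and the RNG state at step 2639, not just the adapter weights. A hedged sketch of picking the run back up with the Hugging Face Trainer; `model` and `train_dataset` stand in for objects defined elsewhere in the training script, and the output_dir/batch size are taken from the state file below:

# Hedged sketch: resuming from the checkpoint directory shown in this commit.
# `model` and `train_dataset` are assumed to be constructed elsewhere.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",          # matches best_model_checkpoint below
    per_device_train_batch_size=2,     # matches "train_batch_size": 2
)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# Trainer restores the weights plus optimizer.pt, scheduler.pt, and
# rng_state.pth, so the step counter continues from 2639 instead of 0.
trainer.train(resume_from_checkpoint="last-checkpoint")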
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.09612426161766052,
   "best_model_checkpoint": "miner_id_24/checkpoint-2550",
-  "epoch": 0.
+  "epoch": 0.1441151173426898,
   "eval_steps": 150,
-  "global_step":
+  "global_step": 2639,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18001,6 +18001,629 @@
       "eval_samples_per_second": 4.176,
       "eval_steps_per_second": 2.088,
       "step": 2550
+    },
+    {
+      "epoch": 0.1393094597730965,
+      "grad_norm": 3.723029851913452,
+      "learning_rate": 5.52400007501297e-07,
+      "loss": 0.2928,
+      "step": 2551
+    },
+    {
+      "epoch": 0.1393640695182055,
+      "grad_norm": 2.9333627223968506,
+      "learning_rate": 5.39928040206128e-07,
+      "loss": 0.3217,
+      "step": 2552
+    },
+    {
+      "epoch": 0.13941867926331453,
+      "grad_norm": 2.4878392219543457,
+      "learning_rate": 5.27598098726123e-07,
+      "loss": 0.2826,
+      "step": 2553
+    },
+    {
+      "epoch": 0.13947328900842354,
+      "grad_norm": 2.785881757736206,
+      "learning_rate": 5.15410200668054e-07,
+      "loss": 0.4198,
+      "step": 2554
+    },
+    {
+      "epoch": 0.13952789875353258,
+      "grad_norm": 4.309914588928223,
+      "learning_rate": 5.03364363435832e-07,
+      "loss": 0.4567,
+      "step": 2555
+    },
+    {
+      "epoch": 0.1395825084986416,
+      "grad_norm": 3.7101707458496094,
+      "learning_rate": 4.914606042305426e-07,
+      "loss": 0.4291,
+      "step": 2556
+    },
+    {
+      "epoch": 0.1396371182437506,
+      "grad_norm": 4.047240734100342,
+      "learning_rate": 4.796989400503771e-07,
+      "loss": 0.4754,
+      "step": 2557
+    },
+    {
+      "epoch": 0.13969172798885962,
+      "grad_norm": 4.014715671539307,
+      "learning_rate": 4.680793876906009e-07,
+      "loss": 0.4832,
+      "step": 2558
+    },
+    {
+      "epoch": 0.13974633773396863,
+      "grad_norm": 3.6147289276123047,
+      "learning_rate": 4.566019637435748e-07,
+      "loss": 0.4284,
+      "step": 2559
+    },
+    {
+      "epoch": 0.13980094747907765,
+      "grad_norm": 6.866396903991699,
+      "learning_rate": 4.4526668459869967e-07,
+      "loss": 0.6259,
+      "step": 2560
+    },
+    {
+      "epoch": 0.13985555722418666,
+      "grad_norm": 4.688859939575195,
+      "learning_rate": 4.340735664423834e-07,
+      "loss": 0.6314,
+      "step": 2561
+    },
+    {
+      "epoch": 0.13991016696929567,
+      "grad_norm": 5.6968488693237305,
+      "learning_rate": 4.230226252580516e-07,
+      "loss": 0.646,
+      "step": 2562
+    },
+    {
+      "epoch": 0.1399647767144047,
+      "grad_norm": 4.044732093811035,
+      "learning_rate": 4.1211387682609237e-07,
+      "loss": 0.3599,
+      "step": 2563
+    },
+    {
+      "epoch": 0.1400193864595137,
+      "grad_norm": 2.7242889404296875,
+      "learning_rate": 4.013473367238452e-07,
+      "loss": 0.2921,
+      "step": 2564
+    },
+    {
+      "epoch": 0.1400739962046227,
+      "grad_norm": 4.420613765716553,
+      "learning_rate": 3.907230203255896e-07,
+      "loss": 0.4667,
+      "step": 2565
+    },
+    {
+      "epoch": 0.14012860594973173,
+      "grad_norm": 5.145106792449951,
+      "learning_rate": 3.802409428025233e-07,
+      "loss": 0.5955,
+      "step": 2566
+    },
+    {
+      "epoch": 0.14018321569484074,
+      "grad_norm": 3.0266265869140625,
+      "learning_rate": 3.699011191227064e-07,
+      "loss": 0.2157,
+      "step": 2567
+    },
+    {
+      "epoch": 0.14023782543994975,
+      "grad_norm": 3.0053975582122803,
+      "learning_rate": 3.5970356405107263e-07,
+      "loss": 0.329,
+      "step": 2568
+    },
+    {
+      "epoch": 0.14029243518505877,
+      "grad_norm": 3.0817947387695312,
+      "learning_rate": 3.496482921494182e-07,
+      "loss": 0.3323,
+      "step": 2569
+    },
+    {
+      "epoch": 0.14034704493016778,
+      "grad_norm": 3.6538212299346924,
+      "learning_rate": 3.3973531777634625e-07,
+      "loss": 0.5624,
+      "step": 2570
+    },
+    {
+      "epoch": 0.1404016546752768,
+      "grad_norm": 4.178391456604004,
+      "learning_rate": 3.29964655087267e-07,
+      "loss": 0.4504,
+      "step": 2571
+    },
+    {
+      "epoch": 0.1404562644203858,
+      "grad_norm": 3.166633367538452,
+      "learning_rate": 3.2033631803437546e-07,
+      "loss": 0.3282,
+      "step": 2572
+    },
+    {
+      "epoch": 0.14051087416549482,
+      "grad_norm": 5.640282154083252,
+      "learning_rate": 3.108503203666402e-07,
+      "loss": 0.642,
+      "step": 2573
+    },
+    {
+      "epoch": 0.14056548391060383,
+      "grad_norm": 4.808424949645996,
+      "learning_rate": 3.01506675629748e-07,
+      "loss": 0.6067,
+      "step": 2574
+    },
+    {
+      "epoch": 0.14062009365571287,
+      "grad_norm": 2.6138014793395996,
+      "learning_rate": 2.9230539716613713e-07,
+      "loss": 0.2736,
+      "step": 2575
+    },
+    {
+      "epoch": 0.1406747034008219,
+      "grad_norm": 4.742761135101318,
+      "learning_rate": 2.832464981149308e-07,
+      "loss": 0.4355,
+      "step": 2576
+    },
+    {
+      "epoch": 0.1407293131459309,
+      "grad_norm": 4.499370098114014,
+      "learning_rate": 2.7432999141195904e-07,
+      "loss": 0.5156,
+      "step": 2577
+    },
+    {
+      "epoch": 0.14078392289103991,
+      "grad_norm": 4.269051551818848,
+      "learning_rate": 2.655558897897037e-07,
+      "loss": 0.4639,
+      "step": 2578
+    },
+    {
+      "epoch": 0.14083853263614893,
+      "grad_norm": 2.7144224643707275,
+      "learning_rate": 2.569242057773091e-07,
+      "loss": 0.2879,
+      "step": 2579
+    },
+    {
+      "epoch": 0.14089314238125794,
+      "grad_norm": 3.774793863296509,
+      "learning_rate": 2.48434951700538e-07,
+      "loss": 0.3831,
+      "step": 2580
+    },
+    {
+      "epoch": 0.14094775212636695,
+      "grad_norm": 2.9699277877807617,
+      "learning_rate": 2.4008813968177115e-07,
+      "loss": 0.3127,
+      "step": 2581
+    },
+    {
+      "epoch": 0.14100236187147597,
+      "grad_norm": 4.905210971832275,
+      "learning_rate": 2.318837816399966e-07,
+      "loss": 0.4349,
+      "step": 2582
+    },
+    {
+      "epoch": 0.14105697161658498,
+      "grad_norm": 6.435173511505127,
+      "learning_rate": 2.238218892907762e-07,
+      "loss": 0.5555,
+      "step": 2583
+    },
+    {
+      "epoch": 0.141111581361694,
+      "grad_norm": 2.4339380264282227,
+      "learning_rate": 2.1590247414624566e-07,
+      "loss": 0.2213,
+      "step": 2584
+    },
+    {
+      "epoch": 0.141166191106803,
+      "grad_norm": 25.24824333190918,
+      "learning_rate": 2.0812554751504788e-07,
+      "loss": 1.1507,
+      "step": 2585
+    },
+    {
+      "epoch": 0.14122080085191202,
+      "grad_norm": 2.5197904109954834,
+      "learning_rate": 2.004911205024107e-07,
+      "loss": 0.2165,
+      "step": 2586
+    },
+    {
+      "epoch": 0.14127541059702103,
+      "grad_norm": 3.841722011566162,
+      "learning_rate": 1.9299920401004702e-07,
+      "loss": 0.4137,
+      "step": 2587
+    },
+    {
+      "epoch": 0.14133002034213005,
+      "grad_norm": 3.005751609802246,
+      "learning_rate": 1.8564980873618798e-07,
+      "loss": 0.4004,
+      "step": 2588
+    },
+    {
+      "epoch": 0.14138463008723906,
+      "grad_norm": 2.7246780395507812,
+      "learning_rate": 1.784429451755054e-07,
+      "loss": 0.3111,
+      "step": 2589
+    },
+    {
+      "epoch": 0.14143923983234807,
+      "grad_norm": 5.2464494705200195,
+      "learning_rate": 1.7137862361920054e-07,
+      "loss": 0.5692,
+      "step": 2590
+    },
+    {
+      "epoch": 0.1414938495774571,
+      "grad_norm": 5.011432647705078,
+      "learning_rate": 1.6445685415488188e-07,
+      "loss": 0.6385,
+      "step": 2591
+    },
+    {
+      "epoch": 0.1415484593225661,
+      "grad_norm": 5.104406356811523,
+      "learning_rate": 1.5767764666662078e-07,
+      "loss": 0.452,
+      "step": 2592
+    },
+    {
+      "epoch": 0.1416030690676751,
+      "grad_norm": 3.401982545852661,
+      "learning_rate": 1.5104101083490695e-07,
+      "loss": 0.5705,
+      "step": 2593
+    },
+    {
+      "epoch": 0.14165767881278415,
+      "grad_norm": 3.839580535888672,
+      "learning_rate": 1.445469561366486e-07,
+      "loss": 0.5416,
+      "step": 2594
+    },
+    {
+      "epoch": 0.14171228855789317,
+      "grad_norm": 6.060207366943359,
+      "learning_rate": 1.3819549184516112e-07,
+      "loss": 0.3357,
+      "step": 2595
+    },
+    {
+      "epoch": 0.14176689830300218,
+      "grad_norm": 5.15747594833374,
+      "learning_rate": 1.3198662703011178e-07,
+      "loss": 0.7372,
+      "step": 2596
+    },
+    {
+      "epoch": 0.1418215080481112,
+      "grad_norm": 5.06006383895874,
+      "learning_rate": 1.2592037055756401e-07,
+      "loss": 0.4465,
+      "step": 2597
+    },
+    {
+      "epoch": 0.1418761177932202,
+      "grad_norm": 3.257328987121582,
+      "learning_rate": 1.1999673108995523e-07,
+      "loss": 0.3966,
+      "step": 2598
+    },
+    {
+      "epoch": 0.14193072753832922,
+      "grad_norm": 5.779103755950928,
+      "learning_rate": 1.142157170860414e-07,
+      "loss": 0.5437,
+      "step": 2599
+    },
+    {
+      "epoch": 0.14198533728343823,
+      "grad_norm": 4.808257579803467,
+      "learning_rate": 1.0857733680093018e-07,
+      "loss": 0.4866,
+      "step": 2600
+    },
+    {
+      "epoch": 0.14203994702854725,
+      "grad_norm": 4.1833720207214355,
+      "learning_rate": 1.030815982860478e-07,
+      "loss": 0.4623,
+      "step": 2601
+    },
+    {
+      "epoch": 0.14209455677365626,
+      "grad_norm": 3.7359843254089355,
+      "learning_rate": 9.772850938913891e-08,
+      "loss": 0.4209,
+      "step": 2602
+    },
+    {
+      "epoch": 0.14214916651876527,
+      "grad_norm": 3.794407844543457,
+      "learning_rate": 9.251807775423338e-08,
+      "loss": 0.464,
+      "step": 2603
+    },
+    {
+      "epoch": 0.1422037762638743,
+      "grad_norm": 4.367050647735596,
+      "learning_rate": 8.745031082166843e-08,
+      "loss": 0.592,
+      "step": 2604
+    },
+    {
+      "epoch": 0.1422583860089833,
+      "grad_norm": 2.95585560798645,
+      "learning_rate": 8.252521582805539e-08,
+      "loss": 0.2558,
+      "step": 2605
+    },
+    {
+      "epoch": 0.14231299575409231,
+      "grad_norm": 8.54871940612793,
+      "learning_rate": 7.774279980626853e-08,
+      "loss": 0.3912,
+      "step": 2606
+    },
+    {
+      "epoch": 0.14236760549920133,
+      "grad_norm": 3.3426690101623535,
+      "learning_rate": 7.31030695854451e-08,
+      "loss": 0.3907,
+      "step": 2607
+    },
+    {
+      "epoch": 0.14242221524431034,
+      "grad_norm": 3.3232293128967285,
+      "learning_rate": 6.860603179098535e-08,
+      "loss": 0.3101,
+      "step": 2608
+    },
+    {
+      "epoch": 0.14247682498941935,
+      "grad_norm": 3.4219775199890137,
+      "learning_rate": 6.425169284449695e-08,
+      "loss": 0.4823,
+      "step": 2609
+    },
+    {
+      "epoch": 0.14253143473452837,
+      "grad_norm": 3.4401795864105225,
+      "learning_rate": 6.004005896385057e-08,
+      "loss": 0.3489,
+      "step": 2610
+    },
+    {
+      "epoch": 0.14258604447963738,
+      "grad_norm": 3.8011462688446045,
+      "learning_rate": 5.597113616311322e-08,
+      "loss": 0.4371,
+      "step": 2611
+    },
+    {
+      "epoch": 0.1426406542247464,
+      "grad_norm": 3.8608498573303223,
+      "learning_rate": 5.2044930252592714e-08,
+      "loss": 0.3707,
+      "step": 2612
+    },
+    {
+      "epoch": 0.1426952639698554,
+      "grad_norm": 4.046934604644775,
+      "learning_rate": 4.826144683877098e-08,
+      "loss": 0.4979,
+      "step": 2613
+    },
+    {
+      "epoch": 0.14274987371496445,
+      "grad_norm": 3.4525563716888428,
+      "learning_rate": 4.462069132434854e-08,
+      "loss": 0.3075,
+      "step": 2614
+    },
+    {
+      "epoch": 0.14280448346007346,
+      "grad_norm": 4.25659704208374,
+      "learning_rate": 4.112266890821115e-08,
+      "loss": 0.4598,
+      "step": 2615
+    },
+    {
+      "epoch": 0.14285909320518247,
+      "grad_norm": 3.9870779514312744,
+      "learning_rate": 3.776738458541873e-08,
+      "loss": 0.4917,
+      "step": 2616
+    },
+    {
+      "epoch": 0.1429137029502915,
+      "grad_norm": 4.058454990386963,
+      "learning_rate": 3.4554843147216464e-08,
+      "loss": 0.4187,
+      "step": 2617
+    },
+    {
+      "epoch": 0.1429683126954005,
+      "grad_norm": 3.4139716625213623,
+      "learning_rate": 3.148504918100148e-08,
+      "loss": 0.3487,
+      "step": 2618
+    },
+    {
+      "epoch": 0.14302292244050951,
+      "grad_norm": 4.22880744934082,
+      "learning_rate": 2.855800707034506e-08,
+      "loss": 0.3527,
+      "step": 2619
+    },
+    {
+      "epoch": 0.14307753218561853,
+      "grad_norm": 6.186976432800293,
+      "learning_rate": 2.5773720994981542e-08,
+      "loss": 0.8495,
+      "step": 2620
+    },
+    {
+      "epoch": 0.14313214193072754,
+      "grad_norm": 3.880368232727051,
+      "learning_rate": 2.313219493077501e-08,
+      "loss": 0.3824,
+      "step": 2621
+    },
+    {
+      "epoch": 0.14318675167583655,
+      "grad_norm": 4.570279121398926,
+      "learning_rate": 2.063343264973039e-08,
+      "loss": 0.4636,
+      "step": 2622
+    },
+    {
+      "epoch": 0.14324136142094557,
+      "grad_norm": 5.0569844245910645,
+      "learning_rate": 1.8277437720015668e-08,
+      "loss": 0.4485,
+      "step": 2623
+    },
+    {
+      "epoch": 0.14329597116605458,
+      "grad_norm": 3.9018783569335938,
+      "learning_rate": 1.606421350590637e-08,
+      "loss": 0.3351,
+      "step": 2624
+    },
+    {
+      "epoch": 0.1433505809111636,
+      "grad_norm": 5.237850189208984,
+      "learning_rate": 1.3993763167818863e-08,
+      "loss": 0.3547,
+      "step": 2625
+    },
+    {
+      "epoch": 0.1434051906562726,
+      "grad_norm": 3.112959861755371,
+      "learning_rate": 1.2066089662288172e-08,
+      "loss": 0.3124,
+      "step": 2626
+    },
+    {
+      "epoch": 0.14345980040138162,
+      "grad_norm": 3.759575366973877,
+      "learning_rate": 1.0281195741979055e-08,
+      "loss": 0.4726,
+      "step": 2627
+    },
+    {
+      "epoch": 0.14351441014649063,
+      "grad_norm": 3.29337477684021,
+      "learning_rate": 8.639083955663818e-09,
+      "loss": 0.3879,
+      "step": 2628
+    },
+    {
+      "epoch": 0.14356901989159965,
+      "grad_norm": 6.597833633422852,
+      "learning_rate": 7.1397566482112045e-09,
+      "loss": 0.648,
+      "step": 2629
+    },
+    {
+      "epoch": 0.14362362963670866,
+      "grad_norm": 3.1305155754089355,
+      "learning_rate": 5.783215960630806e-09,
+      "loss": 0.2629,
+      "step": 2630
+    },
+    {
+      "epoch": 0.14367823938181767,
+      "grad_norm": 2.792299270629883,
+      "learning_rate": 4.569463830006448e-09,
+      "loss": 0.3509,
+      "step": 2631
+    },
+    {
+      "epoch": 0.1437328491269267,
+      "grad_norm": 5.20680046081543,
+      "learning_rate": 3.498501989529501e-09,
+      "loss": 0.4678,
+      "step": 2632
+    },
+    {
+      "epoch": 0.14378745887203573,
+      "grad_norm": 4.746030330657959,
+      "learning_rate": 2.5703319685321805e-09,
+      "loss": 0.5117,
+      "step": 2633
+    },
+    {
+      "epoch": 0.14384206861714474,
+      "grad_norm": 4.22279167175293,
+      "learning_rate": 1.784955092376528e-09,
+      "loss": 0.3752,
+      "step": 2634
+    },
+    {
+      "epoch": 0.14389667836225375,
+      "grad_norm": 3.952853202819824,
+      "learning_rate": 1.1423724825765369e-09,
+      "loss": 0.3835,
+      "step": 2635
+    },
+    {
+      "epoch": 0.14395128810736277,
+      "grad_norm": 7.198517322540283,
+      "learning_rate": 6.425850567093328e-10,
+      "loss": 0.8721,
+      "step": 2636
+    },
+    {
+      "epoch": 0.14400589785247178,
+      "grad_norm": 4.09921407699585,
+      "learning_rate": 2.8559352845958234e-10,
+      "loss": 0.4584,
+      "step": 2637
+    },
+    {
+      "epoch": 0.1440605075975808,
+      "grad_norm": 4.515547752380371,
+      "learning_rate": 7.139840760839178e-11,
+      "loss": 0.6557,
+      "step": 2638
+    },
+    {
+      "epoch": 0.1441151173426898,
+      "grad_norm": 3.298574447631836,
+      "learning_rate": 0.0,
+      "loss": 0.3579,
+      "step": 2639
     }
   ],
   "logging_steps": 1,
@@ -18024,12 +18647,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.8914216183794237e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
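The appended log_history entries show the learning rate reaching exactly 0.0 at step 2639 while "should_training_stop" flips to true, i.e. the schedule ran to its final step. The roughly 1:4:9 shrink of the last three non-zero rates (7.14e-11, 2.86e-10, 6.43e-10) matches the quadratic tail a cosine schedule has at its endpoint, though the schedule type is an inference, not something the diff states. A small sketch for inspecting the saved state, assuming the checkpoint layout shown above:

# Sketch: load the updated trainer_state.json and examine the appended tail.
import json

with open("last-checkpoint/trainer_state.json", encoding="utf-8") as fh:
    state = json.load(fh)

# Keep only per-step training entries (eval entries use "eval_loss" instead).
steps = [e for e in state["log_history"] if "loss" in e]
print(state["global_step"], state["best_metric"])     # 2639 0.09612426161766052
print(steps[-1]["learning_rate"], steps[-1]["loss"])  # 0.0 0.3579

# Last three non-zero learning rates, normalized by the smallest:
tail = [e["learning_rate"] for e in steps[-4:-1]]
print([lr / tail[-1] for lr in tail])  # ~[9.0, 4.0, 1.0] -> cosine-like endpoint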