{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 1025,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004878048780487805,
      "grad_norm": 616.0,
      "learning_rate": 1.941747572815534e-06,
      "loss": 39.9034,
      "step": 1
    },
    {
      "epoch": 0.024390243902439025,
      "grad_norm": 466.0,
      "learning_rate": 9.70873786407767e-06,
      "loss": 34.9341,
      "step": 5
    },
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 251.0,
      "learning_rate": 1.941747572815534e-05,
      "loss": 29.4015,
      "step": 10
    },
    {
      "epoch": 0.07317073170731707,
      "grad_norm": 56.25,
      "learning_rate": 2.912621359223301e-05,
      "loss": 17.9289,
      "step": 15
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 39.75,
      "learning_rate": 3.883495145631068e-05,
      "loss": 14.9717,
      "step": 20
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 15.75,
      "learning_rate": 4.854368932038835e-05,
      "loss": 12.4657,
      "step": 25
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 5.15625,
      "learning_rate": 5.825242718446602e-05,
      "loss": 10.6748,
      "step": 30
    },
    {
      "epoch": 0.17073170731707318,
      "grad_norm": 3.890625,
      "learning_rate": 6.79611650485437e-05,
      "loss": 9.811,
      "step": 35
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 6.75,
      "learning_rate": 7.766990291262136e-05,
      "loss": 9.6309,
      "step": 40
    },
    {
      "epoch": 0.21951219512195122,
      "grad_norm": 15.3125,
      "learning_rate": 8.737864077669902e-05,
      "loss": 8.5315,
      "step": 45
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 31.75,
      "learning_rate": 9.70873786407767e-05,
      "loss": 6.2519,
      "step": 50
    },
    {
      "epoch": 0.2682926829268293,
      "grad_norm": 5.25,
      "learning_rate": 0.00010679611650485437,
      "loss": 2.2665,
      "step": 55
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 2.40625,
      "learning_rate": 0.00011650485436893204,
      "loss": 1.5008,
      "step": 60
    },
    {
      "epoch": 0.3170731707317073,
      "grad_norm": 1.2578125,
      "learning_rate": 0.00012621359223300972,
      "loss": 1.3289,
      "step": 65
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 2.796875,
      "learning_rate": 0.0001359223300970874,
      "loss": 1.2198,
      "step": 70
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 1.4375,
      "learning_rate": 0.00014563106796116506,
      "loss": 1.1493,
      "step": 75
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 8.1875,
      "learning_rate": 0.0001553398058252427,
      "loss": 1.0862,
      "step": 80
    },
    {
      "epoch": 0.4146341463414634,
      "grad_norm": 3.0625,
      "learning_rate": 0.0001650485436893204,
      "loss": 1.0768,
      "step": 85
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00017475728155339805,
      "loss": 1.0468,
      "step": 90
    },
    {
      "epoch": 0.4634146341463415,
      "grad_norm": 4.21875,
      "learning_rate": 0.00018446601941747576,
      "loss": 0.9991,
      "step": 95
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 2.0625,
      "learning_rate": 0.0001941747572815534,
      "loss": 0.9596,
      "step": 100
    },
    {
      "epoch": 0.5121951219512195,
      "grad_norm": 1.5078125,
      "learning_rate": 0.00019999767797859854,
      "loss": 0.9712,
      "step": 105
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001999715564762413,
      "loss": 0.9139,
      "step": 110
    },
    {
      "epoch": 0.5609756097560976,
      "grad_norm": 1.1875,
      "learning_rate": 0.00019991641855173097,
      "loss": 0.9175,
      "step": 115
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 1.25,
      "learning_rate": 0.00019983228020867242,
      "loss": 0.8959,
      "step": 120
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00019971916586794867,
      "loss": 0.8605,
      "step": 125
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00019957710836063263,
      "loss": 0.8857,
      "step": 130
    },
    {
      "epoch": 0.6585365853658537,
      "grad_norm": 1.8046875,
      "learning_rate": 0.00019940614891845809,
      "loss": 0.8533,
      "step": 135
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 1.9140625,
      "learning_rate": 0.00019920633716185226,
      "loss": 0.8332,
      "step": 140
    },
    {
      "epoch": 0.7073170731707317,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019897773108553378,
      "loss": 0.8485,
      "step": 145
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 1.9296875,
      "learning_rate": 0.00019872039704167964,
      "loss": 0.8479,
      "step": 150
    },
    {
      "epoch": 0.7560975609756098,
      "grad_norm": 1.4921875,
      "learning_rate": 0.00019843440972066697,
      "loss": 0.8418,
      "step": 155
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 1.9296875,
      "learning_rate": 0.00019811985212939416,
      "loss": 0.8513,
      "step": 160
    },
    {
      "epoch": 0.8048780487804879,
      "grad_norm": 1.5234375,
      "learning_rate": 0.00019777681556718864,
      "loss": 0.8217,
      "step": 165
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 1.8671875,
      "learning_rate": 0.00019740539959930725,
      "loss": 0.8169,
      "step": 170
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019700571202803797,
      "loss": 0.8386,
      "step": 175
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00019657786886141052,
      "loss": 0.8144,
      "step": 180
    },
    {
      "epoch": 0.9024390243902439,
      "grad_norm": 4.15625,
      "learning_rate": 0.00019612199427952552,
      "loss": 0.8229,
      "step": 185
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00019563822059851145,
      "loss": 0.8128,
      "step": 190
    },
    {
      "epoch": 0.9512195121951219,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019512668823212055,
      "loss": 0.8072,
      "step": 195
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.6484375,
      "learning_rate": 0.0001945875456509739,
      "loss": 0.8097,
      "step": 200
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00019402094933946857,
      "loss": 0.7892,
      "step": 205
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.2406020164489746,
      "eval_runtime": 0.9998,
      "eval_samples_per_second": 5.001,
      "eval_steps_per_second": 2.0,
      "step": 205
    },
    {
      "epoch": 1.024390243902439,
      "grad_norm": 0.6796875,
      "learning_rate": 0.0001934270637503584,
      "loss": 0.7753,
      "step": 210
    },
    {
      "epoch": 1.048780487804878,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019280606125702203,
      "loss": 0.7289,
      "step": 215
    },
    {
      "epoch": 1.0731707317073171,
      "grad_norm": 0.625,
      "learning_rate": 0.00019215812210343226,
      "loss": 0.7656,
      "step": 220
    },
    {
      "epoch": 1.0975609756097562,
      "grad_norm": 0.65625,
      "learning_rate": 0.00019148343435184079,
      "loss": 0.7603,
      "step": 225
    },
    {
      "epoch": 1.1219512195121952,
      "grad_norm": 0.671875,
      "learning_rate": 0.00019078219382819353,
      "loss": 0.7491,
      "step": 230
    },
    {
      "epoch": 1.146341463414634,
      "grad_norm": 0.56640625,
      "learning_rate": 0.00019005460406529311,
      "loss": 0.7249,
      "step": 235
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 0.57421875,
      "learning_rate": 0.00018930087624372387,
      "loss": 0.7755,
      "step": 240
    },
    {
      "epoch": 1.1951219512195121,
      "grad_norm": 0.703125,
      "learning_rate": 0.00018852122913055742,
      "loss": 0.7553,
      "step": 245
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018771588901585635,
      "loss": 0.7482,
      "step": 250
    },
    {
      "epoch": 1.2439024390243902,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018688508964699404,
      "loss": 0.7402,
      "step": 255
    },
    {
      "epoch": 1.2682926829268293,
      "grad_norm": 1.015625,
      "learning_rate": 0.00018602907216081044,
      "loss": 0.7263,
      "step": 260
    },
    {
      "epoch": 1.2926829268292683,
      "grad_norm": 1.3828125,
      "learning_rate": 0.0001851480850136228,
      "loss": 0.7435,
      "step": 265
    },
    {
      "epoch": 1.3170731707317074,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00018424238390911198,
      "loss": 0.7457,
      "step": 270
    },
    {
      "epoch": 1.3414634146341464,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018331223172410535,
      "loss": 0.7483,
      "step": 275
    },
    {
      "epoch": 1.3658536585365852,
      "grad_norm": 0.609375,
      "learning_rate": 0.00018235789843227756,
      "loss": 0.7486,
      "step": 280
    },
    {
      "epoch": 1.3902439024390243,
      "grad_norm": 1.265625,
      "learning_rate": 0.00018137966102579176,
      "loss": 0.7548,
      "step": 285
    },
    {
      "epoch": 1.4146341463414633,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00018037780343490312,
      "loss": 0.7282,
      "step": 290
    },
    {
      "epoch": 1.4390243902439024,
      "grad_norm": 0.6875,
      "learning_rate": 0.00017935261644554942,
      "loss": 0.7415,
      "step": 295
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.73046875,
      "learning_rate": 0.0001783043976149511,
      "loss": 0.7362,
      "step": 300
    },
    {
      "epoch": 1.4878048780487805,
      "grad_norm": 0.578125,
      "learning_rate": 0.0001772334511852463,
      "loss": 0.724,
      "step": 305
    },
    {
      "epoch": 1.5121951219512195,
      "grad_norm": 0.59765625,
      "learning_rate": 0.0001761400879951856,
      "loss": 0.7249,
      "step": 310
    },
    {
      "epoch": 1.5365853658536586,
      "grad_norm": 1.6953125,
      "learning_rate": 0.00017502462538991205,
      "loss": 0.7212,
      "step": 315
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 0.6328125,
      "learning_rate": 0.00017388738712885275,
      "loss": 0.7194,
      "step": 320
    },
    {
      "epoch": 1.5853658536585367,
      "grad_norm": 1.3125,
      "learning_rate": 0.0001727287032917487,
      "loss": 0.765,
      "step": 325
    },
    {
      "epoch": 1.6097560975609757,
      "grad_norm": 1.578125,
      "learning_rate": 0.00017154891018285028,
      "loss": 0.7318,
      "step": 330
    },
    {
      "epoch": 1.6341463414634148,
      "grad_norm": 0.50390625,
      "learning_rate": 0.00017034835023330597,
      "loss": 0.7396,
      "step": 335
    },
    {
      "epoch": 1.6585365853658538,
      "grad_norm": 2.0625,
      "learning_rate": 0.00016912737190177292,
      "loss": 0.737,
      "step": 340
    },
    {
      "epoch": 1.6829268292682928,
      "grad_norm": 0.59765625,
      "learning_rate": 0.00016788632957327772,
      "loss": 0.7302,
      "step": 345
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00016662558345635753,
      "loss": 0.7193,
      "step": 350
    },
    {
      "epoch": 1.7317073170731707,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00016534549947851062,
      "loss": 0.7147,
      "step": 355
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00016404644917998698,
      "loss": 0.7539,
      "step": 360
    },
    {
      "epoch": 1.7804878048780488,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00016272880960595024,
      "loss": 0.754,
      "step": 365
    },
    {
      "epoch": 1.8048780487804879,
      "grad_norm": 0.90625,
      "learning_rate": 0.00016139296319704117,
      "loss": 0.7346,
      "step": 370
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00016003929767837588,
      "loss": 0.6961,
      "step": 375
    },
    {
      "epoch": 1.8536585365853657,
      "grad_norm": 1.078125,
      "learning_rate": 0.00015866820594700944,
      "loss": 0.7259,
      "step": 380
    },
    {
      "epoch": 1.8780487804878048,
      "grad_norm": 0.65625,
      "learning_rate": 0.00015728008595789926,
      "loss": 0.7198,
      "step": 385
    },
    {
      "epoch": 1.9024390243902438,
      "grad_norm": 0.515625,
      "learning_rate": 0.0001558753406083995,
      "loss": 0.6858,
      "step": 390
    },
    {
      "epoch": 1.9268292682926829,
      "grad_norm": 0.60546875,
      "learning_rate": 0.00015445437762132174,
      "loss": 0.6987,
      "step": 395
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.5625,
      "learning_rate": 0.0001530176094265945,
      "loss": 0.7054,
      "step": 400
    },
    {
      "epoch": 1.975609756097561,
      "grad_norm": 0.62890625,
      "learning_rate": 0.00015156545304155698,
      "loss": 0.7226,
      "step": 405
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.52734375,
      "learning_rate": 0.00015009832994992102,
      "loss": 0.7033,
      "step": 410
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.2004494667053223,
      "eval_runtime": 1.0005,
      "eval_samples_per_second": 4.998,
      "eval_steps_per_second": 1.999,
      "step": 410
    },
    {
      "epoch": 2.024390243902439,
      "grad_norm": 0.6328125,
      "learning_rate": 0.0001486166659794368,
      "loss": 0.6398,
      "step": 415
    },
    {
      "epoch": 2.048780487804878,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00014712089117829776,
      "loss": 0.647,
      "step": 420
    },
    {
      "epoch": 2.073170731707317,
      "grad_norm": 0.66796875,
      "learning_rate": 0.0001456114396903204,
      "loss": 0.6333,
      "step": 425
    },
    {
      "epoch": 2.097560975609756,
      "grad_norm": 0.60546875,
      "learning_rate": 0.0001440887496289356,
      "loss": 0.6296,
      "step": 430
    },
    {
      "epoch": 2.1219512195121952,
      "grad_norm": 0.546875,
      "learning_rate": 0.00014255326295002754,
      "loss": 0.6516,
      "step": 435
    },
    {
      "epoch": 2.1463414634146343,
      "grad_norm": 0.671875,
      "learning_rate": 0.00014100542532365724,
      "loss": 0.6468,
      "step": 440
    },
    {
      "epoch": 2.1707317073170733,
      "grad_norm": 0.578125,
      "learning_rate": 0.0001394456860047086,
      "loss": 0.6472,
      "step": 445
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00013787449770249336,
      "loss": 0.6587,
      "step": 450
    },
    {
      "epoch": 2.2195121951219514,
      "grad_norm": 0.70703125,
      "learning_rate": 0.0001362923164493538,
      "loss": 0.6505,
      "step": 455
    },
    {
      "epoch": 2.2439024390243905,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00013469960146830073,
      "loss": 0.6463,
      "step": 460
    },
    {
      "epoch": 2.2682926829268295,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00013309681503972565,
      "loss": 0.6372,
      "step": 465
    },
    {
      "epoch": 2.292682926829268,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00013148442236722506,
      "loss": 0.6285,
      "step": 470
    },
    {
      "epoch": 2.317073170731707,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00012986289144257705,
      "loss": 0.6645,
      "step": 475
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00012823269290990777,
      "loss": 0.6663,
      "step": 480
    },
    {
      "epoch": 2.3658536585365852,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001265942999290887,
      "loss": 0.6495,
      "step": 485
    },
    {
      "epoch": 2.3902439024390243,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00012494818803840367,
      "loss": 0.6429,
      "step": 490
    },
    {
      "epoch": 2.4146341463414633,
      "grad_norm": 1.796875,
      "learning_rate": 0.00012329483501652492,
      "loss": 0.6573,
      "step": 495
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 0.921875,
      "learning_rate": 0.00012163472074383994,
      "loss": 0.6487,
      "step": 500
    },
    {
      "epoch": 2.4634146341463414,
      "grad_norm": 0.59375,
      "learning_rate": 0.00011996832706316739,
      "loss": 0.6726,
      "step": 505
    },
    {
      "epoch": 2.4878048780487805,
      "grad_norm": 0.5546875,
      "learning_rate": 0.00011829613763990384,
      "loss": 0.6546,
      "step": 510
    },
    {
      "epoch": 2.5121951219512195,
      "grad_norm": 0.57421875,
      "learning_rate": 0.00011661863782164153,
      "loss": 0.6306,
      "step": 515
    },
    {
      "epoch": 2.5365853658536586,
      "grad_norm": 0.61328125,
      "learning_rate": 0.00011493631449729767,
      "loss": 0.6591,
      "step": 520
    },
    {
      "epoch": 2.5609756097560976,
      "grad_norm": 0.578125,
      "learning_rate": 0.00011324965595579666,
      "loss": 0.6387,
      "step": 525
    },
    {
      "epoch": 2.5853658536585367,
      "grad_norm": 0.60546875,
      "learning_rate": 0.00011155915174434561,
      "loss": 0.6539,
      "step": 530
    },
    {
      "epoch": 2.6097560975609757,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00010986529252634503,
      "loss": 0.6654,
      "step": 535
    },
    {
      "epoch": 2.6341463414634148,
      "grad_norm": 0.859375,
      "learning_rate": 0.00010816856993897522,
      "loss": 0.6471,
      "step": 540
    },
    {
      "epoch": 2.658536585365854,
      "grad_norm": 0.5703125,
      "learning_rate": 0.00010646947645050023,
      "loss": 0.6482,
      "step": 545
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 0.5703125,
      "learning_rate": 0.00010476850521733048,
      "loss": 0.6252,
      "step": 550
    },
    {
      "epoch": 2.7073170731707314,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00010306614994088582,
      "loss": 0.6605,
      "step": 555
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 0.52734375,
      "learning_rate": 0.00010136290472430013,
      "loss": 0.646,
      "step": 560
    },
    {
      "epoch": 2.7560975609756095,
      "grad_norm": 0.64453125,
      "learning_rate": 9.965926392900956e-05,
      "loss": 0.6492,
      "step": 565
    },
    {
      "epoch": 2.7804878048780486,
      "grad_norm": 0.63671875,
      "learning_rate": 9.795572203126573e-05,
      "loss": 0.6477,
      "step": 570
    },
    {
      "epoch": 2.8048780487804876,
      "grad_norm": 0.703125,
      "learning_rate": 9.625277347861553e-05,
      "loss": 0.6268,
      "step": 575
    },
    {
      "epoch": 2.8292682926829267,
      "grad_norm": 0.80078125,
      "learning_rate": 9.455091254638939e-05,
      "loss": 0.6364,
      "step": 580
    },
    {
      "epoch": 2.8536585365853657,
      "grad_norm": 0.62890625,
      "learning_rate": 9.285063319423939e-05,
      "loss": 0.6497,
      "step": 585
    },
    {
      "epoch": 2.8780487804878048,
      "grad_norm": 0.58203125,
      "learning_rate": 9.115242892276909e-05,
      "loss": 0.6446,
      "step": 590
    },
    {
      "epoch": 2.902439024390244,
      "grad_norm": 0.70703125,
      "learning_rate": 8.945679263029661e-05,
      "loss": 0.6657,
      "step": 595
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 0.64453125,
      "learning_rate": 8.776421646979232e-05,
      "loss": 0.6498,
      "step": 600
    },
    {
      "epoch": 2.951219512195122,
      "grad_norm": 0.5546875,
      "learning_rate": 8.607519170603328e-05,
      "loss": 0.6623,
      "step": 605
    },
    {
      "epoch": 2.975609756097561,
      "grad_norm": 0.55078125,
      "learning_rate": 8.439020857301503e-05,
      "loss": 0.6467,
      "step": 610
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.6875,
      "learning_rate": 8.270975613166281e-05,
      "loss": 0.6346,
      "step": 615
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.245811939239502,
      "eval_runtime": 1.001,
      "eval_samples_per_second": 4.995,
      "eval_steps_per_second": 1.998,
      "step": 615
    },
    {
      "epoch": 3.024390243902439,
      "grad_norm": 0.609375,
      "learning_rate": 8.103432212788323e-05,
      "loss": 0.5718,
      "step": 620
    },
    {
      "epoch": 3.048780487804878,
      "grad_norm": 0.66796875,
      "learning_rate": 7.936439285099752e-05,
      "loss": 0.5989,
      "step": 625
    },
    {
      "epoch": 3.073170731707317,
      "grad_norm": 0.62890625,
      "learning_rate": 7.770045299259774e-05,
      "loss": 0.598,
      "step": 630
    },
    {
      "epoch": 3.097560975609756,
      "grad_norm": 0.5625,
      "learning_rate": 7.60429855058664e-05,
      "loss": 0.5731,
      "step": 635
    },
    {
      "epoch": 3.1219512195121952,
      "grad_norm": 0.56640625,
      "learning_rate": 7.439247146540109e-05,
      "loss": 0.6034,
      "step": 640
    },
    {
      "epoch": 3.1463414634146343,
      "grad_norm": 0.6015625,
      "learning_rate": 7.274938992758403e-05,
      "loss": 0.5669,
      "step": 645
    },
    {
      "epoch": 3.1707317073170733,
      "grad_norm": 0.59765625,
      "learning_rate": 7.111421779153745e-05,
      "loss": 0.5873,
      "step": 650
    },
    {
      "epoch": 3.1951219512195124,
      "grad_norm": 0.60546875,
      "learning_rate": 6.94874296607052e-05,
      "loss": 0.5674,
      "step": 655
    },
    {
      "epoch": 3.2195121951219514,
      "grad_norm": 0.54296875,
      "learning_rate": 6.786949770510071e-05,
      "loss": 0.5726,
      "step": 660
    },
    {
      "epoch": 3.2439024390243905,
      "grad_norm": 0.578125,
      "learning_rate": 6.626089152426097e-05,
      "loss": 0.6006,
      "step": 665
    },
    {
      "epoch": 3.2682926829268295,
      "grad_norm": 0.58984375,
      "learning_rate": 6.4662078010947e-05,
      "loss": 0.5729,
      "step": 670
    },
    {
      "epoch": 3.292682926829268,
      "grad_norm": 0.546875,
      "learning_rate": 6.307352121562949e-05,
      "loss": 0.5719,
      "step": 675
    },
    {
      "epoch": 3.317073170731707,
      "grad_norm": 0.5859375,
      "learning_rate": 6.149568221179993e-05,
      "loss": 0.571,
      "step": 680
    },
    {
      "epoch": 3.341463414634146,
      "grad_norm": 0.5859375,
      "learning_rate": 5.992901896214526e-05,
      "loss": 0.5674,
      "step": 685
    },
    {
      "epoch": 3.3658536585365852,
      "grad_norm": 0.6640625,
      "learning_rate": 5.837398618562584e-05,
      "loss": 0.5772,
      "step": 690
    },
    {
      "epoch": 3.3902439024390243,
      "grad_norm": 0.58203125,
      "learning_rate": 5.68310352254946e-05,
      "loss": 0.5893,
      "step": 695
    },
    {
      "epoch": 3.4146341463414633,
      "grad_norm": 0.62109375,
      "learning_rate": 5.5300613918296295e-05,
      "loss": 0.5771,
      "step": 700
    },
    {
      "epoch": 3.4390243902439024,
      "grad_norm": 0.58203125,
      "learning_rate": 5.378316646388424e-05,
      "loss": 0.5721,
      "step": 705
    },
    {
      "epoch": 3.4634146341463414,
      "grad_norm": 0.578125,
      "learning_rate": 5.227913329649271e-05,
      "loss": 0.5788,
      "step": 710
    },
    {
      "epoch": 3.4878048780487805,
      "grad_norm": 0.59765625,
      "learning_rate": 5.078895095690249e-05,
      "loss": 0.5693,
      "step": 715
    },
    {
      "epoch": 3.5121951219512195,
      "grad_norm": 0.5703125,
      "learning_rate": 4.931305196573621e-05,
      "loss": 0.6107,
      "step": 720
    },
    {
      "epoch": 3.5365853658536586,
      "grad_norm": 0.58203125,
      "learning_rate": 4.78518646979206e-05,
      "loss": 0.5695,
      "step": 725
    },
    {
      "epoch": 3.5609756097560976,
      "grad_norm": 0.58984375,
      "learning_rate": 4.6405813258352135e-05,
      "loss": 0.5707,
      "step": 730
    },
    {
      "epoch": 3.5853658536585367,
      "grad_norm": 0.62109375,
      "learning_rate": 4.4975317358801885e-05,
      "loss": 0.5635,
      "step": 735
    },
    {
      "epoch": 3.6097560975609757,
      "grad_norm": 0.57421875,
      "learning_rate": 4.3560792196095543e-05,
      "loss": 0.5747,
      "step": 740
    },
    {
      "epoch": 3.6341463414634148,
      "grad_norm": 0.57421875,
      "learning_rate": 4.216264833160396e-05,
      "loss": 0.5856,
      "step": 745
    },
    {
      "epoch": 3.658536585365854,
      "grad_norm": 0.6015625,
      "learning_rate": 4.0781291572078806e-05,
      "loss": 0.5797,
      "step": 750
    },
    {
      "epoch": 3.682926829268293,
      "grad_norm": 0.55859375,
      "learning_rate": 3.941712285186878e-05,
      "loss": 0.5643,
      "step": 755
    },
    {
      "epoch": 3.7073170731707314,
      "grad_norm": 0.5859375,
      "learning_rate": 3.807053811654948e-05,
      "loss": 0.5723,
      "step": 760
    },
    {
      "epoch": 3.7317073170731705,
      "grad_norm": 0.59375,
      "learning_rate": 3.674192820800156e-05,
      "loss": 0.5894,
      "step": 765
    },
    {
      "epoch": 3.7560975609756095,
      "grad_norm": 0.55859375,
      "learning_rate": 3.543167875097013e-05,
      "loss": 0.5505,
      "step": 770
    },
    {
      "epoch": 3.7804878048780486,
      "grad_norm": 0.5859375,
      "learning_rate": 3.4140170041138385e-05,
      "loss": 0.5752,
      "step": 775
    },
    {
      "epoch": 3.8048780487804876,
      "grad_norm": 0.55859375,
      "learning_rate": 3.286777693474803e-05,
      "loss": 0.5629,
      "step": 780
    },
    {
      "epoch": 3.8292682926829267,
      "grad_norm": 0.59765625,
      "learning_rate": 3.1614868739798495e-05,
      "loss": 0.5787,
      "step": 785
    },
    {
      "epoch": 3.8536585365853657,
      "grad_norm": 0.6015625,
      "learning_rate": 3.0381809108856398e-05,
      "loss": 0.574,
      "step": 790
    },
    {
      "epoch": 3.8780487804878048,
      "grad_norm": 0.6171875,
      "learning_rate": 2.9168955933506648e-05,
      "loss": 0.5826,
      "step": 795
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 0.578125,
      "learning_rate": 2.79766612404755e-05,
      "loss": 0.5725,
      "step": 800
    },
    {
      "epoch": 3.926829268292683,
      "grad_norm": 0.55859375,
      "learning_rate": 2.6805271089455986e-05,
      "loss": 0.5612,
      "step": 805
    },
    {
      "epoch": 3.951219512195122,
      "grad_norm": 0.6328125,
      "learning_rate": 2.565512547266511e-05,
      "loss": 0.5721,
      "step": 810
    },
    {
      "epoch": 3.975609756097561,
      "grad_norm": 0.5625,
      "learning_rate": 2.4526558216162322e-05,
      "loss": 0.5725,
      "step": 815
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.56640625,
      "learning_rate": 2.3419896882957527e-05,
      "loss": 0.5755,
      "step": 820
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.2934529781341553,
      "eval_runtime": 0.9995,
      "eval_samples_per_second": 5.002,
      "eval_steps_per_second": 2.001,
      "step": 820
    },
    {
      "epoch": 4.024390243902439,
      "grad_norm": 0.55859375,
      "learning_rate": 2.2335462677936957e-05,
      "loss": 0.5324,
      "step": 825
    },
    {
      "epoch": 4.048780487804878,
      "grad_norm": 0.61328125,
      "learning_rate": 2.1273570354634508e-05,
      "loss": 0.5386,
      "step": 830
    },
    {
      "epoch": 4.073170731707317,
      "grad_norm": 0.55859375,
      "learning_rate": 2.023452812387555e-05,
      "loss": 0.5296,
      "step": 835
    },
    {
      "epoch": 4.097560975609756,
      "grad_norm": 0.62890625,
      "learning_rate": 1.9218637564319696e-05,
      "loss": 0.5304,
      "step": 840
    },
    {
      "epoch": 4.121951219512195,
      "grad_norm": 0.57421875,
      "learning_rate": 1.8226193534928604e-05,
      "loss": 0.5554,
      "step": 845
    },
    {
      "epoch": 4.146341463414634,
      "grad_norm": 0.578125,
      "learning_rate": 1.725748408938408e-05,
      "loss": 0.5376,
      "step": 850
    },
    {
      "epoch": 4.170731707317073,
      "grad_norm": 0.58984375,
      "learning_rate": 1.63127903924815e-05,
      "loss": 0.5335,
      "step": 855
    },
    {
      "epoch": 4.195121951219512,
      "grad_norm": 0.5625,
      "learning_rate": 1.5392386638522482e-05,
      "loss": 0.5298,
      "step": 860
    },
    {
      "epoch": 4.219512195121951,
      "grad_norm": 0.60546875,
      "learning_rate": 1.4496539971731026e-05,
      "loss": 0.5239,
      "step": 865
    },
    {
      "epoch": 4.2439024390243905,
      "grad_norm": 0.57421875,
      "learning_rate": 1.3625510408715714e-05,
      "loss": 0.5255,
      "step": 870
    },
    {
      "epoch": 4.2682926829268295,
      "grad_norm": 0.5859375,
      "learning_rate": 1.2779550763000703e-05,
      "loss": 0.5382,
      "step": 875
    },
    {
      "epoch": 4.2926829268292686,
      "grad_norm": 0.609375,
      "learning_rate": 1.1958906571647421e-05,
      "loss": 0.5469,
      "step": 880
    },
    {
      "epoch": 4.317073170731708,
      "grad_norm": 0.59375,
      "learning_rate": 1.1163816023988261e-05,
      "loss": 0.547,
      "step": 885
    },
    {
      "epoch": 4.341463414634147,
      "grad_norm": 0.56640625,
      "learning_rate": 1.0394509892492833e-05,
      "loss": 0.5387,
      "step": 890
    },
    {
      "epoch": 4.365853658536586,
      "grad_norm": 0.58203125,
      "learning_rate": 9.65121146578709e-06,
      "loss": 0.5487,
      "step": 895
    },
    {
      "epoch": 4.390243902439025,
      "grad_norm": 0.57421875,
      "learning_rate": 8.934136483844391e-06,
      "loss": 0.5306,
      "step": 900
    },
    {
      "epoch": 4.414634146341464,
      "grad_norm": 0.58984375,
      "learning_rate": 8.243493075367813e-06,
      "loss": 0.5259,
      "step": 905
    },
    {
      "epoch": 4.439024390243903,
      "grad_norm": 0.5703125,
      "learning_rate": 7.579481697381363e-06,
      "loss": 0.5473,
      "step": 910
    },
    {
      "epoch": 4.463414634146342,
      "grad_norm": 0.55859375,
      "learning_rate": 6.942295077048011e-06,
      "loss": 0.5329,
      "step": 915
    },
    {
      "epoch": 4.487804878048781,
      "grad_norm": 0.60546875,
      "learning_rate": 6.3321181557312815e-06,
      "loss": 0.551,
      "step": 920
    },
    {
      "epoch": 4.512195121951219,
      "grad_norm": 0.5703125,
      "learning_rate": 5.749128035316553e-06,
      "loss": 0.5508,
      "step": 925
    },
    {
      "epoch": 4.536585365853659,
      "grad_norm": 0.59375,
      "learning_rate": 5.193493926807835e-06,
      "loss": 0.5348,
      "step": 930
    },
    {
      "epoch": 4.560975609756097,
      "grad_norm": 0.58984375,
      "learning_rate": 4.665377101214863e-06,
      "loss": 0.5168,
      "step": 935
    },
    {
      "epoch": 4.585365853658536,
      "grad_norm": 0.61328125,
      "learning_rate": 4.164930842744608e-06,
      "loss": 0.5442,
      "step": 940
    },
    {
      "epoch": 4.609756097560975,
      "grad_norm": 0.59375,
      "learning_rate": 3.6923004043111444e-06,
      "loss": 0.5296,
      "step": 945
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 0.546875,
      "learning_rate": 3.2476229653763734e-06,
      "loss": 0.5566,
      "step": 950
    },
    {
      "epoch": 4.658536585365853,
      "grad_norm": 0.58984375,
      "learning_rate": 2.8310275921341944e-06,
      "loss": 0.5219,
      "step": 955
    },
    {
      "epoch": 4.682926829268292,
      "grad_norm": 0.578125,
      "learning_rate": 2.44263520004937e-06,
      "loss": 0.5577,
      "step": 960
    },
    {
      "epoch": 4.7073170731707314,
      "grad_norm": 0.57421875,
      "learning_rate": 2.0825585187623007e-06,
      "loss": 0.5235,
      "step": 965
    },
    {
      "epoch": 4.7317073170731705,
      "grad_norm": 0.578125,
      "learning_rate": 1.7509020593695302e-06,
      "loss": 0.547,
      "step": 970
    },
    {
      "epoch": 4.7560975609756095,
      "grad_norm": 0.57421875,
      "learning_rate": 1.4477620840897766e-06,
      "loss": 0.5331,
      "step": 975
    },
    {
      "epoch": 4.780487804878049,
      "grad_norm": 0.59375,
      "learning_rate": 1.1732265783241492e-06,
      "loss": 0.5251,
      "step": 980
    },
    {
      "epoch": 4.804878048780488,
      "grad_norm": 0.60546875,
      "learning_rate": 9.273752251186096e-07,
      "loss": 0.527,
      "step": 985
    },
    {
      "epoch": 4.829268292682927,
      "grad_norm": 0.57421875,
      "learning_rate": 7.102793820362829e-07,
      "loss": 0.5353,
      "step": 990
    },
    {
      "epoch": 4.853658536585366,
      "grad_norm": 0.58203125,
      "learning_rate": 5.22002060446125e-07,
      "loss": 0.5347,
      "step": 995
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 0.58984375,
      "learning_rate": 3.6259790723409683e-07,
      "loss": 0.5436,
      "step": 1000
    },
    {
      "epoch": 4.902439024390244,
      "grad_norm": 0.5625,
      "learning_rate": 2.3211318894205136e-07,
      "loss": 0.535,
      "step": 1005
    },
    {
      "epoch": 4.926829268292683,
      "grad_norm": 0.58984375,
      "learning_rate": 1.3058577833905404e-07,
      "loss": 0.5213,
      "step": 1010
    },
    {
      "epoch": 4.951219512195122,
      "grad_norm": 0.55859375,
      "learning_rate": 5.804514342889755e-08,
      "loss": 0.5247,
      "step": 1015
    },
    {
      "epoch": 4.975609756097561,
      "grad_norm": 0.60546875,
      "learning_rate": 1.4512338897121335e-08,
      "loss": 0.5333,
      "step": 1020
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.546875,
      "learning_rate": 0.0,
      "loss": 0.5144,
      "step": 1025
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.326845407485962,
      "eval_runtime": 1.0003,
      "eval_samples_per_second": 4.998,
      "eval_steps_per_second": 1.999,
      "step": 1025
    },
    {
      "epoch": 5.0,
      "step": 1025,
      "total_flos": 1.5670950020754964e+18,
      "train_loss": 1.4066738275202308,
      "train_runtime": 8016.4558,
      "train_samples_per_second": 2.044,
      "train_steps_per_second": 0.128
    }
  ],
  "logging_steps": 5,
  "max_steps": 1025,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5670950020754964e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}