|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 66, |
|
"global_step": 326, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003067484662576687, |
|
"grad_norm": 1.0254060683433053, |
|
"learning_rate": 5e-06, |
|
"loss": 1.9557, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003067484662576687, |
|
"eval_loss": 2.6437082290649414, |
|
"eval_runtime": 55.4152, |
|
"eval_samples_per_second": 1.805, |
|
"eval_steps_per_second": 0.126, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006134969325153374, |
|
"grad_norm": 0.5293660177597584, |
|
"learning_rate": 1e-05, |
|
"loss": 1.9268, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009202453987730062, |
|
"grad_norm": 0.6031237810490027, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.9666, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.012269938650306749, |
|
"grad_norm": 0.5216691776821837, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9176, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015337423312883436, |
|
"grad_norm": 0.45736012052053565, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.9172, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.018404907975460124, |
|
"grad_norm": 0.4721331330094363, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9038, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02147239263803681, |
|
"grad_norm": 0.4699970169077475, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.972, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.024539877300613498, |
|
"grad_norm": 0.5998147513619175, |
|
"learning_rate": 4e-05, |
|
"loss": 1.9115, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.027607361963190184, |
|
"grad_norm": 0.39982194363235835, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.9362, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03067484662576687, |
|
"grad_norm": 0.41316001445589784, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9367, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03374233128834356, |
|
"grad_norm": 1.978145485337434, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.9018, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03680981595092025, |
|
"grad_norm": 0.5763394527514556, |
|
"learning_rate": 6e-05, |
|
"loss": 1.9239, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03987730061349693, |
|
"grad_norm": 0.6656094180752898, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 1.8601, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04294478527607362, |
|
"grad_norm": 0.3779888950718134, |
|
"learning_rate": 7e-05, |
|
"loss": 1.9467, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.046012269938650305, |
|
"grad_norm": 0.4210293643738542, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.9491, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.049079754601226995, |
|
"grad_norm": 0.284470526924256, |
|
"learning_rate": 8e-05, |
|
"loss": 1.96, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05214723926380368, |
|
"grad_norm": 0.4511944107373649, |
|
"learning_rate": 8.5e-05, |
|
"loss": 1.9688, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05521472392638037, |
|
"grad_norm": 0.5213533339486691, |
|
"learning_rate": 9e-05, |
|
"loss": 1.8883, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05828220858895705, |
|
"grad_norm": 0.3529095514608687, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.9652, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06134969325153374, |
|
"grad_norm": 0.37388599933304034, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9701, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06441717791411043, |
|
"grad_norm": 0.6715118705762056, |
|
"learning_rate": 9.999762843192279e-05, |
|
"loss": 1.9591, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06748466257668712, |
|
"grad_norm": 0.3339477252516958, |
|
"learning_rate": 9.999051397766162e-05, |
|
"loss": 1.8851, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0705521472392638, |
|
"grad_norm": 0.38292464677189253, |
|
"learning_rate": 9.997865738710147e-05, |
|
"loss": 1.9505, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0736196319018405, |
|
"grad_norm": 0.46332198422774334, |
|
"learning_rate": 9.996205990996288e-05, |
|
"loss": 1.8819, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07668711656441718, |
|
"grad_norm": 0.32033971816842144, |
|
"learning_rate": 9.994072329567015e-05, |
|
"loss": 1.9778, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07975460122699386, |
|
"grad_norm": 0.32764211011622874, |
|
"learning_rate": 9.991464979316699e-05, |
|
"loss": 2.0035, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08282208588957055, |
|
"grad_norm": 0.35749570152374016, |
|
"learning_rate": 9.988384215067945e-05, |
|
"loss": 1.897, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08588957055214724, |
|
"grad_norm": 0.47517571287279864, |
|
"learning_rate": 9.984830361542625e-05, |
|
"loss": 1.9916, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08895705521472393, |
|
"grad_norm": 0.37844919890358947, |
|
"learning_rate": 9.980803793327656e-05, |
|
"loss": 1.9787, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09202453987730061, |
|
"grad_norm": 0.3392783686369942, |
|
"learning_rate": 9.976304934835509e-05, |
|
"loss": 1.9915, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0950920245398773, |
|
"grad_norm": 0.3672803421436023, |
|
"learning_rate": 9.97133426025948e-05, |
|
"loss": 1.9237, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09815950920245399, |
|
"grad_norm": 0.3717328207326788, |
|
"learning_rate": 9.965892293523712e-05, |
|
"loss": 1.8755, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.10122699386503067, |
|
"grad_norm": 0.41380648649234975, |
|
"learning_rate": 9.959979608227961e-05, |
|
"loss": 2.021, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10429447852760736, |
|
"grad_norm": 1.0263652968268477, |
|
"learning_rate": 9.95359682758715e-05, |
|
"loss": 1.9528, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10736196319018405, |
|
"grad_norm": 0.9592485389518621, |
|
"learning_rate": 9.946744624365668e-05, |
|
"loss": 1.9055, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.11042944785276074, |
|
"grad_norm": 0.43725271995243464, |
|
"learning_rate": 9.939423720806468e-05, |
|
"loss": 1.9306, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11349693251533742, |
|
"grad_norm": 0.3175345165915247, |
|
"learning_rate": 9.931634888554937e-05, |
|
"loss": 1.9159, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1165644171779141, |
|
"grad_norm": 0.4731845530714391, |
|
"learning_rate": 9.923378948577559e-05, |
|
"loss": 1.993, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1196319018404908, |
|
"grad_norm": 0.3274613986874974, |
|
"learning_rate": 9.914656771075387e-05, |
|
"loss": 1.8971, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12269938650306748, |
|
"grad_norm": 0.4175774555118117, |
|
"learning_rate": 9.90546927539232e-05, |
|
"loss": 1.9529, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12576687116564417, |
|
"grad_norm": 0.4723214170983414, |
|
"learning_rate": 9.895817429918203e-05, |
|
"loss": 1.9775, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12883435582822086, |
|
"grad_norm": 0.5517874328207245, |
|
"learning_rate": 9.885702251986753e-05, |
|
"loss": 1.9704, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.13190184049079753, |
|
"grad_norm": 0.7112812651734346, |
|
"learning_rate": 9.875124807768324e-05, |
|
"loss": 1.9396, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13496932515337423, |
|
"grad_norm": 0.4122128687502141, |
|
"learning_rate": 9.864086212157544e-05, |
|
"loss": 1.9495, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.13803680981595093, |
|
"grad_norm": 0.33784719392668305, |
|
"learning_rate": 9.852587628655787e-05, |
|
"loss": 1.8904, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1411042944785276, |
|
"grad_norm": 0.281184642101553, |
|
"learning_rate": 9.840630269248549e-05, |
|
"loss": 1.9156, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1441717791411043, |
|
"grad_norm": 0.7601259994555819, |
|
"learning_rate": 9.828215394277687e-05, |
|
"loss": 1.9516, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.147239263803681, |
|
"grad_norm": 0.36449789385058556, |
|
"learning_rate": 9.815344312308587e-05, |
|
"loss": 1.9182, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.15030674846625766, |
|
"grad_norm": 0.32613788602651017, |
|
"learning_rate": 9.80201837999223e-05, |
|
"loss": 1.9367, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15337423312883436, |
|
"grad_norm": 0.4437625986967123, |
|
"learning_rate": 9.788239001922206e-05, |
|
"loss": 1.8838, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15644171779141106, |
|
"grad_norm": 0.7368917728925937, |
|
"learning_rate": 9.774007630486651e-05, |
|
"loss": 1.9125, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.15950920245398773, |
|
"grad_norm": 0.43661779665549927, |
|
"learning_rate": 9.759325765715176e-05, |
|
"loss": 1.9309, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.16257668711656442, |
|
"grad_norm": 0.27925292993087114, |
|
"learning_rate": 9.744194955120748e-05, |
|
"loss": 1.9374, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1656441717791411, |
|
"grad_norm": 0.46390992287233235, |
|
"learning_rate": 9.728616793536588e-05, |
|
"loss": 1.9425, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1687116564417178, |
|
"grad_norm": 0.2514992126441497, |
|
"learning_rate": 9.712592922948057e-05, |
|
"loss": 1.9482, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.17177914110429449, |
|
"grad_norm": 0.2703640459793386, |
|
"learning_rate": 9.6961250323196e-05, |
|
"loss": 1.8895, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.17484662576687116, |
|
"grad_norm": 0.561176184389631, |
|
"learning_rate": 9.679214857416717e-05, |
|
"loss": 1.928, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.17791411042944785, |
|
"grad_norm": 0.29671160399395613, |
|
"learning_rate": 9.661864180623003e-05, |
|
"loss": 1.9542, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.18098159509202455, |
|
"grad_norm": 0.28259623949277235, |
|
"learning_rate": 9.644074830752293e-05, |
|
"loss": 1.9519, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.18404907975460122, |
|
"grad_norm": 0.32102511884381013, |
|
"learning_rate": 9.625848682855884e-05, |
|
"loss": 1.8776, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18711656441717792, |
|
"grad_norm": 1.6811025479349568, |
|
"learning_rate": 9.607187658024912e-05, |
|
"loss": 1.9016, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1901840490797546, |
|
"grad_norm": 0.2951789033160566, |
|
"learning_rate": 9.588093723187857e-05, |
|
"loss": 1.9204, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.19325153374233128, |
|
"grad_norm": 0.35508359779387055, |
|
"learning_rate": 9.568568890903221e-05, |
|
"loss": 1.9144, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.19631901840490798, |
|
"grad_norm": 0.3620090919465414, |
|
"learning_rate": 9.548615219147405e-05, |
|
"loss": 1.8699, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19938650306748465, |
|
"grad_norm": 0.3475528667692185, |
|
"learning_rate": 9.528234811097782e-05, |
|
"loss": 1.855, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.20245398773006135, |
|
"grad_norm": 0.2922421805064443, |
|
"learning_rate": 9.507429814911024e-05, |
|
"loss": 1.8648, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.20245398773006135, |
|
"eval_loss": 2.6012535095214844, |
|
"eval_runtime": 55.5905, |
|
"eval_samples_per_second": 1.799, |
|
"eval_steps_per_second": 0.126, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.20552147239263804, |
|
"grad_norm": 0.525841804476554, |
|
"learning_rate": 9.486202423496679e-05, |
|
"loss": 1.8319, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.2085889570552147, |
|
"grad_norm": 0.33648300397500824, |
|
"learning_rate": 9.46455487428603e-05, |
|
"loss": 1.889, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2116564417177914, |
|
"grad_norm": 0.2982307248009996, |
|
"learning_rate": 9.442489448996261e-05, |
|
"loss": 1.9004, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.2147239263803681, |
|
"grad_norm": 1.3863327829569763, |
|
"learning_rate": 9.42000847338996e-05, |
|
"loss": 1.9529, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21779141104294478, |
|
"grad_norm": 0.3507002144386185, |
|
"learning_rate": 9.397114317029975e-05, |
|
"loss": 1.9561, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.22085889570552147, |
|
"grad_norm": 0.26047398296778806, |
|
"learning_rate": 9.373809393029654e-05, |
|
"loss": 1.9666, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.22392638036809817, |
|
"grad_norm": 0.31142946623961487, |
|
"learning_rate": 9.350096157798505e-05, |
|
"loss": 1.9669, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.22699386503067484, |
|
"grad_norm": 0.6059103096641723, |
|
"learning_rate": 9.325977110783264e-05, |
|
"loss": 1.8732, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.23006134969325154, |
|
"grad_norm": 0.2988013721693877, |
|
"learning_rate": 9.301454794204464e-05, |
|
"loss": 1.9106, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2331288343558282, |
|
"grad_norm": 0.3322046656491888, |
|
"learning_rate": 9.276531792788471e-05, |
|
"loss": 1.9082, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2361963190184049, |
|
"grad_norm": 0.4251032871261752, |
|
"learning_rate": 9.251210733495039e-05, |
|
"loss": 1.873, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2392638036809816, |
|
"grad_norm": 0.5316920231449993, |
|
"learning_rate": 9.225494285240432e-05, |
|
"loss": 1.9237, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.24233128834355827, |
|
"grad_norm": 0.3879744017362554, |
|
"learning_rate": 9.199385158616103e-05, |
|
"loss": 1.9097, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.24539877300613497, |
|
"grad_norm": 0.34345641723744996, |
|
"learning_rate": 9.172886105602998e-05, |
|
"loss": 1.8854, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24846625766871167, |
|
"grad_norm": 0.28939057442749516, |
|
"learning_rate": 9.145999919281481e-05, |
|
"loss": 1.8964, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.25153374233128833, |
|
"grad_norm": 1.3304291601448779, |
|
"learning_rate": 9.118729433536938e-05, |
|
"loss": 1.9008, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.254601226993865, |
|
"grad_norm": 0.31217347045844684, |
|
"learning_rate": 9.091077522761079e-05, |
|
"loss": 1.9452, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.25766871165644173, |
|
"grad_norm": 0.437112787156602, |
|
"learning_rate": 9.063047101548962e-05, |
|
"loss": 1.8645, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2607361963190184, |
|
"grad_norm": 0.29101868827151584, |
|
"learning_rate": 9.034641124391795e-05, |
|
"loss": 1.9555, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.26380368098159507, |
|
"grad_norm": 0.3581357829575129, |
|
"learning_rate": 9.005862585365517e-05, |
|
"loss": 1.8963, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2668711656441718, |
|
"grad_norm": 0.2870730838141048, |
|
"learning_rate": 8.976714517815216e-05, |
|
"loss": 1.9004, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.26993865030674846, |
|
"grad_norm": 0.432917577879272, |
|
"learning_rate": 8.947199994035401e-05, |
|
"loss": 1.9512, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.27300613496932513, |
|
"grad_norm": 0.2818163590615669, |
|
"learning_rate": 8.917322124946182e-05, |
|
"loss": 1.951, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.27607361963190186, |
|
"grad_norm": 0.35253042451634276, |
|
"learning_rate": 8.88708405976536e-05, |
|
"loss": 1.8632, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2791411042944785, |
|
"grad_norm": 0.2590173941857926, |
|
"learning_rate": 8.856488985676495e-05, |
|
"loss": 1.9345, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2822085889570552, |
|
"grad_norm": 0.27658536342174034, |
|
"learning_rate": 8.825540127492967e-05, |
|
"loss": 1.9323, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2852760736196319, |
|
"grad_norm": 0.4745120742354108, |
|
"learning_rate": 8.794240747318066e-05, |
|
"loss": 1.9018, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2883435582822086, |
|
"grad_norm": 0.26070920298493305, |
|
"learning_rate": 8.762594144201167e-05, |
|
"loss": 1.9387, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.29141104294478526, |
|
"grad_norm": 0.5280391087971116, |
|
"learning_rate": 8.73060365378999e-05, |
|
"loss": 1.862, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.294478527607362, |
|
"grad_norm": 0.2507206580092369, |
|
"learning_rate": 8.698272647979012e-05, |
|
"loss": 1.9286, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.29754601226993865, |
|
"grad_norm": 0.26686171742356907, |
|
"learning_rate": 8.665604534554075e-05, |
|
"loss": 1.8256, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.3006134969325153, |
|
"grad_norm": 0.2528790515143118, |
|
"learning_rate": 8.632602756833172e-05, |
|
"loss": 1.9627, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.30368098159509205, |
|
"grad_norm": 0.3485782871675419, |
|
"learning_rate": 8.599270793303524e-05, |
|
"loss": 1.8465, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3067484662576687, |
|
"grad_norm": 0.26793745211248754, |
|
"learning_rate": 8.565612157254943e-05, |
|
"loss": 1.8918, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3098159509202454, |
|
"grad_norm": 0.25037629545985934, |
|
"learning_rate": 8.531630396409507e-05, |
|
"loss": 1.8663, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3128834355828221, |
|
"grad_norm": 0.2592216678438039, |
|
"learning_rate": 8.497329092547627e-05, |
|
"loss": 1.9302, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3159509202453988, |
|
"grad_norm": 0.26334854065125896, |
|
"learning_rate": 8.46271186113051e-05, |
|
"loss": 1.8775, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.31901840490797545, |
|
"grad_norm": 0.2626800828290798, |
|
"learning_rate": 8.42778235091909e-05, |
|
"loss": 1.9522, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.3220858895705521, |
|
"grad_norm": 0.24256073020090993, |
|
"learning_rate": 8.392544243589427e-05, |
|
"loss": 1.9295, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.32515337423312884, |
|
"grad_norm": 0.2484627790629833, |
|
"learning_rate": 8.357001253344653e-05, |
|
"loss": 1.9287, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3282208588957055, |
|
"grad_norm": 0.31955912356468386, |
|
"learning_rate": 8.32115712652348e-05, |
|
"loss": 1.9886, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3312883435582822, |
|
"grad_norm": 0.2434642052279205, |
|
"learning_rate": 8.285015641205325e-05, |
|
"loss": 1.9623, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3343558282208589, |
|
"grad_norm": 0.28552157930226957, |
|
"learning_rate": 8.248580606812096e-05, |
|
"loss": 1.8705, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3374233128834356, |
|
"grad_norm": 0.27716036272992295, |
|
"learning_rate": 8.211855863706654e-05, |
|
"loss": 1.8958, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34049079754601225, |
|
"grad_norm": 0.40776621930987433, |
|
"learning_rate": 8.174845282788041e-05, |
|
"loss": 1.9219, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.34355828220858897, |
|
"grad_norm": 0.27546145956009194, |
|
"learning_rate": 8.137552765083466e-05, |
|
"loss": 1.8948, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.34662576687116564, |
|
"grad_norm": 0.2463745150403918, |
|
"learning_rate": 8.09998224133713e-05, |
|
"loss": 1.907, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.3496932515337423, |
|
"grad_norm": 0.2530717713867962, |
|
"learning_rate": 8.062137671595911e-05, |
|
"loss": 1.8945, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.35276073619631904, |
|
"grad_norm": 0.26804689577846247, |
|
"learning_rate": 8.024023044791964e-05, |
|
"loss": 1.8984, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3558282208588957, |
|
"grad_norm": 0.2922869142073029, |
|
"learning_rate": 7.985642378322276e-05, |
|
"loss": 1.9499, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3588957055214724, |
|
"grad_norm": 0.2302050850660013, |
|
"learning_rate": 7.946999717625221e-05, |
|
"loss": 1.9398, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3619631901840491, |
|
"grad_norm": 0.4179152288704764, |
|
"learning_rate": 7.908099135754152e-05, |
|
"loss": 1.909, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.36503067484662577, |
|
"grad_norm": 0.2448034947982603, |
|
"learning_rate": 7.868944732948101e-05, |
|
"loss": 1.9202, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.36809815950920244, |
|
"grad_norm": 0.3642159637354568, |
|
"learning_rate": 7.829540636199591e-05, |
|
"loss": 1.9188, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37116564417177916, |
|
"grad_norm": 0.2751031027135651, |
|
"learning_rate": 7.789890998819643e-05, |
|
"loss": 1.8903, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.37423312883435583, |
|
"grad_norm": 0.2519348027896112, |
|
"learning_rate": 7.75e-05, |
|
"loss": 1.9422, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3773006134969325, |
|
"grad_norm": 0.2724753380540709, |
|
"learning_rate": 7.709871844372639e-05, |
|
"loss": 1.9314, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3803680981595092, |
|
"grad_norm": 0.2831411354349516, |
|
"learning_rate": 7.669510761566571e-05, |
|
"loss": 1.8467, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3834355828220859, |
|
"grad_norm": 0.34065192298819646, |
|
"learning_rate": 7.628921005762047e-05, |
|
"loss": 1.9109, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.38650306748466257, |
|
"grad_norm": 0.2744987049992245, |
|
"learning_rate": 7.588106855242135e-05, |
|
"loss": 1.8961, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3895705521472393, |
|
"grad_norm": 0.24972903865472293, |
|
"learning_rate": 7.547072611941795e-05, |
|
"loss": 1.9183, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.39263803680981596, |
|
"grad_norm": 0.2717954573790397, |
|
"learning_rate": 7.505822600994424e-05, |
|
"loss": 1.9925, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.39570552147239263, |
|
"grad_norm": 0.2710599653280406, |
|
"learning_rate": 7.46436117027598e-05, |
|
"loss": 1.9588, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3987730061349693, |
|
"grad_norm": 0.3038954677693998, |
|
"learning_rate": 7.422692689946714e-05, |
|
"loss": 1.9182, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.401840490797546, |
|
"grad_norm": 0.2587552748890865, |
|
"learning_rate": 7.380821551990525e-05, |
|
"loss": 1.9383, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.4049079754601227, |
|
"grad_norm": 0.25905002770576757, |
|
"learning_rate": 7.338752169752042e-05, |
|
"loss": 1.9514, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.4049079754601227, |
|
"eval_loss": 2.577134370803833, |
|
"eval_runtime": 55.6924, |
|
"eval_samples_per_second": 1.796, |
|
"eval_steps_per_second": 0.126, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.40797546012269936, |
|
"grad_norm": 0.2703996506167688, |
|
"learning_rate": 7.29648897747144e-05, |
|
"loss": 1.9516, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.4110429447852761, |
|
"grad_norm": 0.2499546230234631, |
|
"learning_rate": 7.254036429817058e-05, |
|
"loss": 2.0144, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.41411042944785276, |
|
"grad_norm": 0.2755759481735348, |
|
"learning_rate": 7.211399001415866e-05, |
|
"loss": 1.8909, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.4171779141104294, |
|
"grad_norm": 0.25578131710544816, |
|
"learning_rate": 7.168581186381824e-05, |
|
"loss": 1.9747, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.42024539877300615, |
|
"grad_norm": 0.27719697668216164, |
|
"learning_rate": 7.12558749784219e-05, |
|
"loss": 1.9548, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.4233128834355828, |
|
"grad_norm": 0.3398789070245734, |
|
"learning_rate": 7.082422467461816e-05, |
|
"loss": 1.9209, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4263803680981595, |
|
"grad_norm": 0.3891484871642631, |
|
"learning_rate": 7.03909064496551e-05, |
|
"loss": 1.8979, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.4294478527607362, |
|
"grad_norm": 0.28744028744457395, |
|
"learning_rate": 6.995596597658468e-05, |
|
"loss": 1.8568, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4325153374233129, |
|
"grad_norm": 0.465137214109235, |
|
"learning_rate": 6.951944909944877e-05, |
|
"loss": 1.9201, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.43558282208588955, |
|
"grad_norm": 0.26138177619827196, |
|
"learning_rate": 6.908140182844695e-05, |
|
"loss": 1.9864, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.4386503067484663, |
|
"grad_norm": 0.2580799320688176, |
|
"learning_rate": 6.864187033508695e-05, |
|
"loss": 1.9603, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.44171779141104295, |
|
"grad_norm": 0.2342374798488655, |
|
"learning_rate": 6.820090094731808e-05, |
|
"loss": 1.8695, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4447852760736196, |
|
"grad_norm": 0.31939812381318156, |
|
"learning_rate": 6.775854014464799e-05, |
|
"loss": 1.89, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.44785276073619634, |
|
"grad_norm": 0.3745349673551468, |
|
"learning_rate": 6.731483455324374e-05, |
|
"loss": 1.9072, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.450920245398773, |
|
"grad_norm": 0.2398137142916484, |
|
"learning_rate": 6.686983094101712e-05, |
|
"loss": 1.9224, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4539877300613497, |
|
"grad_norm": 0.7029063348936169, |
|
"learning_rate": 6.642357621269535e-05, |
|
"loss": 1.9042, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4570552147239264, |
|
"grad_norm": 0.9822378439608801, |
|
"learning_rate": 6.597611740487698e-05, |
|
"loss": 1.9367, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.4601226993865031, |
|
"grad_norm": 0.30640641324748263, |
|
"learning_rate": 6.55275016810742e-05, |
|
"loss": 1.8906, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46319018404907975, |
|
"grad_norm": 0.28453603828616697, |
|
"learning_rate": 6.507777632674165e-05, |
|
"loss": 1.9607, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4662576687116564, |
|
"grad_norm": 0.6855412180718642, |
|
"learning_rate": 6.462698874429239e-05, |
|
"loss": 1.8572, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.46932515337423314, |
|
"grad_norm": 0.2849104974414773, |
|
"learning_rate": 6.417518644810155e-05, |
|
"loss": 1.9385, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4723926380368098, |
|
"grad_norm": 0.31769414398981494, |
|
"learning_rate": 6.372241705949815e-05, |
|
"loss": 1.8972, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4754601226993865, |
|
"grad_norm": 0.6853208214886923, |
|
"learning_rate": 6.326872830174567e-05, |
|
"loss": 1.873, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4785276073619632, |
|
"grad_norm": 0.3810470202905365, |
|
"learning_rate": 6.281416799501188e-05, |
|
"loss": 2.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4815950920245399, |
|
"grad_norm": 0.3784628917790679, |
|
"learning_rate": 6.235878405132842e-05, |
|
"loss": 1.8814, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.48466257668711654, |
|
"grad_norm": 0.3427014353184805, |
|
"learning_rate": 6.190262446954085e-05, |
|
"loss": 1.9223, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.48773006134969327, |
|
"grad_norm": 0.46855229041092994, |
|
"learning_rate": 6.144573733024922e-05, |
|
"loss": 1.9059, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.49079754601226994, |
|
"grad_norm": 0.29232827174073656, |
|
"learning_rate": 6.0988170790740416e-05, |
|
"loss": 1.8491, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4938650306748466, |
|
"grad_norm": 0.30132959369450213, |
|
"learning_rate": 6.052997307991214e-05, |
|
"loss": 1.9595, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.49693251533742333, |
|
"grad_norm": 0.3195413242096082, |
|
"learning_rate": 6.007119249318945e-05, |
|
"loss": 1.9063, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.34517635749728204, |
|
"learning_rate": 5.961187738743432e-05, |
|
"loss": 1.9111, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.5030674846625767, |
|
"grad_norm": 0.2593428730143879, |
|
"learning_rate": 5.9152076175848594e-05, |
|
"loss": 1.9011, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.5061349693251533, |
|
"grad_norm": 0.31658622781595325, |
|
"learning_rate": 5.86918373228712e-05, |
|
"loss": 1.9918, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.50920245398773, |
|
"grad_norm": 0.6628038110211543, |
|
"learning_rate": 5.8231209339069746e-05, |
|
"loss": 1.9152, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.5122699386503068, |
|
"grad_norm": 0.2797312671008732, |
|
"learning_rate": 5.777024077602744e-05, |
|
"loss": 1.868, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.5153374233128835, |
|
"grad_norm": 0.26640093514522606, |
|
"learning_rate": 5.730898022122554e-05, |
|
"loss": 1.8938, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.5184049079754601, |
|
"grad_norm": 0.4054825634426873, |
|
"learning_rate": 5.6847476292922155e-05, |
|
"loss": 1.9428, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5214723926380368, |
|
"grad_norm": 0.29142731230985613, |
|
"learning_rate": 5.6385777635027684e-05, |
|
"loss": 1.8903, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5245398773006135, |
|
"grad_norm": 0.3511142336480421, |
|
"learning_rate": 5.5923932911977575e-05, |
|
"loss": 1.9386, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5276073619631901, |
|
"grad_norm": 0.5560176165666619, |
|
"learning_rate": 5.5461990803603045e-05, |
|
"loss": 1.9562, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.5306748466257669, |
|
"grad_norm": 0.3171565471545065, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.9565, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.5337423312883436, |
|
"grad_norm": 0.29095744910567595, |
|
"learning_rate": 5.4538009196396966e-05, |
|
"loss": 1.9282, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.5368098159509203, |
|
"grad_norm": 0.41192796716349284, |
|
"learning_rate": 5.407606708802244e-05, |
|
"loss": 1.918, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5398773006134969, |
|
"grad_norm": 0.5305521764688194, |
|
"learning_rate": 5.361422236497235e-05, |
|
"loss": 1.9096, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.5429447852760736, |
|
"grad_norm": 0.6434585908707302, |
|
"learning_rate": 5.315252370707786e-05, |
|
"loss": 1.8935, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.5460122699386503, |
|
"grad_norm": 0.2939723397914849, |
|
"learning_rate": 5.2691019778774465e-05, |
|
"loss": 1.9531, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.549079754601227, |
|
"grad_norm": 0.4989500512121766, |
|
"learning_rate": 5.2229759223972574e-05, |
|
"loss": 1.9341, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5521472392638037, |
|
"grad_norm": 0.6024485433735285, |
|
"learning_rate": 5.1768790660930265e-05, |
|
"loss": 1.9001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5552147239263804, |
|
"grad_norm": 0.47950946229716923, |
|
"learning_rate": 5.130816267712881e-05, |
|
"loss": 1.9209, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.558282208588957, |
|
"grad_norm": 1.2341600337232164, |
|
"learning_rate": 5.0847923824151424e-05, |
|
"loss": 1.977, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5613496932515337, |
|
"grad_norm": 0.3100804420788902, |
|
"learning_rate": 5.038812261256569e-05, |
|
"loss": 1.9594, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5644171779141104, |
|
"grad_norm": 0.5390046601483737, |
|
"learning_rate": 4.992880750681056e-05, |
|
"loss": 1.8533, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5674846625766872, |
|
"grad_norm": 0.43167483611230206, |
|
"learning_rate": 4.9470026920087876e-05, |
|
"loss": 1.8782, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5705521472392638, |
|
"grad_norm": 0.3684508227191539, |
|
"learning_rate": 4.901182920925961e-05, |
|
"loss": 1.8684, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5736196319018405, |
|
"grad_norm": 0.2936392864589777, |
|
"learning_rate": 4.8554262669750794e-05, |
|
"loss": 1.8586, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5766871165644172, |
|
"grad_norm": 0.3204686860443095, |
|
"learning_rate": 4.809737553045916e-05, |
|
"loss": 1.8977, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5797546012269938, |
|
"grad_norm": 0.3024045894502796, |
|
"learning_rate": 4.764121594867157e-05, |
|
"loss": 1.8882, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5828220858895705, |
|
"grad_norm": 0.40522790311176354, |
|
"learning_rate": 4.718583200498814e-05, |
|
"loss": 1.924, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5858895705521472, |
|
"grad_norm": 0.5053931616075322, |
|
"learning_rate": 4.673127169825433e-05, |
|
"loss": 1.8868, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.588957055214724, |
|
"grad_norm": 0.3211686422583536, |
|
"learning_rate": 4.627758294050185e-05, |
|
"loss": 1.9068, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5920245398773006, |
|
"grad_norm": 0.24127093990601076, |
|
"learning_rate": 4.582481355189846e-05, |
|
"loss": 1.895, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5950920245398773, |
|
"grad_norm": 0.4074710701692581, |
|
"learning_rate": 4.537301125570763e-05, |
|
"loss": 1.8969, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.598159509202454, |
|
"grad_norm": 0.25841948774460555, |
|
"learning_rate": 4.492222367325837e-05, |
|
"loss": 1.94, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6012269938650306, |
|
"grad_norm": 0.2943706481314386, |
|
"learning_rate": 4.447249831892583e-05, |
|
"loss": 1.9482, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.6042944785276073, |
|
"grad_norm": 0.3110992148589072, |
|
"learning_rate": 4.402388259512303e-05, |
|
"loss": 1.9495, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.6073619631901841, |
|
"grad_norm": 0.3723312760498351, |
|
"learning_rate": 4.357642378730466e-05, |
|
"loss": 1.9213, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.6073619631901841, |
|
"eval_loss": 2.594010353088379, |
|
"eval_runtime": 55.7716, |
|
"eval_samples_per_second": 1.793, |
|
"eval_steps_per_second": 0.126, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.6104294478527608, |
|
"grad_norm": 0.31755022515076264, |
|
"learning_rate": 4.313016905898286e-05, |
|
"loss": 1.8861, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.6134969325153374, |
|
"grad_norm": 0.37514333178831394, |
|
"learning_rate": 4.268516544675628e-05, |
|
"loss": 1.9366, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6165644171779141, |
|
"grad_norm": 0.2768732078613857, |
|
"learning_rate": 4.224145985535202e-05, |
|
"loss": 1.8781, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.6196319018404908, |
|
"grad_norm": 0.385983235346578, |
|
"learning_rate": 4.1799099052681934e-05, |
|
"loss": 1.9089, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.6226993865030674, |
|
"grad_norm": 0.34929147929166254, |
|
"learning_rate": 4.135812966491305e-05, |
|
"loss": 1.9409, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6257668711656442, |
|
"grad_norm": 0.3448745967701562, |
|
"learning_rate": 4.091859817155307e-05, |
|
"loss": 1.8935, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6288343558282209, |
|
"grad_norm": 0.24777573443198542, |
|
"learning_rate": 4.048055090055125e-05, |
|
"loss": 1.9007, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6319018404907976, |
|
"grad_norm": 0.33163324355956286, |
|
"learning_rate": 4.004403402341532e-05, |
|
"loss": 1.8816, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.6349693251533742, |
|
"grad_norm": 0.6161345209342699, |
|
"learning_rate": 3.960909355034491e-05, |
|
"loss": 1.8952, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.6380368098159509, |
|
"grad_norm": 0.29863513222265725, |
|
"learning_rate": 3.917577532538185e-05, |
|
"loss": 1.8622, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.6411042944785276, |
|
"grad_norm": 0.23544641297651625, |
|
"learning_rate": 3.8744125021578126e-05, |
|
"loss": 1.9098, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.6441717791411042, |
|
"grad_norm": 0.29701664972205183, |
|
"learning_rate": 3.831418813618177e-05, |
|
"loss": 1.8963, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.647239263803681, |
|
"grad_norm": 0.2608462550147094, |
|
"learning_rate": 3.788600998584135e-05, |
|
"loss": 1.9425, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6503067484662577, |
|
"grad_norm": 0.2753794235571961, |
|
"learning_rate": 3.7459635701829435e-05, |
|
"loss": 1.9312, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.6533742331288344, |
|
"grad_norm": 0.40974803557689143, |
|
"learning_rate": 3.703511022528562e-05, |
|
"loss": 1.8992, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.656441717791411, |
|
"grad_norm": 0.24030236007607908, |
|
"learning_rate": 3.6612478302479594e-05, |
|
"loss": 1.9326, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.6595092024539877, |
|
"grad_norm": 0.4383608820045659, |
|
"learning_rate": 3.619178448009477e-05, |
|
"loss": 1.932, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6625766871165644, |
|
"grad_norm": 0.5102560092350799, |
|
"learning_rate": 3.5773073100532874e-05, |
|
"loss": 1.8956, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6656441717791411, |
|
"grad_norm": 0.36274812580727284, |
|
"learning_rate": 3.535638829724019e-05, |
|
"loss": 1.8919, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6687116564417178, |
|
"grad_norm": 0.23488730500365318, |
|
"learning_rate": 3.494177399005578e-05, |
|
"loss": 1.9158, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6717791411042945, |
|
"grad_norm": 0.3741689726256645, |
|
"learning_rate": 3.452927388058206e-05, |
|
"loss": 1.9423, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6748466257668712, |
|
"grad_norm": 0.25651456348082824, |
|
"learning_rate": 3.411893144757866e-05, |
|
"loss": 1.8415, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6779141104294478, |
|
"grad_norm": 0.2612828905023135, |
|
"learning_rate": 3.3710789942379556e-05, |
|
"loss": 1.9472, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6809815950920245, |
|
"grad_norm": 0.25469935789428655, |
|
"learning_rate": 3.33048923843343e-05, |
|
"loss": 1.949, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6840490797546013, |
|
"grad_norm": 0.23410106434735667, |
|
"learning_rate": 3.2901281556273646e-05, |
|
"loss": 1.8963, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6871165644171779, |
|
"grad_norm": 0.25811790889112224, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 1.8488, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6901840490797546, |
|
"grad_norm": 0.2701258126507899, |
|
"learning_rate": 3.210109001180358e-05, |
|
"loss": 1.9429, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6932515337423313, |
|
"grad_norm": 0.27336206551312103, |
|
"learning_rate": 3.170459363800409e-05, |
|
"loss": 1.9063, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.696319018404908, |
|
"grad_norm": 0.30139160569284024, |
|
"learning_rate": 3.1310552670518986e-05, |
|
"loss": 1.9182, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6993865030674846, |
|
"grad_norm": 0.23370917590561624, |
|
"learning_rate": 3.0919008642458494e-05, |
|
"loss": 1.9541, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.7024539877300614, |
|
"grad_norm": 0.22222235132591592, |
|
"learning_rate": 3.053000282374781e-05, |
|
"loss": 1.8864, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.7055214723926381, |
|
"grad_norm": 0.27873390973935386, |
|
"learning_rate": 3.014357621677724e-05, |
|
"loss": 1.8852, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7085889570552147, |
|
"grad_norm": 0.3108583744507131, |
|
"learning_rate": 2.9759769552080376e-05, |
|
"loss": 1.8663, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.7116564417177914, |
|
"grad_norm": 0.30913975922284836, |
|
"learning_rate": 2.93786232840409e-05, |
|
"loss": 1.9404, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.7147239263803681, |
|
"grad_norm": 0.28385532690084997, |
|
"learning_rate": 2.90001775866287e-05, |
|
"loss": 1.9023, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.7177914110429447, |
|
"grad_norm": 0.2808200803737186, |
|
"learning_rate": 2.8624472349165355e-05, |
|
"loss": 1.9192, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.7208588957055214, |
|
"grad_norm": 0.23648694756886077, |
|
"learning_rate": 2.8251547172119603e-05, |
|
"loss": 2.0132, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7239263803680982, |
|
"grad_norm": 0.6069490067148141, |
|
"learning_rate": 2.7881441362933468e-05, |
|
"loss": 1.8395, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.7269938650306749, |
|
"grad_norm": 0.3350257794257116, |
|
"learning_rate": 2.751419393187905e-05, |
|
"loss": 1.8667, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.7300613496932515, |
|
"grad_norm": 0.232164276820369, |
|
"learning_rate": 2.7149843587946744e-05, |
|
"loss": 1.8656, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7331288343558282, |
|
"grad_norm": 0.38356734047420593, |
|
"learning_rate": 2.6788428734765224e-05, |
|
"loss": 1.9048, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.7361963190184049, |
|
"grad_norm": 0.2618731826165273, |
|
"learning_rate": 2.642998746655348e-05, |
|
"loss": 1.9783, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7392638036809815, |
|
"grad_norm": 0.6648822511934657, |
|
"learning_rate": 2.6074557564105727e-05, |
|
"loss": 1.9043, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.7423312883435583, |
|
"grad_norm": 0.27175163581016115, |
|
"learning_rate": 2.5722176490809118e-05, |
|
"loss": 1.9585, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.745398773006135, |
|
"grad_norm": 0.3925966681047075, |
|
"learning_rate": 2.5372881388694912e-05, |
|
"loss": 1.8515, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7484662576687117, |
|
"grad_norm": 0.37190935188206453, |
|
"learning_rate": 2.5026709074523748e-05, |
|
"loss": 1.9688, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.7515337423312883, |
|
"grad_norm": 0.2257138379202953, |
|
"learning_rate": 2.4683696035904928e-05, |
|
"loss": 1.9486, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.754601226993865, |
|
"grad_norm": 0.2274145468605237, |
|
"learning_rate": 2.434387842745056e-05, |
|
"loss": 1.9302, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.7576687116564417, |
|
"grad_norm": 0.5126959359452324, |
|
"learning_rate": 2.400729206696477e-05, |
|
"loss": 1.9443, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.7607361963190185, |
|
"grad_norm": 0.2551304692334095, |
|
"learning_rate": 2.3673972431668306e-05, |
|
"loss": 2.009, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.7638036809815951, |
|
"grad_norm": 0.4447523876477682, |
|
"learning_rate": 2.334395465445926e-05, |
|
"loss": 1.8468, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.7668711656441718, |
|
"grad_norm": 0.2657558360669318, |
|
"learning_rate": 2.3017273520209882e-05, |
|
"loss": 1.8886, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7699386503067485, |
|
"grad_norm": 0.37573420755761094, |
|
"learning_rate": 2.2693963462100117e-05, |
|
"loss": 1.8663, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7730061349693251, |
|
"grad_norm": 0.26075506564879214, |
|
"learning_rate": 2.2374058557988336e-05, |
|
"loss": 1.909, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.7760736196319018, |
|
"grad_norm": 0.2951446457265513, |
|
"learning_rate": 2.2057592526819353e-05, |
|
"loss": 1.9362, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7791411042944786, |
|
"grad_norm": 0.24420003456766767, |
|
"learning_rate": 2.1744598725070347e-05, |
|
"loss": 1.9134, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7822085889570553, |
|
"grad_norm": 0.2563261666147908, |
|
"learning_rate": 2.143511014323506e-05, |
|
"loss": 1.9569, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7852760736196319, |
|
"grad_norm": 0.27427716272900493, |
|
"learning_rate": 2.11291594023464e-05, |
|
"loss": 1.8982, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7883435582822086, |
|
"grad_norm": 0.4685271777395839, |
|
"learning_rate": 2.082677875053818e-05, |
|
"loss": 1.9256, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7914110429447853, |
|
"grad_norm": 0.3080424306042412, |
|
"learning_rate": 2.0528000059645997e-05, |
|
"loss": 1.9154, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7944785276073619, |
|
"grad_norm": 0.2672783439075976, |
|
"learning_rate": 2.023285482184785e-05, |
|
"loss": 1.9574, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7975460122699386, |
|
"grad_norm": 0.3311914465278651, |
|
"learning_rate": 1.994137414634483e-05, |
|
"loss": 1.9133, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8006134969325154, |
|
"grad_norm": 0.7675438620825049, |
|
"learning_rate": 1.9653588756082064e-05, |
|
"loss": 1.892, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.803680981595092, |
|
"grad_norm": 0.2757310062776552, |
|
"learning_rate": 1.9369528984510394e-05, |
|
"loss": 1.9087, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.8067484662576687, |
|
"grad_norm": 0.24797296946202665, |
|
"learning_rate": 1.9089224772389225e-05, |
|
"loss": 1.8836, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.8098159509202454, |
|
"grad_norm": 0.41244928985184576, |
|
"learning_rate": 1.881270566463062e-05, |
|
"loss": 1.9094, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.8098159509202454, |
|
"eval_loss": 2.593792200088501, |
|
"eval_runtime": 55.7303, |
|
"eval_samples_per_second": 1.794, |
|
"eval_steps_per_second": 0.126, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.8128834355828221, |
|
"grad_norm": 0.2830531876648041, |
|
"learning_rate": 1.8540000807185192e-05, |
|
"loss": 1.9384, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.8159509202453987, |
|
"grad_norm": 0.2670241830579454, |
|
"learning_rate": 1.827113894397003e-05, |
|
"loss": 1.8443, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.8190184049079755, |
|
"grad_norm": 0.5199599677205632, |
|
"learning_rate": 1.800614841383898e-05, |
|
"loss": 1.9262, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.8220858895705522, |
|
"grad_norm": 0.2979059774589199, |
|
"learning_rate": 1.7745057147595694e-05, |
|
"loss": 1.8408, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.8251533742331288, |
|
"grad_norm": 0.3369017601149041, |
|
"learning_rate": 1.7487892665049627e-05, |
|
"loss": 1.9671, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.8282208588957055, |
|
"grad_norm": 0.24208825522114308, |
|
"learning_rate": 1.7234682072115305e-05, |
|
"loss": 1.9101, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8312883435582822, |
|
"grad_norm": 0.3809834134932596, |
|
"learning_rate": 1.698545205795536e-05, |
|
"loss": 1.8445, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.8343558282208589, |
|
"grad_norm": 0.27384739149228576, |
|
"learning_rate": 1.674022889216737e-05, |
|
"loss": 1.9337, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.8374233128834356, |
|
"grad_norm": 0.25542052798806203, |
|
"learning_rate": 1.6499038422014962e-05, |
|
"loss": 1.8697, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.8404907975460123, |
|
"grad_norm": 0.30649006891608727, |
|
"learning_rate": 1.626190606970346e-05, |
|
"loss": 1.8985, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.843558282208589, |
|
"grad_norm": 0.27648461915446576, |
|
"learning_rate": 1.602885682970026e-05, |
|
"loss": 1.8851, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8466257668711656, |
|
"grad_norm": 1.1533982638871452, |
|
"learning_rate": 1.57999152661004e-05, |
|
"loss": 1.9318, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.8496932515337423, |
|
"grad_norm": 0.33969524913455146, |
|
"learning_rate": 1.5575105510037396e-05, |
|
"loss": 2.0149, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.852760736196319, |
|
"grad_norm": 0.5956725111127443, |
|
"learning_rate": 1.53544512571397e-05, |
|
"loss": 1.8834, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.8558282208588958, |
|
"grad_norm": 0.5892298656241596, |
|
"learning_rate": 1.5137975765033205e-05, |
|
"loss": 1.8972, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.8588957055214724, |
|
"grad_norm": 0.41593605055209165, |
|
"learning_rate": 1.4925701850889772e-05, |
|
"loss": 1.9427, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8619631901840491, |
|
"grad_norm": 0.2630748817948859, |
|
"learning_rate": 1.4717651889022202e-05, |
|
"loss": 1.9469, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.8650306748466258, |
|
"grad_norm": 0.2232832403928089, |
|
"learning_rate": 1.4513847808525969e-05, |
|
"loss": 1.9662, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.8680981595092024, |
|
"grad_norm": 0.31719749827250515, |
|
"learning_rate": 1.4314311090967786e-05, |
|
"loss": 1.9091, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8711656441717791, |
|
"grad_norm": 0.301123405840287, |
|
"learning_rate": 1.4119062768121433e-05, |
|
"loss": 1.8862, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.8742331288343558, |
|
"grad_norm": 0.6726088360165043, |
|
"learning_rate": 1.3928123419750888e-05, |
|
"loss": 1.8739, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8773006134969326, |
|
"grad_norm": 0.4202167476604764, |
|
"learning_rate": 1.3741513171441176e-05, |
|
"loss": 1.9232, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.8803680981595092, |
|
"grad_norm": 0.304988395998919, |
|
"learning_rate": 1.3559251692477087e-05, |
|
"loss": 1.9318, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8834355828220859, |
|
"grad_norm": 0.274507041819108, |
|
"learning_rate": 1.3381358193769976e-05, |
|
"loss": 1.8499, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8865030674846626, |
|
"grad_norm": 0.47861538421593386, |
|
"learning_rate": 1.320785142583284e-05, |
|
"loss": 1.9518, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8895705521472392, |
|
"grad_norm": 0.45942646770952145, |
|
"learning_rate": 1.3038749676803994e-05, |
|
"loss": 1.9109, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8926380368098159, |
|
"grad_norm": 0.27087716251353355, |
|
"learning_rate": 1.2874070770519428e-05, |
|
"loss": 1.8813, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8957055214723927, |
|
"grad_norm": 0.255203728473793, |
|
"learning_rate": 1.2713832064634126e-05, |
|
"loss": 1.873, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8987730061349694, |
|
"grad_norm": 0.40071001023936836, |
|
"learning_rate": 1.2558050448792515e-05, |
|
"loss": 1.9324, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.901840490797546, |
|
"grad_norm": 0.33237213114045755, |
|
"learning_rate": 1.2406742342848248e-05, |
|
"loss": 1.96, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.9049079754601227, |
|
"grad_norm": 0.2921583930232282, |
|
"learning_rate": 1.2259923695133503e-05, |
|
"loss": 1.8696, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.9079754601226994, |
|
"grad_norm": 0.2753105203678559, |
|
"learning_rate": 1.2117609980777959e-05, |
|
"loss": 1.9038, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.911042944785276, |
|
"grad_norm": 0.497963211949326, |
|
"learning_rate": 1.1979816200077707e-05, |
|
"loss": 1.9388, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.9141104294478528, |
|
"grad_norm": 0.2474786285871462, |
|
"learning_rate": 1.1846556876914151e-05, |
|
"loss": 1.9544, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.9171779141104295, |
|
"grad_norm": 0.26791445026050176, |
|
"learning_rate": 1.1717846057223144e-05, |
|
"loss": 1.9231, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.9202453987730062, |
|
"grad_norm": 0.3923236183364779, |
|
"learning_rate": 1.159369730751452e-05, |
|
"loss": 1.8686, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9233128834355828, |
|
"grad_norm": 0.36556731516768504, |
|
"learning_rate": 1.1474123713442137e-05, |
|
"loss": 1.9278, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.9263803680981595, |
|
"grad_norm": 0.24192425833135245, |
|
"learning_rate": 1.1359137878424578e-05, |
|
"loss": 1.8853, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.9294478527607362, |
|
"grad_norm": 0.31690600810620534, |
|
"learning_rate": 1.1248751922316776e-05, |
|
"loss": 1.9523, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.9325153374233128, |
|
"grad_norm": 0.27955140199036155, |
|
"learning_rate": 1.1142977480132493e-05, |
|
"loss": 1.8225, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.9355828220858896, |
|
"grad_norm": 0.2831264739725871, |
|
"learning_rate": 1.104182570081797e-05, |
|
"loss": 1.9258, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.9386503067484663, |
|
"grad_norm": 0.26580496177825247, |
|
"learning_rate": 1.0945307246076797e-05, |
|
"loss": 1.9327, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.941717791411043, |
|
"grad_norm": 0.30887069355917346, |
|
"learning_rate": 1.0853432289246138e-05, |
|
"loss": 1.9412, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9447852760736196, |
|
"grad_norm": 0.44810137462917216, |
|
"learning_rate": 1.076621051422442e-05, |
|
"loss": 1.9057, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.9478527607361963, |
|
"grad_norm": 0.27583855429775517, |
|
"learning_rate": 1.0683651114450641e-05, |
|
"loss": 1.9357, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.950920245398773, |
|
"grad_norm": 0.26050390516719396, |
|
"learning_rate": 1.0605762791935325e-05, |
|
"loss": 1.8674, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9539877300613497, |
|
"grad_norm": 0.26034125726942287, |
|
"learning_rate": 1.0532553756343328e-05, |
|
"loss": 1.8837, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.9570552147239264, |
|
"grad_norm": 0.380331760419281, |
|
"learning_rate": 1.0464031724128512e-05, |
|
"loss": 1.9202, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.9601226993865031, |
|
"grad_norm": 0.3024899052220286, |
|
"learning_rate": 1.0400203917720394e-05, |
|
"loss": 1.833, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.9631901840490797, |
|
"grad_norm": 0.26156906536760005, |
|
"learning_rate": 1.0341077064762893e-05, |
|
"loss": 1.8538, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.9662576687116564, |
|
"grad_norm": 0.5419644400783428, |
|
"learning_rate": 1.0286657397405204e-05, |
|
"loss": 1.8956, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9693251533742331, |
|
"grad_norm": 0.2754473793756419, |
|
"learning_rate": 1.0236950651644922e-05, |
|
"loss": 1.8821, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.9723926380368099, |
|
"grad_norm": 0.32743295245170423, |
|
"learning_rate": 1.019196206672345e-05, |
|
"loss": 1.8669, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.9754601226993865, |
|
"grad_norm": 0.2983793501294546, |
|
"learning_rate": 1.0151696384573753e-05, |
|
"loss": 1.8806, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.9785276073619632, |
|
"grad_norm": 0.274678179585171, |
|
"learning_rate": 1.011615784932056e-05, |
|
"loss": 1.9428, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.9815950920245399, |
|
"grad_norm": 0.802831711997894, |
|
"learning_rate": 1.0085350206833016e-05, |
|
"loss": 1.8988, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9846625766871165, |
|
"grad_norm": 0.36523952422202455, |
|
"learning_rate": 1.0059276704329856e-05, |
|
"loss": 1.8695, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.9877300613496932, |
|
"grad_norm": 0.2857793976397457, |
|
"learning_rate": 1.003794009003713e-05, |
|
"loss": 1.8923, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.99079754601227, |
|
"grad_norm": 0.306887686398712, |
|
"learning_rate": 1.0021342612898534e-05, |
|
"loss": 1.9541, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9938650306748467, |
|
"grad_norm": 0.5124292513803443, |
|
"learning_rate": 1.0009486022338391e-05, |
|
"loss": 1.9622, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9969325153374233, |
|
"grad_norm": 0.27281561169770374, |
|
"learning_rate": 1.0002371568077212e-05, |
|
"loss": 1.9336, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.28851290398135704, |
|
"learning_rate": 1e-05, |
|
"loss": 1.8766, |
|
"step": 326 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 326, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 66, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 355990511812608.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|