|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 689340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00013054830287206266, |
|
"loss": 18.218, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002610966057441253, |
|
"loss": 4.1185, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002990742124651621, |
|
"loss": 3.2662, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00029775542680299976, |
|
"loss": 3.0539, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002964366411408375, |
|
"loss": 2.9401, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002951178554786752, |
|
"loss": 2.8804, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00029379906981651295, |
|
"loss": 2.8248, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002924802841543507, |
|
"loss": 2.7855, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002911614984921884, |
|
"loss": 2.7762, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002898427128300261, |
|
"loss": 2.7404, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002885239271678638, |
|
"loss": 2.7029, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00028720514150570154, |
|
"loss": 2.6612, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002858863558435392, |
|
"loss": 2.6141, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.000284567570181377, |
|
"loss": 2.5986, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00028324878451921467, |
|
"loss": 2.5744, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002819299988570524, |
|
"loss": 2.5785, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00028061121319489013, |
|
"loss": 2.5643, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002792924275327278, |
|
"loss": 2.569, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00027797364187056553, |
|
"loss": 2.5455, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00027665485620840326, |
|
"loss": 2.5417, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.000275336070546241, |
|
"loss": 2.5095, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0002740172848840787, |
|
"loss": 2.5052, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00027269849922191645, |
|
"loss": 2.5086, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0002713797135597541, |
|
"loss": 2.4391, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00027006092789759185, |
|
"loss": 2.4283, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002687421422354296, |
|
"loss": 2.4079, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002674233565732673, |
|
"loss": 2.4065, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00026610457091110504, |
|
"loss": 2.3893, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00026478578524894277, |
|
"loss": 2.4166, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00026346699958678044, |
|
"loss": 2.3907, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00026214821392461817, |
|
"loss": 2.3829, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002608294282624559, |
|
"loss": 2.3767, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00025951064260029363, |
|
"loss": 2.3518, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00025819185693813136, |
|
"loss": 2.3734, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0002568730712759691, |
|
"loss": 2.3126, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00025555428561380676, |
|
"loss": 2.2802, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002542354999516445, |
|
"loss": 2.2739, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0002529167142894822, |
|
"loss": 2.2588, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00025159792862731995, |
|
"loss": 2.2573, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002502791429651577, |
|
"loss": 2.2541, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002489603573029954, |
|
"loss": 2.2843, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002476415716408331, |
|
"loss": 2.2548, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002463227859786708, |
|
"loss": 2.252, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00024500400031650854, |
|
"loss": 2.2677, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00024368521465434624, |
|
"loss": 2.2497, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00024236642899218397, |
|
"loss": 2.2434, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0002410476433300217, |
|
"loss": 2.1625, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.0002397288576678594, |
|
"loss": 2.1795, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00023841007200569713, |
|
"loss": 2.1666, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00023709128634353483, |
|
"loss": 2.1749, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00023577250068137256, |
|
"loss": 2.1931, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0002344537150192103, |
|
"loss": 2.1797, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.000233134929357048, |
|
"loss": 2.1573, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.00023181614369488572, |
|
"loss": 2.1811, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00023049735803272345, |
|
"loss": 2.1539, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.00022917857237056115, |
|
"loss": 2.1657, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00022785978670839888, |
|
"loss": 2.1662, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002265410010462366, |
|
"loss": 2.1237, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002252222153840743, |
|
"loss": 2.0812, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00022390342972191204, |
|
"loss": 2.0917, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00022258464405974977, |
|
"loss": 2.09, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00022126585839758747, |
|
"loss": 2.0931, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0002199470727354252, |
|
"loss": 2.0907, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00021862828707326293, |
|
"loss": 2.0855, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00021730950141110063, |
|
"loss": 2.0946, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00021599071574893836, |
|
"loss": 2.0904, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002146719300867761, |
|
"loss": 2.0788, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0002133531444246138, |
|
"loss": 2.0771, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00021203435876245152, |
|
"loss": 2.0746, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00021071557310028925, |
|
"loss": 2.0091, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00020939678743812695, |
|
"loss": 2.0144, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00020807800177596468, |
|
"loss": 2.0083, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0002067592161138024, |
|
"loss": 2.0337, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0002054404304516401, |
|
"loss": 2.0169, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00020412164478947784, |
|
"loss": 2.0264, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00020280285912731557, |
|
"loss": 2.0089, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00020148407346515327, |
|
"loss": 2.0148, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.000200165287802991, |
|
"loss": 2.0224, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00019884650214082873, |
|
"loss": 2.0242, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00019752771647866643, |
|
"loss": 2.0142, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00019620893081650416, |
|
"loss": 1.9622, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0001948901451543419, |
|
"loss": 1.9643, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0001935713594921796, |
|
"loss": 1.9589, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00019225257383001732, |
|
"loss": 1.9411, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.000190933788167855, |
|
"loss": 1.956, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00018961500250569275, |
|
"loss": 1.9596, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00018829621684353048, |
|
"loss": 1.9373, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.00018697743118136815, |
|
"loss": 1.9532, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00018565864551920588, |
|
"loss": 1.9669, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00018433985985704364, |
|
"loss": 1.957, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.0001830210741948813, |
|
"loss": 1.977, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00018170228853271904, |
|
"loss": 1.9712, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.0001803835028705568, |
|
"loss": 1.8922, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.00017906471720839447, |
|
"loss": 1.9054, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0001777459315462322, |
|
"loss": 1.8847, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.00017642714588406993, |
|
"loss": 1.896, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.00017510836022190763, |
|
"loss": 1.9139, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00017378957455974536, |
|
"loss": 1.9086, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0001724707888975831, |
|
"loss": 1.9158, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0001711520032354208, |
|
"loss": 1.908, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00016983321757325852, |
|
"loss": 1.9034, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.00016851443191109625, |
|
"loss": 1.9022, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.00016719564624893395, |
|
"loss": 1.9045, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.00016587686058677168, |
|
"loss": 1.8607, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.0001645580749246094, |
|
"loss": 1.8439, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.0001632392892624471, |
|
"loss": 1.8252, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.00016192050360028484, |
|
"loss": 1.844, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.00016060171793812257, |
|
"loss": 1.839, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.00015928293227596027, |
|
"loss": 1.8442, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.000157964146613798, |
|
"loss": 1.8378, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.00015664536095163573, |
|
"loss": 1.8436, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.00015532657528947343, |
|
"loss": 1.8399, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.00015400778962731116, |
|
"loss": 1.8357, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0001526890039651489, |
|
"loss": 1.8406, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0001513702183029866, |
|
"loss": 1.8385, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.00015005143264082432, |
|
"loss": 1.7886, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 0.00014873264697866202, |
|
"loss": 1.7909, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.00014741386131649975, |
|
"loss": 1.794, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.00014609507565433748, |
|
"loss": 1.7712, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.00014477628999217518, |
|
"loss": 1.7875, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.0001434575043300129, |
|
"loss": 1.786, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.00014213871866785064, |
|
"loss": 1.7925, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.00014081993300568834, |
|
"loss": 1.7875, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 0.00013950114734352607, |
|
"loss": 1.7752, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0001381823616813638, |
|
"loss": 1.791, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0001368635760192015, |
|
"loss": 1.7857, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 0.00013554479035703923, |
|
"loss": 1.7407, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.00013422600469487693, |
|
"loss": 1.7115, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.00013290721903271466, |
|
"loss": 1.7294, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 0.0001315884333705524, |
|
"loss": 1.7205, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.0001302696477083901, |
|
"loss": 1.7281, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.00012895086204622782, |
|
"loss": 1.7418, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 0.00012763207638406555, |
|
"loss": 1.7316, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.00012631329072190325, |
|
"loss": 1.7348, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 0.00012499450505974098, |
|
"loss": 1.7392, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 0.0001236757193975787, |
|
"loss": 1.7341, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 0.0001223569337354164, |
|
"loss": 1.7347, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.00012103814807325414, |
|
"loss": 1.7196, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.00011971936241109186, |
|
"loss": 1.6614, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 0.00011840057674892957, |
|
"loss": 1.6671, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.0001170817910867673, |
|
"loss": 1.6665, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 0.00011576300542460502, |
|
"loss": 1.6775, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.00011444421976244273, |
|
"loss": 1.6646, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.00011312543410028046, |
|
"loss": 1.6779, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00011180664843811818, |
|
"loss": 1.6802, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.00011048786277595589, |
|
"loss": 1.6759, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 0.0001091690771137936, |
|
"loss": 1.6729, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 0.00010785029145163134, |
|
"loss": 1.6769, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00010653150578946905, |
|
"loss": 1.6721, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.00010521272012730677, |
|
"loss": 1.6302, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.0001038939344651445, |
|
"loss": 1.6112, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 0.00010257514880298221, |
|
"loss": 1.5991, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.00010125636314081991, |
|
"loss": 1.6221, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 9.993757747865765e-05, |
|
"loss": 1.6168, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 9.861879181649536e-05, |
|
"loss": 1.6166, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 9.730000615433307e-05, |
|
"loss": 1.6234, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 9.59812204921708e-05, |
|
"loss": 1.6245, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 9.466243483000852e-05, |
|
"loss": 1.618, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 9.334364916784623e-05, |
|
"loss": 1.6341, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 9.202486350568396e-05, |
|
"loss": 1.6353, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 9.070607784352168e-05, |
|
"loss": 1.6104, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 8.938729218135939e-05, |
|
"loss": 1.562, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 8.806850651919711e-05, |
|
"loss": 1.5654, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 8.674972085703484e-05, |
|
"loss": 1.564, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 8.543093519487255e-05, |
|
"loss": 1.5606, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 14.45, |
|
"learning_rate": 8.411214953271027e-05, |
|
"loss": 1.5764, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 8.2793363870548e-05, |
|
"loss": 1.5718, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 8.147457820838571e-05, |
|
"loss": 1.5768, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 8.015579254622343e-05, |
|
"loss": 1.5686, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 7.883700688406116e-05, |
|
"loss": 1.5753, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 7.751822122189887e-05, |
|
"loss": 1.5588, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 7.619943555973659e-05, |
|
"loss": 1.5588, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 7.48806498975743e-05, |
|
"loss": 1.5413, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 7.356186423541203e-05, |
|
"loss": 1.5102, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 7.224307857324975e-05, |
|
"loss": 1.5183, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 7.092429291108746e-05, |
|
"loss": 1.5267, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 6.960550724892519e-05, |
|
"loss": 1.5185, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 6.828672158676289e-05, |
|
"loss": 1.5195, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 6.696793592460062e-05, |
|
"loss": 1.5154, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 6.564915026243834e-05, |
|
"loss": 1.5312, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 6.433036460027605e-05, |
|
"loss": 1.5191, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 6.301157893811378e-05, |
|
"loss": 1.5209, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 6.16927932759515e-05, |
|
"loss": 1.5145, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 6.037400761378922e-05, |
|
"loss": 1.5147, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 5.905522195162694e-05, |
|
"loss": 1.47, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 5.773643628946465e-05, |
|
"loss": 1.4766, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 5.641765062730237e-05, |
|
"loss": 1.4688, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 5.5098864965140094e-05, |
|
"loss": 1.4727, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 5.378007930297781e-05, |
|
"loss": 1.4673, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 5.246129364081553e-05, |
|
"loss": 1.4728, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 5.1142507978653254e-05, |
|
"loss": 1.4751, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 4.982372231649097e-05, |
|
"loss": 1.4652, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 4.850493665432869e-05, |
|
"loss": 1.4783, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 4.718615099216641e-05, |
|
"loss": 1.4678, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 4.586736533000413e-05, |
|
"loss": 1.4717, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 4.454857966784185e-05, |
|
"loss": 1.4422, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 4.322979400567956e-05, |
|
"loss": 1.4209, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 4.191100834351728e-05, |
|
"loss": 1.4398, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 4.0592222681355005e-05, |
|
"loss": 1.4331, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 3.927343701919272e-05, |
|
"loss": 1.4253, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 3.795465135703044e-05, |
|
"loss": 1.4247, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 3.6635865694868164e-05, |
|
"loss": 1.432, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 3.531708003270588e-05, |
|
"loss": 1.4275, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 3.39982943705436e-05, |
|
"loss": 1.4305, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 3.267950870838132e-05, |
|
"loss": 1.4237, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 3.136072304621904e-05, |
|
"loss": 1.4339, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.0041937384056755e-05, |
|
"loss": 1.4083, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 2.8723151721894477e-05, |
|
"loss": 1.383, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 2.7404366059732196e-05, |
|
"loss": 1.3949, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 2.6085580397569915e-05, |
|
"loss": 1.3857, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 2.4766794735407634e-05, |
|
"loss": 1.4049, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 2.3448009073245356e-05, |
|
"loss": 1.3972, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 2.2129223411083075e-05, |
|
"loss": 1.3983, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 2.081043774892079e-05, |
|
"loss": 1.3907, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 1.949165208675851e-05, |
|
"loss": 1.3888, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 1.817286642459623e-05, |
|
"loss": 1.3762, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 1.685408076243395e-05, |
|
"loss": 1.3926, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 1.553529510027167e-05, |
|
"loss": 1.3874, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"learning_rate": 1.4216509438109387e-05, |
|
"loss": 1.375, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 1.2897723775947106e-05, |
|
"loss": 1.3655, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 1.1578938113784827e-05, |
|
"loss": 1.3625, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 1.0260152451622544e-05, |
|
"loss": 1.3705, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 8.941366789460264e-06, |
|
"loss": 1.3672, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 7.622581127297983e-06, |
|
"loss": 1.3545, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"learning_rate": 6.303795465135703e-06, |
|
"loss": 1.3611, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 4.985009802973422e-06, |
|
"loss": 1.3574, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 3.666224140811141e-06, |
|
"loss": 1.3673, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 2.34743847864886e-06, |
|
"loss": 1.3629, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"learning_rate": 1.028652816486579e-06, |
|
"loss": 1.3694, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 689340, |
|
"total_flos": 1.193067231326117e+21, |
|
"train_loss": 1.952947931640795, |
|
"train_runtime": 385301.7422, |
|
"train_samples_per_second": 28.625, |
|
"train_steps_per_second": 1.789 |
|
} |
|
], |
|
"logging_steps": 3000, |
|
"max_steps": 689340, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.193067231326117e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|