Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +353 -3
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6307,6 +6307,356 @@
|
|
6307 |
"learning_rate": 2.7091379149682685e-06,
|
6308 |
"loss": 0.0062,
|
6309 |
"step": 9000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6310 |
}
|
6311 |
],
|
6312 |
"logging_steps": 10,
|
@@ -6326,7 +6676,7 @@
|
|
6326 |
"attributes": {}
|
6327 |
}
|
6328 |
},
|
6329 |
-
"total_flos": 3.
|
6330 |
"train_batch_size": 16,
|
6331 |
"trial_name": null,
|
6332 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 19.66873706004141,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 9500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6307 |
"learning_rate": 2.7091379149682685e-06,
|
6308 |
"loss": 0.0062,
|
6309 |
"step": 9000
|
6310 |
+
},
|
6311 |
+
{
|
6312 |
+
"epoch": 18.65424430641822,
|
6313 |
+
"grad_norm": 0.1678360253572464,
|
6314 |
+
"learning_rate": 2.6557085182532582e-06,
|
6315 |
+
"loss": 0.0092,
|
6316 |
+
"step": 9010
|
6317 |
+
},
|
6318 |
+
{
|
6319 |
+
"epoch": 18.67494824016563,
|
6320 |
+
"grad_norm": 0.15553714334964752,
|
6321 |
+
"learning_rate": 2.602796871124663e-06,
|
6322 |
+
"loss": 0.0116,
|
6323 |
+
"step": 9020
|
6324 |
+
},
|
6325 |
+
{
|
6326 |
+
"epoch": 18.695652173913043,
|
6327 |
+
"grad_norm": 0.12781013548374176,
|
6328 |
+
"learning_rate": 2.5504035522157854e-06,
|
6329 |
+
"loss": 0.0069,
|
6330 |
+
"step": 9030
|
6331 |
+
},
|
6332 |
+
{
|
6333 |
+
"epoch": 18.716356107660456,
|
6334 |
+
"grad_norm": 0.10278218239545822,
|
6335 |
+
"learning_rate": 2.4985291344915674e-06,
|
6336 |
+
"loss": 0.0068,
|
6337 |
+
"step": 9040
|
6338 |
+
},
|
6339 |
+
{
|
6340 |
+
"epoch": 18.73706004140787,
|
6341 |
+
"grad_norm": 0.31690600514411926,
|
6342 |
+
"learning_rate": 2.4471741852423237e-06,
|
6343 |
+
"loss": 0.0093,
|
6344 |
+
"step": 9050
|
6345 |
+
},
|
6346 |
+
{
|
6347 |
+
"epoch": 18.757763975155278,
|
6348 |
+
"grad_norm": 0.27544263005256653,
|
6349 |
+
"learning_rate": 2.3963392660775575e-06,
|
6350 |
+
"loss": 0.0075,
|
6351 |
+
"step": 9060
|
6352 |
+
},
|
6353 |
+
{
|
6354 |
+
"epoch": 18.77846790890269,
|
6355 |
+
"grad_norm": 0.26249682903289795,
|
6356 |
+
"learning_rate": 2.3460249329197824e-06,
|
6357 |
+
"loss": 0.0076,
|
6358 |
+
"step": 9070
|
6359 |
+
},
|
6360 |
+
{
|
6361 |
+
"epoch": 18.799171842650104,
|
6362 |
+
"grad_norm": 0.09675180912017822,
|
6363 |
+
"learning_rate": 2.296231735998511e-06,
|
6364 |
+
"loss": 0.0059,
|
6365 |
+
"step": 9080
|
6366 |
+
},
|
6367 |
+
{
|
6368 |
+
"epoch": 18.819875776397517,
|
6369 |
+
"grad_norm": 0.08136120438575745,
|
6370 |
+
"learning_rate": 2.2469602198441573e-06,
|
6371 |
+
"loss": 0.0049,
|
6372 |
+
"step": 9090
|
6373 |
+
},
|
6374 |
+
{
|
6375 |
+
"epoch": 18.840579710144926,
|
6376 |
+
"grad_norm": 0.17611472308635712,
|
6377 |
+
"learning_rate": 2.1982109232821178e-06,
|
6378 |
+
"loss": 0.0063,
|
6379 |
+
"step": 9100
|
6380 |
+
},
|
6381 |
+
{
|
6382 |
+
"epoch": 18.86128364389234,
|
6383 |
+
"grad_norm": 0.09379903227090836,
|
6384 |
+
"learning_rate": 2.149984379426906e-06,
|
6385 |
+
"loss": 0.0064,
|
6386 |
+
"step": 9110
|
6387 |
+
},
|
6388 |
+
{
|
6389 |
+
"epoch": 18.88198757763975,
|
6390 |
+
"grad_norm": 0.05829261615872383,
|
6391 |
+
"learning_rate": 2.102281115676258e-06,
|
6392 |
+
"loss": 0.0042,
|
6393 |
+
"step": 9120
|
6394 |
+
},
|
6395 |
+
{
|
6396 |
+
"epoch": 18.902691511387165,
|
6397 |
+
"grad_norm": 0.05887362360954285,
|
6398 |
+
"learning_rate": 2.0551016537054493e-06,
|
6399 |
+
"loss": 0.0091,
|
6400 |
+
"step": 9130
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 18.923395445134574,
|
6404 |
+
"grad_norm": 0.08341336995363235,
|
6405 |
+
"learning_rate": 2.008446509461498e-06,
|
6406 |
+
"loss": 0.0075,
|
6407 |
+
"step": 9140
|
6408 |
+
},
|
6409 |
+
{
|
6410 |
+
"epoch": 18.944099378881987,
|
6411 |
+
"grad_norm": 0.13443836569786072,
|
6412 |
+
"learning_rate": 1.962316193157593e-06,
|
6413 |
+
"loss": 0.0051,
|
6414 |
+
"step": 9150
|
6415 |
+
},
|
6416 |
+
{
|
6417 |
+
"epoch": 18.9648033126294,
|
6418 |
+
"grad_norm": 0.16308577358722687,
|
6419 |
+
"learning_rate": 1.91671120926748e-06,
|
6420 |
+
"loss": 0.0084,
|
6421 |
+
"step": 9160
|
6422 |
+
},
|
6423 |
+
{
|
6424 |
+
"epoch": 18.985507246376812,
|
6425 |
+
"grad_norm": 0.080412857234478,
|
6426 |
+
"learning_rate": 1.8716320565199618e-06,
|
6427 |
+
"loss": 0.0094,
|
6428 |
+
"step": 9170
|
6429 |
+
},
|
6430 |
+
{
|
6431 |
+
"epoch": 19.006211180124225,
|
6432 |
+
"grad_norm": 0.29631340503692627,
|
6433 |
+
"learning_rate": 1.8270792278934302e-06,
|
6434 |
+
"loss": 0.0051,
|
6435 |
+
"step": 9180
|
6436 |
+
},
|
6437 |
+
{
|
6438 |
+
"epoch": 19.026915113871635,
|
6439 |
+
"grad_norm": 0.15507416427135468,
|
6440 |
+
"learning_rate": 1.7830532106104747e-06,
|
6441 |
+
"loss": 0.0062,
|
6442 |
+
"step": 9190
|
6443 |
+
},
|
6444 |
+
{
|
6445 |
+
"epoch": 19.047619047619047,
|
6446 |
+
"grad_norm": 0.23660752177238464,
|
6447 |
+
"learning_rate": 1.7395544861325718e-06,
|
6448 |
+
"loss": 0.0073,
|
6449 |
+
"step": 9200
|
6450 |
+
},
|
6451 |
+
{
|
6452 |
+
"epoch": 19.06832298136646,
|
6453 |
+
"grad_norm": 0.2019672840833664,
|
6454 |
+
"learning_rate": 1.696583530154794e-06,
|
6455 |
+
"loss": 0.0115,
|
6456 |
+
"step": 9210
|
6457 |
+
},
|
6458 |
+
{
|
6459 |
+
"epoch": 19.089026915113873,
|
6460 |
+
"grad_norm": 0.06961517781019211,
|
6461 |
+
"learning_rate": 1.6541408126006463e-06,
|
6462 |
+
"loss": 0.0061,
|
6463 |
+
"step": 9220
|
6464 |
+
},
|
6465 |
+
{
|
6466 |
+
"epoch": 19.109730848861282,
|
6467 |
+
"grad_norm": 0.2266739010810852,
|
6468 |
+
"learning_rate": 1.6122267976168781e-06,
|
6469 |
+
"loss": 0.0104,
|
6470 |
+
"step": 9230
|
6471 |
+
},
|
6472 |
+
{
|
6473 |
+
"epoch": 19.130434782608695,
|
6474 |
+
"grad_norm": 0.10511913150548935,
|
6475 |
+
"learning_rate": 1.5708419435684462e-06,
|
6476 |
+
"loss": 0.0094,
|
6477 |
+
"step": 9240
|
6478 |
+
},
|
6479 |
+
{
|
6480 |
+
"epoch": 19.151138716356108,
|
6481 |
+
"grad_norm": 0.1996021419763565,
|
6482 |
+
"learning_rate": 1.5299867030334814e-06,
|
6483 |
+
"loss": 0.0093,
|
6484 |
+
"step": 9250
|
6485 |
+
},
|
6486 |
+
{
|
6487 |
+
"epoch": 19.17184265010352,
|
6488 |
+
"grad_norm": 0.12162107229232788,
|
6489 |
+
"learning_rate": 1.4896615227983468e-06,
|
6490 |
+
"loss": 0.0044,
|
6491 |
+
"step": 9260
|
6492 |
+
},
|
6493 |
+
{
|
6494 |
+
"epoch": 19.19254658385093,
|
6495 |
+
"grad_norm": 0.09749267995357513,
|
6496 |
+
"learning_rate": 1.4498668438527597e-06,
|
6497 |
+
"loss": 0.0064,
|
6498 |
+
"step": 9270
|
6499 |
+
},
|
6500 |
+
{
|
6501 |
+
"epoch": 19.213250517598343,
|
6502 |
+
"grad_norm": 0.26853621006011963,
|
6503 |
+
"learning_rate": 1.4106031013849496e-06,
|
6504 |
+
"loss": 0.0141,
|
6505 |
+
"step": 9280
|
6506 |
+
},
|
6507 |
+
{
|
6508 |
+
"epoch": 19.233954451345756,
|
6509 |
+
"grad_norm": 0.08542772382497787,
|
6510 |
+
"learning_rate": 1.3718707247769135e-06,
|
6511 |
+
"loss": 0.0049,
|
6512 |
+
"step": 9290
|
6513 |
+
},
|
6514 |
+
{
|
6515 |
+
"epoch": 19.25465838509317,
|
6516 |
+
"grad_norm": 0.4365129768848419,
|
6517 |
+
"learning_rate": 1.333670137599713e-06,
|
6518 |
+
"loss": 0.01,
|
6519 |
+
"step": 9300
|
6520 |
+
},
|
6521 |
+
{
|
6522 |
+
"epoch": 19.27536231884058,
|
6523 |
+
"grad_norm": 0.3198767602443695,
|
6524 |
+
"learning_rate": 1.2960017576088446e-06,
|
6525 |
+
"loss": 0.0104,
|
6526 |
+
"step": 9310
|
6527 |
+
},
|
6528 |
+
{
|
6529 |
+
"epoch": 19.29606625258799,
|
6530 |
+
"grad_norm": 0.2250336855649948,
|
6531 |
+
"learning_rate": 1.2588659967397e-06,
|
6532 |
+
"loss": 0.0175,
|
6533 |
+
"step": 9320
|
6534 |
+
},
|
6535 |
+
{
|
6536 |
+
"epoch": 19.316770186335404,
|
6537 |
+
"grad_norm": 0.22645168006420135,
|
6538 |
+
"learning_rate": 1.222263261102985e-06,
|
6539 |
+
"loss": 0.0079,
|
6540 |
+
"step": 9330
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 19.337474120082817,
|
6544 |
+
"grad_norm": 0.18786205351352692,
|
6545 |
+
"learning_rate": 1.1861939509803687e-06,
|
6546 |
+
"loss": 0.0074,
|
6547 |
+
"step": 9340
|
6548 |
+
},
|
6549 |
+
{
|
6550 |
+
"epoch": 19.358178053830226,
|
6551 |
+
"grad_norm": 0.1437540352344513,
|
6552 |
+
"learning_rate": 1.1506584608200367e-06,
|
6553 |
+
"loss": 0.0044,
|
6554 |
+
"step": 9350
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 19.37888198757764,
|
6558 |
+
"grad_norm": 0.13397444784641266,
|
6559 |
+
"learning_rate": 1.1156571792324211e-06,
|
6560 |
+
"loss": 0.0055,
|
6561 |
+
"step": 9360
|
6562 |
+
},
|
6563 |
+
{
|
6564 |
+
"epoch": 19.399585921325052,
|
6565 |
+
"grad_norm": 0.1180369108915329,
|
6566 |
+
"learning_rate": 1.0811904889859336e-06,
|
6567 |
+
"loss": 0.0147,
|
6568 |
+
"step": 9370
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 19.420289855072465,
|
6572 |
+
"grad_norm": 0.13278833031654358,
|
6573 |
+
"learning_rate": 1.0472587670027678e-06,
|
6574 |
+
"loss": 0.0038,
|
6575 |
+
"step": 9380
|
6576 |
+
},
|
6577 |
+
{
|
6578 |
+
"epoch": 19.440993788819874,
|
6579 |
+
"grad_norm": 0.2261534333229065,
|
6580 |
+
"learning_rate": 1.0138623843548078e-06,
|
6581 |
+
"loss": 0.0145,
|
6582 |
+
"step": 9390
|
6583 |
+
},
|
6584 |
+
{
|
6585 |
+
"epoch": 19.461697722567287,
|
6586 |
+
"grad_norm": 0.16062599420547485,
|
6587 |
+
"learning_rate": 9.810017062595322e-07,
|
6588 |
+
"loss": 0.0091,
|
6589 |
+
"step": 9400
|
6590 |
+
},
|
6591 |
+
{
|
6592 |
+
"epoch": 19.4824016563147,
|
6593 |
+
"grad_norm": 0.09465906769037247,
|
6594 |
+
"learning_rate": 9.486770920760668e-07,
|
6595 |
+
"loss": 0.0079,
|
6596 |
+
"step": 9410
|
6597 |
+
},
|
6598 |
+
{
|
6599 |
+
"epoch": 19.503105590062113,
|
6600 |
+
"grad_norm": 0.06441060453653336,
|
6601 |
+
"learning_rate": 9.168888953011989e-07,
|
6602 |
+
"loss": 0.0056,
|
6603 |
+
"step": 9420
|
6604 |
+
},
|
6605 |
+
{
|
6606 |
+
"epoch": 19.523809523809526,
|
6607 |
+
"grad_norm": 0.3446926176548004,
|
6608 |
+
"learning_rate": 8.856374635655695e-07,
|
6609 |
+
"loss": 0.0066,
|
6610 |
+
"step": 9430
|
6611 |
+
},
|
6612 |
+
{
|
6613 |
+
"epoch": 19.544513457556935,
|
6614 |
+
"grad_norm": 0.264846533536911,
|
6615 |
+
"learning_rate": 8.549231386298151e-07,
|
6616 |
+
"loss": 0.008,
|
6617 |
+
"step": 9440
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 19.565217391304348,
|
6621 |
+
"grad_norm": 0.23693622648715973,
|
6622 |
+
"learning_rate": 8.247462563808817e-07,
|
6623 |
+
"loss": 0.0085,
|
6624 |
+
"step": 9450
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 19.58592132505176,
|
6628 |
+
"grad_norm": 0.1094195693731308,
|
6629 |
+
"learning_rate": 7.951071468283167e-07,
|
6630 |
+
"loss": 0.0042,
|
6631 |
+
"step": 9460
|
6632 |
+
},
|
6633 |
+
{
|
6634 |
+
"epoch": 19.606625258799173,
|
6635 |
+
"grad_norm": 0.058433957397937775,
|
6636 |
+
"learning_rate": 7.66006134100672e-07,
|
6637 |
+
"loss": 0.0063,
|
6638 |
+
"step": 9470
|
6639 |
+
},
|
6640 |
+
{
|
6641 |
+
"epoch": 19.627329192546583,
|
6642 |
+
"grad_norm": 0.28139904141426086,
|
6643 |
+
"learning_rate": 7.374435364419674e-07,
|
6644 |
+
"loss": 0.0064,
|
6645 |
+
"step": 9480
|
6646 |
+
},
|
6647 |
+
{
|
6648 |
+
"epoch": 19.648033126293996,
|
6649 |
+
"grad_norm": 0.13973474502563477,
|
6650 |
+
"learning_rate": 7.094196662081831e-07,
|
6651 |
+
"loss": 0.0054,
|
6652 |
+
"step": 9490
|
6653 |
+
},
|
6654 |
+
{
|
6655 |
+
"epoch": 19.66873706004141,
|
6656 |
+
"grad_norm": 0.27205267548561096,
|
6657 |
+
"learning_rate": 6.819348298638839e-07,
|
6658 |
+
"loss": 0.0077,
|
6659 |
+
"step": 9500
|
6660 |
}
|
6661 |
],
|
6662 |
"logging_steps": 10,
|
|
|
6676 |
"attributes": {}
|
6677 |
}
|
6678 |
},
|
6679 |
+
"total_flos": 3.4453830668706816e+17,
|
6680 |
"train_batch_size": 16,
|
6681 |
"trial_name": null,
|
6682 |
"trial_params": null
|