Training in progress, step 1600, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dd8edd4b23894593994dd1597dbc694ccc81001cb7faf035867a3339f3d6ca28
 size 73911112

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e3fa9ed80396e8f083b3d374dcff89bc7c9037cfc4e5585d635d883ef34faa3f
 size 37965684

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e9ec4f7c75fc8dc73feec982dc49c1db4c92d75f32040d95e28ccfad97fcf3c4
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0a342d756ed2deadb7604676fd81f55043100e7cbb57f36d28b57fa6ef3c9fda
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.04516534134745598,
   "best_model_checkpoint": "miner_id_24/checkpoint-1400",
-  "epoch": 0.
+  "epoch": 0.13984944333366112,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 1600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -10635,6 +10635,714 @@
       "eval_samples_per_second": 29.681,
       "eval_steps_per_second": 7.42,
       "step": 1500
+    },
+    {
+      "epoch": 0.13119625902739082,
+      "grad_norm": 0.08697141706943512,
+      "learning_rate": 0.00010350138745749725,
+      "loss": 0.0364,
+      "step": 1501
+    },
+    {
+      "epoch": 0.13128366492947435,
+      "grad_norm": 0.11794944107532501,
+      "learning_rate": 0.00010339844573921038,
+      "loss": 0.0406,
+      "step": 1502
+    },
+    {
+      "epoch": 0.1313710708315579,
+      "grad_norm": 0.20966710150241852,
+      "learning_rate": 0.00010329550041530005,
+      "loss": 0.0717,
+      "step": 1503
+    },
+    {
+      "epoch": 0.13145847673364144,
+      "grad_norm": 0.15292756259441376,
+      "learning_rate": 0.0001031925515949874,
+      "loss": 0.0329,
+      "step": 1504
+    },
+    {
+      "epoch": 0.13154588263572498,
+      "grad_norm": 0.06401754915714264,
+      "learning_rate": 0.00010308959938749729,
+      "loss": 0.024,
+      "step": 1505
+    },
+    {
+      "epoch": 0.1316332885378085,
+      "grad_norm": 0.0975053682923317,
+      "learning_rate": 0.00010298664390205812,
+      "loss": 0.0453,
+      "step": 1506
+    },
+    {
+      "epoch": 0.13172069443989207,
+      "grad_norm": 0.07814626395702362,
+      "learning_rate": 0.00010288368524790182,
+      "loss": 0.029,
+      "step": 1507
+    },
+    {
+      "epoch": 0.1318081003419756,
+      "grad_norm": 0.08279038220643997,
+      "learning_rate": 0.00010278072353426365,
+      "loss": 0.0267,
+      "step": 1508
+    },
+    {
+      "epoch": 0.13189550624405913,
+      "grad_norm": 0.10478601604700089,
+      "learning_rate": 0.0001026777588703821,
+      "loss": 0.0389,
+      "step": 1509
+    },
+    {
+      "epoch": 0.13198291214614266,
+      "grad_norm": 0.0825965404510498,
+      "learning_rate": 0.00010257479136549889,
+      "loss": 0.0278,
+      "step": 1510
+    },
+    {
+      "epoch": 0.1320703180482262,
+      "grad_norm": 0.09572532027959824,
+      "learning_rate": 0.00010247182112885861,
+      "loss": 0.0352,
+      "step": 1511
+    },
+    {
+      "epoch": 0.13215772395030975,
+      "grad_norm": 0.06361362338066101,
+      "learning_rate": 0.00010236884826970878,
+      "loss": 0.0197,
+      "step": 1512
+    },
+    {
+      "epoch": 0.13224512985239328,
+      "grad_norm": 0.08282926678657532,
+      "learning_rate": 0.00010226587289729988,
+      "loss": 0.0324,
+      "step": 1513
+    },
+    {
+      "epoch": 0.13233253575447682,
+      "grad_norm": 0.13065838813781738,
+      "learning_rate": 0.00010216289512088479,
+      "loss": 0.0547,
+      "step": 1514
+    },
+    {
+      "epoch": 0.13241994165656035,
+      "grad_norm": 0.060922764241695404,
+      "learning_rate": 0.00010205991504971912,
+      "loss": 0.0205,
+      "step": 1515
+    },
+    {
+      "epoch": 0.1325073475586439,
+      "grad_norm": 0.07022725045681,
+      "learning_rate": 0.00010195693279306089,
+      "loss": 0.0221,
+      "step": 1516
+    },
+    {
+      "epoch": 0.13259475346072744,
+      "grad_norm": 0.10593367367982864,
+      "learning_rate": 0.00010185394846017036,
+      "loss": 0.036,
+      "step": 1517
+    },
+    {
+      "epoch": 0.13268215936281097,
+      "grad_norm": 0.07627231627702713,
+      "learning_rate": 0.00010175096216031006,
+      "loss": 0.0322,
+      "step": 1518
+    },
+    {
+      "epoch": 0.1327695652648945,
+      "grad_norm": 0.05789932608604431,
+      "learning_rate": 0.0001016479740027446,
+      "loss": 0.0289,
+      "step": 1519
+    },
+    {
+      "epoch": 0.13285697116697806,
+      "grad_norm": 0.09948685020208359,
+      "learning_rate": 0.00010154498409674051,
+      "loss": 0.0331,
+      "step": 1520
+    },
+    {
+      "epoch": 0.1329443770690616,
+      "grad_norm": 0.07831980288028717,
+      "learning_rate": 0.00010144199255156631,
+      "loss": 0.0306,
+      "step": 1521
+    },
+    {
+      "epoch": 0.13303178297114512,
+      "grad_norm": 0.10856369137763977,
+      "learning_rate": 0.00010133899947649209,
+      "loss": 0.0355,
+      "step": 1522
+    },
+    {
+      "epoch": 0.13311918887322866,
+      "grad_norm": 0.16798873245716095,
+      "learning_rate": 0.0001012360049807896,
+      "loss": 0.0508,
+      "step": 1523
+    },
+    {
+      "epoch": 0.13320659477531221,
+      "grad_norm": 0.11936990171670914,
+      "learning_rate": 0.00010113300917373226,
+      "loss": 0.0575,
+      "step": 1524
+    },
+    {
+      "epoch": 0.13329400067739575,
+      "grad_norm": 0.09081139415502548,
+      "learning_rate": 0.00010103001216459469,
+      "loss": 0.0288,
+      "step": 1525
+    },
+    {
+      "epoch": 0.13338140657947928,
+      "grad_norm": 0.08312739431858063,
+      "learning_rate": 0.0001009270140626528,
+      "loss": 0.0196,
+      "step": 1526
+    },
+    {
+      "epoch": 0.1334688124815628,
+      "grad_norm": 0.08712684363126755,
+      "learning_rate": 0.0001008240149771838,
+      "loss": 0.0302,
+      "step": 1527
+    },
+    {
+      "epoch": 0.13355621838364637,
+      "grad_norm": 0.17785540223121643,
+      "learning_rate": 0.0001007210150174658,
+      "loss": 0.0321,
+      "step": 1528
+    },
+    {
+      "epoch": 0.1336436242857299,
+      "grad_norm": 0.119221031665802,
+      "learning_rate": 0.00010061801429277796,
+      "loss": 0.0481,
+      "step": 1529
+    },
+    {
+      "epoch": 0.13373103018781343,
+      "grad_norm": 0.04911545664072037,
+      "learning_rate": 0.00010051501291240008,
+      "loss": 0.0181,
+      "step": 1530
+    },
+    {
+      "epoch": 0.13381843608989696,
+      "grad_norm": 0.05027123540639877,
+      "learning_rate": 0.00010041201098561286,
+      "loss": 0.0183,
+      "step": 1531
+    },
+    {
+      "epoch": 0.1339058419919805,
+      "grad_norm": 0.13207297027111053,
+      "learning_rate": 0.00010030900862169744,
+      "loss": 0.0185,
+      "step": 1532
+    },
+    {
+      "epoch": 0.13399324789406405,
+      "grad_norm": 0.05222494527697563,
+      "learning_rate": 0.00010020600592993548,
+      "loss": 0.0165,
+      "step": 1533
+    },
+    {
+      "epoch": 0.1340806537961476,
+      "grad_norm": 0.07455245405435562,
+      "learning_rate": 0.00010010300301960888,
+      "loss": 0.0299,
+      "step": 1534
+    },
+    {
+      "epoch": 0.13416805969823112,
+      "grad_norm": 0.07873023301362991,
+      "learning_rate": 0.0001,
+      "loss": 0.0283,
+      "step": 1535
+    },
+    {
+      "epoch": 0.13425546560031465,
+      "grad_norm": 0.07464205473661423,
+      "learning_rate": 9.989699698039111e-05,
+      "loss": 0.0312,
+      "step": 1536
+    },
+    {
+      "epoch": 0.1343428715023982,
+      "grad_norm": 0.08801527321338654,
+      "learning_rate": 9.979399407006458e-05,
+      "loss": 0.0233,
+      "step": 1537
+    },
+    {
+      "epoch": 0.13443027740448174,
+      "grad_norm": 0.104781374335289,
+      "learning_rate": 9.969099137830259e-05,
+      "loss": 0.0353,
+      "step": 1538
+    },
+    {
+      "epoch": 0.13451768330656527,
+      "grad_norm": 0.0791974663734436,
+      "learning_rate": 9.958798901438715e-05,
+      "loss": 0.0283,
+      "step": 1539
+    },
+    {
+      "epoch": 0.1346050892086488,
+      "grad_norm": 0.060504913330078125,
+      "learning_rate": 9.948498708759993e-05,
+      "loss": 0.0192,
+      "step": 1540
+    },
+    {
+      "epoch": 0.13469249511073236,
+      "grad_norm": 0.04931584745645523,
+      "learning_rate": 9.938198570722207e-05,
+      "loss": 0.0225,
+      "step": 1541
+    },
+    {
+      "epoch": 0.1347799010128159,
+      "grad_norm": 0.10144636034965515,
+      "learning_rate": 9.927898498253422e-05,
+      "loss": 0.0259,
+      "step": 1542
+    },
+    {
+      "epoch": 0.13486730691489943,
+      "grad_norm": 0.06569879502058029,
+      "learning_rate": 9.917598502281621e-05,
+      "loss": 0.0258,
+      "step": 1543
+    },
+    {
+      "epoch": 0.13495471281698296,
+      "grad_norm": 0.06734506040811539,
+      "learning_rate": 9.90729859373472e-05,
+      "loss": 0.0259,
+      "step": 1544
+    },
+    {
+      "epoch": 0.13504211871906652,
+      "grad_norm": 0.07036790996789932,
+      "learning_rate": 9.896998783540536e-05,
+      "loss": 0.0289,
+      "step": 1545
+    },
+    {
+      "epoch": 0.13512952462115005,
+      "grad_norm": 0.05877295881509781,
+      "learning_rate": 9.886699082626775e-05,
+      "loss": 0.0193,
+      "step": 1546
+    },
+    {
+      "epoch": 0.13521693052323358,
+      "grad_norm": 0.0712662935256958,
+      "learning_rate": 9.87639950192104e-05,
+      "loss": 0.0256,
+      "step": 1547
+    },
+    {
+      "epoch": 0.1353043364253171,
+      "grad_norm": 0.10854820907115936,
+      "learning_rate": 9.866100052350796e-05,
+      "loss": 0.0423,
+      "step": 1548
+    },
+    {
+      "epoch": 0.13539174232740067,
+      "grad_norm": 0.10757939517498016,
+      "learning_rate": 9.855800744843372e-05,
+      "loss": 0.0378,
+      "step": 1549
+    },
+    {
+      "epoch": 0.1354791482294842,
+      "grad_norm": 0.07478786259889603,
+      "learning_rate": 9.845501590325948e-05,
+      "loss": 0.0251,
+      "step": 1550
+    },
+    {
+      "epoch": 0.13556655413156773,
+      "grad_norm": 0.08897072076797485,
+      "learning_rate": 9.835202599725544e-05,
+      "loss": 0.027,
+      "step": 1551
+    },
+    {
+      "epoch": 0.13565396003365127,
+      "grad_norm": 0.07537046819925308,
+      "learning_rate": 9.824903783968996e-05,
+      "loss": 0.0223,
+      "step": 1552
+    },
+    {
+      "epoch": 0.1357413659357348,
+      "grad_norm": 0.0578019842505455,
+      "learning_rate": 9.814605153982967e-05,
+      "loss": 0.0197,
+      "step": 1553
+    },
+    {
+      "epoch": 0.13582877183781836,
+      "grad_norm": 0.0902201235294342,
+      "learning_rate": 9.804306720693913e-05,
+      "loss": 0.0264,
+      "step": 1554
+    },
+    {
+      "epoch": 0.1359161777399019,
+      "grad_norm": 0.1797989010810852,
+      "learning_rate": 9.794008495028087e-05,
+      "loss": 0.0301,
+      "step": 1555
+    },
+    {
+      "epoch": 0.13600358364198542,
+      "grad_norm": 0.1257629692554474,
+      "learning_rate": 9.783710487911523e-05,
+      "loss": 0.0292,
+      "step": 1556
+    },
+    {
+      "epoch": 0.13609098954406895,
+      "grad_norm": 0.0894462838768959,
+      "learning_rate": 9.773412710270016e-05,
+      "loss": 0.0302,
+      "step": 1557
+    },
+    {
+      "epoch": 0.1361783954461525,
+      "grad_norm": 0.1309974491596222,
+      "learning_rate": 9.763115173029121e-05,
+      "loss": 0.038,
+      "step": 1558
+    },
+    {
+      "epoch": 0.13626580134823604,
+      "grad_norm": 0.15583781898021698,
+      "learning_rate": 9.752817887114146e-05,
+      "loss": 0.0389,
+      "step": 1559
+    },
+    {
+      "epoch": 0.13635320725031957,
+      "grad_norm": 0.055186927318573,
+      "learning_rate": 9.742520863450115e-05,
+      "loss": 0.0215,
+      "step": 1560
+    },
+    {
+      "epoch": 0.1364406131524031,
+      "grad_norm": 0.06592228263616562,
+      "learning_rate": 9.73222411296179e-05,
+      "loss": 0.0205,
+      "step": 1561
+    },
+    {
+      "epoch": 0.13652801905448667,
+      "grad_norm": 0.16731403768062592,
+      "learning_rate": 9.721927646573639e-05,
+      "loss": 0.0476,
+      "step": 1562
+    },
+    {
+      "epoch": 0.1366154249565702,
+      "grad_norm": 0.07706723362207413,
+      "learning_rate": 9.71163147520982e-05,
+      "loss": 0.0264,
+      "step": 1563
+    },
+    {
+      "epoch": 0.13670283085865373,
+      "grad_norm": 0.0804000049829483,
+      "learning_rate": 9.70133560979419e-05,
+      "loss": 0.0284,
+      "step": 1564
+    },
+    {
+      "epoch": 0.13679023676073726,
+      "grad_norm": 0.1061941385269165,
+      "learning_rate": 9.691040061250273e-05,
+      "loss": 0.0333,
+      "step": 1565
+    },
+    {
+      "epoch": 0.13687764266282082,
+      "grad_norm": 0.06735522300004959,
+      "learning_rate": 9.680744840501261e-05,
+      "loss": 0.0242,
+      "step": 1566
+    },
+    {
+      "epoch": 0.13696504856490435,
+      "grad_norm": 0.16169671714305878,
+      "learning_rate": 9.670449958469999e-05,
+      "loss": 0.0599,
+      "step": 1567
+    },
+    {
+      "epoch": 0.13705245446698788,
+      "grad_norm": 0.054925642907619476,
+      "learning_rate": 9.660155426078964e-05,
+      "loss": 0.0177,
+      "step": 1568
+    },
+    {
+      "epoch": 0.13713986036907141,
+      "grad_norm": 0.10755883902311325,
+      "learning_rate": 9.649861254250275e-05,
+      "loss": 0.0349,
+      "step": 1569
+    },
+    {
+      "epoch": 0.13722726627115495,
+      "grad_norm": 0.06160590425133705,
+      "learning_rate": 9.639567453905661e-05,
+      "loss": 0.024,
+      "step": 1570
+    },
+    {
+      "epoch": 0.1373146721732385,
+      "grad_norm": 0.20590682327747345,
+      "learning_rate": 9.629274035966457e-05,
+      "loss": 0.0551,
+      "step": 1571
+    },
+    {
+      "epoch": 0.13740207807532204,
+      "grad_norm": 0.1019708439707756,
+      "learning_rate": 9.618981011353592e-05,
+      "loss": 0.0335,
+      "step": 1572
+    },
+    {
+      "epoch": 0.13748948397740557,
+      "grad_norm": 0.06255567073822021,
+      "learning_rate": 9.608688390987584e-05,
+      "loss": 0.0234,
+      "step": 1573
+    },
+    {
+      "epoch": 0.1375768898794891,
+      "grad_norm": 0.10318184643983841,
+      "learning_rate": 9.59839618578851e-05,
+      "loss": 0.039,
+      "step": 1574
+    },
+    {
+      "epoch": 0.13766429578157266,
+      "grad_norm": 0.1166694313287735,
+      "learning_rate": 9.58810440667602e-05,
+      "loss": 0.0457,
+      "step": 1575
+    },
+    {
+      "epoch": 0.1377517016836562,
+      "grad_norm": 0.1220555454492569,
+      "learning_rate": 9.577813064569301e-05,
+      "loss": 0.0469,
+      "step": 1576
+    },
+    {
+      "epoch": 0.13783910758573972,
+      "grad_norm": 0.06090294197201729,
+      "learning_rate": 9.567522170387082e-05,
+      "loss": 0.0246,
+      "step": 1577
+    },
+    {
+      "epoch": 0.13792651348782325,
+      "grad_norm": 0.14577405154705048,
+      "learning_rate": 9.557231735047619e-05,
+      "loss": 0.0258,
+      "step": 1578
+    },
+    {
+      "epoch": 0.13801391938990681,
+      "grad_norm": 0.10201065242290497,
+      "learning_rate": 9.546941769468673e-05,
+      "loss": 0.0313,
+      "step": 1579
+    },
+    {
+      "epoch": 0.13810132529199035,
+      "grad_norm": 0.10618524253368378,
+      "learning_rate": 9.536652284567513e-05,
+      "loss": 0.0301,
+      "step": 1580
+    },
+    {
+      "epoch": 0.13818873119407388,
+      "grad_norm": 0.1036718562245369,
+      "learning_rate": 9.5263632912609e-05,
+      "loss": 0.0289,
+      "step": 1581
+    },
+    {
+      "epoch": 0.1382761370961574,
+      "grad_norm": 0.10260613262653351,
+      "learning_rate": 9.516074800465064e-05,
+      "loss": 0.0358,
+      "step": 1582
+    },
+    {
+      "epoch": 0.13836354299824097,
+      "grad_norm": 0.09655741602182388,
+      "learning_rate": 9.505786823095712e-05,
+      "loss": 0.0363,
+      "step": 1583
+    },
+    {
+      "epoch": 0.1384509489003245,
+      "grad_norm": 0.17335118353366852,
+      "learning_rate": 9.495499370068004e-05,
+      "loss": 0.029,
+      "step": 1584
+    },
+    {
+      "epoch": 0.13853835480240803,
+      "grad_norm": 0.07364212721586227,
+      "learning_rate": 9.485212452296535e-05,
+      "loss": 0.0333,
+      "step": 1585
+    },
+    {
+      "epoch": 0.13862576070449156,
+      "grad_norm": 0.1601741462945938,
+      "learning_rate": 9.474926080695345e-05,
+      "loss": 0.0691,
+      "step": 1586
+    },
+    {
+      "epoch": 0.13871316660657512,
+      "grad_norm": 0.06292049586772919,
+      "learning_rate": 9.464640266177883e-05,
+      "loss": 0.0215,
+      "step": 1587
+    },
+    {
+      "epoch": 0.13880057250865865,
+      "grad_norm": 0.09334374964237213,
+      "learning_rate": 9.454355019657017e-05,
+      "loss": 0.0269,
+      "step": 1588
+    },
+    {
+      "epoch": 0.13888797841074219,
+      "grad_norm": 0.07139234989881516,
+      "learning_rate": 9.444070352045006e-05,
+      "loss": 0.025,
+      "step": 1589
+    },
+    {
+      "epoch": 0.13897538431282572,
+      "grad_norm": 0.07587097585201263,
+      "learning_rate": 9.433786274253495e-05,
+      "loss": 0.0256,
+      "step": 1590
+    },
+    {
+      "epoch": 0.13906279021490925,
+      "grad_norm": 0.1509503573179245,
+      "learning_rate": 9.423502797193508e-05,
+      "loss": 0.0459,
+      "step": 1591
+    },
+    {
+      "epoch": 0.1391501961169928,
+      "grad_norm": 0.15848566591739655,
+      "learning_rate": 9.413219931775426e-05,
+      "loss": 0.0469,
+      "step": 1592
+    },
+    {
+      "epoch": 0.13923760201907634,
+      "grad_norm": 0.126972034573555,
+      "learning_rate": 9.402937688908983e-05,
+      "loss": 0.0427,
+      "step": 1593
+    },
+    {
+      "epoch": 0.13932500792115987,
+      "grad_norm": 0.10220225900411606,
+      "learning_rate": 9.392656079503256e-05,
+      "loss": 0.0349,
+      "step": 1594
+    },
+    {
+      "epoch": 0.1394124138232434,
+      "grad_norm": 0.07176347076892853,
+      "learning_rate": 9.382375114466644e-05,
+      "loss": 0.0295,
+      "step": 1595
+    },
+    {
+      "epoch": 0.13949981972532696,
+      "grad_norm": 0.13212533295154572,
+      "learning_rate": 9.372094804706867e-05,
+      "loss": 0.0551,
+      "step": 1596
+    },
+    {
+      "epoch": 0.1395872256274105,
+      "grad_norm": 0.10252740979194641,
+      "learning_rate": 9.36181516113095e-05,
+      "loss": 0.0403,
+      "step": 1597
+    },
+    {
+      "epoch": 0.13967463152949403,
+      "grad_norm": 0.06956984102725983,
+      "learning_rate": 9.351536194645198e-05,
+      "loss": 0.0216,
+      "step": 1598
+    },
+    {
+      "epoch": 0.13976203743157756,
+      "grad_norm": 0.11074226349592209,
+      "learning_rate": 9.341257916155222e-05,
+      "loss": 0.0274,
+      "step": 1599
+    },
+    {
+      "epoch": 0.13984944333366112,
+      "grad_norm": 0.14012524485588074,
+      "learning_rate": 9.330980336565887e-05,
+      "loss": 0.0365,
+      "step": 1600
+    },
+    {
+      "epoch": 0.13984944333366112,
+      "eval_loss": 0.04779767617583275,
+      "eval_runtime": 168.5729,
+      "eval_samples_per_second": 29.661,
+      "eval_steps_per_second": 7.415,
+      "step": 1600
     }
   ],
   "logging_steps": 1,
@@ -10649,7 +11357,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -10658,12 +11366,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.835480666709688e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null