Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +354 -4
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6657,6 +6657,356 @@
|
|
6657 |
"learning_rate": 6.819348298638839e-07,
|
6658 |
"loss": 0.0077,
|
6659 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6660 |
}
|
6661 |
],
|
6662 |
"logging_steps": 10,
|
@@ -6671,12 +7021,12 @@
|
|
6671 |
"should_evaluate": false,
|
6672 |
"should_log": false,
|
6673 |
"should_save": true,
|
6674 |
-
"should_training_stop":
|
6675 |
},
|
6676 |
"attributes": {}
|
6677 |
}
|
6678 |
},
|
6679 |
-
"total_flos": 3.
|
6680 |
"train_batch_size": 16,
|
6681 |
"trial_name": null,
|
6682 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.70393374741201,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 10000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6657 |
"learning_rate": 6.819348298638839e-07,
|
6658 |
"loss": 0.0077,
|
6659 |
"step": 9500
|
6660 |
+
},
|
6661 |
+
{
|
6662 |
+
"epoch": 19.68944099378882,
|
6663 |
+
"grad_norm": 0.20381984114646912,
|
6664 |
+
"learning_rate": 6.549893279788277e-07,
|
6665 |
+
"loss": 0.0079,
|
6666 |
+
"step": 9510
|
6667 |
+
},
|
6668 |
+
{
|
6669 |
+
"epoch": 19.71014492753623,
|
6670 |
+
"grad_norm": 0.15530520677566528,
|
6671 |
+
"learning_rate": 6.285834552247128e-07,
|
6672 |
+
"loss": 0.0062,
|
6673 |
+
"step": 9520
|
6674 |
+
},
|
6675 |
+
{
|
6676 |
+
"epoch": 19.730848861283643,
|
6677 |
+
"grad_norm": 0.05255560576915741,
|
6678 |
+
"learning_rate": 6.027175003719354e-07,
|
6679 |
+
"loss": 0.0047,
|
6680 |
+
"step": 9530
|
6681 |
+
},
|
6682 |
+
{
|
6683 |
+
"epoch": 19.751552795031056,
|
6684 |
+
"grad_norm": 0.06348340958356857,
|
6685 |
+
"learning_rate": 5.773917462864264e-07,
|
6686 |
+
"loss": 0.0062,
|
6687 |
+
"step": 9540
|
6688 |
+
},
|
6689 |
+
{
|
6690 |
+
"epoch": 19.77225672877847,
|
6691 |
+
"grad_norm": 0.12964807450771332,
|
6692 |
+
"learning_rate": 5.526064699265753e-07,
|
6693 |
+
"loss": 0.0127,
|
6694 |
+
"step": 9550
|
6695 |
+
},
|
6696 |
+
{
|
6697 |
+
"epoch": 19.79296066252588,
|
6698 |
+
"grad_norm": 0.1979799121618271,
|
6699 |
+
"learning_rate": 5.283619423401998e-07,
|
6700 |
+
"loss": 0.0048,
|
6701 |
+
"step": 9560
|
6702 |
+
},
|
6703 |
+
{
|
6704 |
+
"epoch": 19.81366459627329,
|
6705 |
+
"grad_norm": 0.14146266877651215,
|
6706 |
+
"learning_rate": 5.046584286615697e-07,
|
6707 |
+
"loss": 0.0089,
|
6708 |
+
"step": 9570
|
6709 |
+
},
|
6710 |
+
{
|
6711 |
+
"epoch": 19.834368530020704,
|
6712 |
+
"grad_norm": 0.1563977748155594,
|
6713 |
+
"learning_rate": 4.814961881085045e-07,
|
6714 |
+
"loss": 0.0042,
|
6715 |
+
"step": 9580
|
6716 |
+
},
|
6717 |
+
{
|
6718 |
+
"epoch": 19.855072463768117,
|
6719 |
+
"grad_norm": 0.04129205644130707,
|
6720 |
+
"learning_rate": 4.5887547397955864e-07,
|
6721 |
+
"loss": 0.0047,
|
6722 |
+
"step": 9590
|
6723 |
+
},
|
6724 |
+
{
|
6725 |
+
"epoch": 19.875776397515526,
|
6726 |
+
"grad_norm": 0.2608759105205536,
|
6727 |
+
"learning_rate": 4.367965336512403e-07,
|
6728 |
+
"loss": 0.0134,
|
6729 |
+
"step": 9600
|
6730 |
+
},
|
6731 |
+
{
|
6732 |
+
"epoch": 19.89648033126294,
|
6733 |
+
"grad_norm": 0.16297097504138947,
|
6734 |
+
"learning_rate": 4.1525960857530243e-07,
|
6735 |
+
"loss": 0.0035,
|
6736 |
+
"step": 9610
|
6737 |
+
},
|
6738 |
+
{
|
6739 |
+
"epoch": 19.917184265010352,
|
6740 |
+
"grad_norm": 0.14169001579284668,
|
6741 |
+
"learning_rate": 3.9426493427611177e-07,
|
6742 |
+
"loss": 0.0076,
|
6743 |
+
"step": 9620
|
6744 |
+
},
|
6745 |
+
{
|
6746 |
+
"epoch": 19.937888198757765,
|
6747 |
+
"grad_norm": 0.141464963555336,
|
6748 |
+
"learning_rate": 3.738127403480507e-07,
|
6749 |
+
"loss": 0.0052,
|
6750 |
+
"step": 9630
|
6751 |
+
},
|
6752 |
+
{
|
6753 |
+
"epoch": 19.958592132505174,
|
6754 |
+
"grad_norm": 0.08023510873317719,
|
6755 |
+
"learning_rate": 3.5390325045304706e-07,
|
6756 |
+
"loss": 0.0055,
|
6757 |
+
"step": 9640
|
6758 |
+
},
|
6759 |
+
{
|
6760 |
+
"epoch": 19.979296066252587,
|
6761 |
+
"grad_norm": 0.09788880497217178,
|
6762 |
+
"learning_rate": 3.3453668231809286e-07,
|
6763 |
+
"loss": 0.0086,
|
6764 |
+
"step": 9650
|
6765 |
+
},
|
6766 |
+
{
|
6767 |
+
"epoch": 20.0,
|
6768 |
+
"grad_norm": 0.2991919219493866,
|
6769 |
+
"learning_rate": 3.157132477328628e-07,
|
6770 |
+
"loss": 0.0117,
|
6771 |
+
"step": 9660
|
6772 |
+
},
|
6773 |
+
{
|
6774 |
+
"epoch": 20.020703933747413,
|
6775 |
+
"grad_norm": 0.152107372879982,
|
6776 |
+
"learning_rate": 2.9743315254743833e-07,
|
6777 |
+
"loss": 0.0212,
|
6778 |
+
"step": 9670
|
6779 |
+
},
|
6780 |
+
{
|
6781 |
+
"epoch": 20.041407867494826,
|
6782 |
+
"grad_norm": 0.1049988642334938,
|
6783 |
+
"learning_rate": 2.796965966699927e-07,
|
6784 |
+
"loss": 0.012,
|
6785 |
+
"step": 9680
|
6786 |
+
},
|
6787 |
+
{
|
6788 |
+
"epoch": 20.062111801242235,
|
6789 |
+
"grad_norm": 0.28902608156204224,
|
6790 |
+
"learning_rate": 2.625037740646763e-07,
|
6791 |
+
"loss": 0.0103,
|
6792 |
+
"step": 9690
|
6793 |
+
},
|
6794 |
+
{
|
6795 |
+
"epoch": 20.082815734989648,
|
6796 |
+
"grad_norm": 0.2800842821598053,
|
6797 |
+
"learning_rate": 2.458548727494292e-07,
|
6798 |
+
"loss": 0.0067,
|
6799 |
+
"step": 9700
|
6800 |
+
},
|
6801 |
+
{
|
6802 |
+
"epoch": 20.10351966873706,
|
6803 |
+
"grad_norm": 0.18260431289672852,
|
6804 |
+
"learning_rate": 2.2975007479397738e-07,
|
6805 |
+
"loss": 0.0049,
|
6806 |
+
"step": 9710
|
6807 |
+
},
|
6808 |
+
{
|
6809 |
+
"epoch": 20.124223602484474,
|
6810 |
+
"grad_norm": 0.19015970826148987,
|
6811 |
+
"learning_rate": 2.1418955631781202e-07,
|
6812 |
+
"loss": 0.0117,
|
6813 |
+
"step": 9720
|
6814 |
+
},
|
6815 |
+
{
|
6816 |
+
"epoch": 20.144927536231883,
|
6817 |
+
"grad_norm": 0.1346769630908966,
|
6818 |
+
"learning_rate": 1.9917348748826335e-07,
|
6819 |
+
"loss": 0.0065,
|
6820 |
+
"step": 9730
|
6821 |
+
},
|
6822 |
+
{
|
6823 |
+
"epoch": 20.165631469979296,
|
6824 |
+
"grad_norm": 0.12038490176200867,
|
6825 |
+
"learning_rate": 1.847020325186577e-07,
|
6826 |
+
"loss": 0.0078,
|
6827 |
+
"step": 9740
|
6828 |
+
},
|
6829 |
+
{
|
6830 |
+
"epoch": 20.18633540372671,
|
6831 |
+
"grad_norm": 0.2004089653491974,
|
6832 |
+
"learning_rate": 1.7077534966650766e-07,
|
6833 |
+
"loss": 0.0096,
|
6834 |
+
"step": 9750
|
6835 |
+
},
|
6836 |
+
{
|
6837 |
+
"epoch": 20.20703933747412,
|
6838 |
+
"grad_norm": 0.19906413555145264,
|
6839 |
+
"learning_rate": 1.5739359123178587e-07,
|
6840 |
+
"loss": 0.0078,
|
6841 |
+
"step": 9760
|
6842 |
+
},
|
6843 |
+
{
|
6844 |
+
"epoch": 20.22774327122153,
|
6845 |
+
"grad_norm": 0.15226063132286072,
|
6846 |
+
"learning_rate": 1.4455690355525964e-07,
|
6847 |
+
"loss": 0.0049,
|
6848 |
+
"step": 9770
|
6849 |
+
},
|
6850 |
+
{
|
6851 |
+
"epoch": 20.248447204968944,
|
6852 |
+
"grad_norm": 0.12389522045850754,
|
6853 |
+
"learning_rate": 1.3226542701689215e-07,
|
6854 |
+
"loss": 0.0062,
|
6855 |
+
"step": 9780
|
6856 |
+
},
|
6857 |
+
{
|
6858 |
+
"epoch": 20.269151138716357,
|
6859 |
+
"grad_norm": 0.10260294377803802,
|
6860 |
+
"learning_rate": 1.2051929603428825e-07,
|
6861 |
+
"loss": 0.0131,
|
6862 |
+
"step": 9790
|
6863 |
+
},
|
6864 |
+
{
|
6865 |
+
"epoch": 20.28985507246377,
|
6866 |
+
"grad_norm": 0.059663962572813034,
|
6867 |
+
"learning_rate": 1.0931863906127327e-07,
|
6868 |
+
"loss": 0.0059,
|
6869 |
+
"step": 9800
|
6870 |
+
},
|
6871 |
+
{
|
6872 |
+
"epoch": 20.31055900621118,
|
6873 |
+
"grad_norm": 0.05520065873861313,
|
6874 |
+
"learning_rate": 9.866357858642205e-08,
|
6875 |
+
"loss": 0.0074,
|
6876 |
+
"step": 9810
|
6877 |
+
},
|
6878 |
+
{
|
6879 |
+
"epoch": 20.33126293995859,
|
6880 |
+
"grad_norm": 0.21085630357265472,
|
6881 |
+
"learning_rate": 8.855423113177664e-08,
|
6882 |
+
"loss": 0.0072,
|
6883 |
+
"step": 9820
|
6884 |
+
},
|
6885 |
+
{
|
6886 |
+
"epoch": 20.351966873706004,
|
6887 |
+
"grad_norm": 0.1336776465177536,
|
6888 |
+
"learning_rate": 7.899070725153613e-08,
|
6889 |
+
"loss": 0.0054,
|
6890 |
+
"step": 9830
|
6891 |
+
},
|
6892 |
+
{
|
6893 |
+
"epoch": 20.372670807453417,
|
6894 |
+
"grad_norm": 0.19630035758018494,
|
6895 |
+
"learning_rate": 6.997311153086883e-08,
|
6896 |
+
"loss": 0.0046,
|
6897 |
+
"step": 9840
|
6898 |
+
},
|
6899 |
+
{
|
6900 |
+
"epoch": 20.393374741200827,
|
6901 |
+
"grad_norm": 0.20599442720413208,
|
6902 |
+
"learning_rate": 6.150154258476315e-08,
|
6903 |
+
"loss": 0.0081,
|
6904 |
+
"step": 9850
|
6905 |
+
},
|
6906 |
+
{
|
6907 |
+
"epoch": 20.41407867494824,
|
6908 |
+
"grad_norm": 0.22885958850383759,
|
6909 |
+
"learning_rate": 5.3576093056922906e-08,
|
6910 |
+
"loss": 0.0076,
|
6911 |
+
"step": 9860
|
6912 |
+
},
|
6913 |
+
{
|
6914 |
+
"epoch": 20.434782608695652,
|
6915 |
+
"grad_norm": 0.07992696017026901,
|
6916 |
+
"learning_rate": 4.619684961881254e-08,
|
6917 |
+
"loss": 0.0071,
|
6918 |
+
"step": 9870
|
6919 |
+
},
|
6920 |
+
{
|
6921 |
+
"epoch": 20.455486542443065,
|
6922 |
+
"grad_norm": 0.2755813002586365,
|
6923 |
+
"learning_rate": 3.936389296864129e-08,
|
6924 |
+
"loss": 0.0064,
|
6925 |
+
"step": 9880
|
6926 |
+
},
|
6927 |
+
{
|
6928 |
+
"epoch": 20.476190476190474,
|
6929 |
+
"grad_norm": 0.1255924105644226,
|
6930 |
+
"learning_rate": 3.3077297830541584e-08,
|
6931 |
+
"loss": 0.0075,
|
6932 |
+
"step": 9890
|
6933 |
+
},
|
6934 |
+
{
|
6935 |
+
"epoch": 20.496894409937887,
|
6936 |
+
"grad_norm": 0.10394856333732605,
|
6937 |
+
"learning_rate": 2.7337132953697554e-08,
|
6938 |
+
"loss": 0.0072,
|
6939 |
+
"step": 9900
|
6940 |
+
},
|
6941 |
+
{
|
6942 |
+
"epoch": 20.5175983436853,
|
6943 |
+
"grad_norm": 0.11971770226955414,
|
6944 |
+
"learning_rate": 2.214346111164556e-08,
|
6945 |
+
"loss": 0.007,
|
6946 |
+
"step": 9910
|
6947 |
+
},
|
6948 |
+
{
|
6949 |
+
"epoch": 20.538302277432713,
|
6950 |
+
"grad_norm": 0.20464111864566803,
|
6951 |
+
"learning_rate": 1.749633910153592e-08,
|
6952 |
+
"loss": 0.0046,
|
6953 |
+
"step": 9920
|
6954 |
+
},
|
6955 |
+
{
|
6956 |
+
"epoch": 20.559006211180126,
|
6957 |
+
"grad_norm": 0.12219670414924622,
|
6958 |
+
"learning_rate": 1.3395817743561134e-08,
|
6959 |
+
"loss": 0.0082,
|
6960 |
+
"step": 9930
|
6961 |
+
},
|
6962 |
+
{
|
6963 |
+
"epoch": 20.579710144927535,
|
6964 |
+
"grad_norm": 0.2536000907421112,
|
6965 |
+
"learning_rate": 9.841941880361916e-09,
|
6966 |
+
"loss": 0.0152,
|
6967 |
+
"step": 9940
|
6968 |
+
},
|
6969 |
+
{
|
6970 |
+
"epoch": 20.600414078674948,
|
6971 |
+
"grad_norm": 0.16146942973136902,
|
6972 |
+
"learning_rate": 6.834750376549792e-09,
|
6973 |
+
"loss": 0.0055,
|
6974 |
+
"step": 9950
|
6975 |
+
},
|
6976 |
+
{
|
6977 |
+
"epoch": 20.62111801242236,
|
6978 |
+
"grad_norm": 0.1295255571603775,
|
6979 |
+
"learning_rate": 4.3742761183018784e-09,
|
6980 |
+
"loss": 0.0118,
|
6981 |
+
"step": 9960
|
6982 |
+
},
|
6983 |
+
{
|
6984 |
+
"epoch": 20.641821946169774,
|
6985 |
+
"grad_norm": 0.19239592552185059,
|
6986 |
+
"learning_rate": 2.4605460129556445e-09,
|
6987 |
+
"loss": 0.0065,
|
6988 |
+
"step": 9970
|
6989 |
+
},
|
6990 |
+
{
|
6991 |
+
"epoch": 20.662525879917183,
|
6992 |
+
"grad_norm": 0.21993553638458252,
|
6993 |
+
"learning_rate": 1.0935809887702154e-09,
|
6994 |
+
"loss": 0.0065,
|
6995 |
+
"step": 9980
|
6996 |
+
},
|
6997 |
+
{
|
6998 |
+
"epoch": 20.683229813664596,
|
6999 |
+
"grad_norm": 0.15056583285331726,
|
7000 |
+
"learning_rate": 2.7339599464326627e-10,
|
7001 |
+
"loss": 0.0062,
|
7002 |
+
"step": 9990
|
7003 |
+
},
|
7004 |
+
{
|
7005 |
+
"epoch": 20.70393374741201,
|
7006 |
+
"grad_norm": 0.24346260726451874,
|
7007 |
+
"learning_rate": 0.0,
|
7008 |
+
"loss": 0.0084,
|
7009 |
+
"step": 10000
|
7010 |
}
|
7011 |
],
|
7012 |
"logging_steps": 10,
|
|
|
7021 |
"should_evaluate": false,
|
7022 |
"should_log": false,
|
7023 |
"should_save": true,
|
7024 |
+
"should_training_stop": true
|
7025 |
},
|
7026 |
"attributes": {}
|
7027 |
}
|
7028 |
},
|
7029 |
+
"total_flos": 3.6267190177586125e+17,
|
7030 |
"train_batch_size": 16,
|
7031 |
"trial_name": null,
|
7032 |
"trial_params": null
|