PLB committed
Commit 1581889 · verified · 1 Parent(s): 3239f00

Upload trainer_state.json with huggingface_hub

Files changed (1):
  1. trainer_state.json +354 -4
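For reference, an upload like this one is typically made with huggingface_hub's upload_file API. A minimal sketch in Python (the repo_id is a placeholder, not taken from this commit):

from huggingface_hub import HfApi

# Upload a local trainer_state.json to the root of a Hub repo.
# repo_id below is hypothetical; authentication comes from a previously
# saved token (e.g. via `huggingface-cli login`).
api = HfApi()
api.upload_file(
    path_or_fileobj="trainer_state.json",  # local file to upload
    path_in_repo="trainer_state.json",     # destination path in the repo
    repo_id="PLB/example-model",           # placeholder repo id
    commit_message="Upload trainer_state.json with huggingface_hub",
)

The diff below shows what that commit changed.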
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 19.66873706004141,
+  "epoch": 20.70393374741201,
   "eval_steps": 500,
-  "global_step": 9500,
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6657,6 +6657,356 @@
       "learning_rate": 6.819348298638839e-07,
       "loss": 0.0077,
       "step": 9500
+    },
+    {
+      "epoch": 19.68944099378882,
+      "grad_norm": 0.20381984114646912,
+      "learning_rate": 6.549893279788277e-07,
+      "loss": 0.0079,
+      "step": 9510
+    },
+    {
+      "epoch": 19.71014492753623,
+      "grad_norm": 0.15530520677566528,
+      "learning_rate": 6.285834552247128e-07,
+      "loss": 0.0062,
+      "step": 9520
+    },
+    {
+      "epoch": 19.730848861283643,
+      "grad_norm": 0.05255560576915741,
+      "learning_rate": 6.027175003719354e-07,
+      "loss": 0.0047,
+      "step": 9530
+    },
+    {
+      "epoch": 19.751552795031056,
+      "grad_norm": 0.06348340958356857,
+      "learning_rate": 5.773917462864264e-07,
+      "loss": 0.0062,
+      "step": 9540
+    },
+    {
+      "epoch": 19.77225672877847,
+      "grad_norm": 0.12964807450771332,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.0127,
+      "step": 9550
+    },
+    {
+      "epoch": 19.79296066252588,
+      "grad_norm": 0.1979799121618271,
+      "learning_rate": 5.283619423401998e-07,
+      "loss": 0.0048,
+      "step": 9560
+    },
+    {
+      "epoch": 19.81366459627329,
+      "grad_norm": 0.14146266877651215,
+      "learning_rate": 5.046584286615697e-07,
+      "loss": 0.0089,
+      "step": 9570
+    },
+    {
+      "epoch": 19.834368530020704,
+      "grad_norm": 0.1563977748155594,
+      "learning_rate": 4.814961881085045e-07,
+      "loss": 0.0042,
+      "step": 9580
+    },
+    {
+      "epoch": 19.855072463768117,
+      "grad_norm": 0.04129205644130707,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 0.0047,
+      "step": 9590
+    },
+    {
+      "epoch": 19.875776397515526,
+      "grad_norm": 0.2608759105205536,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.0134,
+      "step": 9600
+    },
+    {
+      "epoch": 19.89648033126294,
+      "grad_norm": 0.16297097504138947,
+      "learning_rate": 4.1525960857530243e-07,
+      "loss": 0.0035,
+      "step": 9610
+    },
+    {
+      "epoch": 19.917184265010352,
+      "grad_norm": 0.14169001579284668,
+      "learning_rate": 3.9426493427611177e-07,
+      "loss": 0.0076,
+      "step": 9620
+    },
+    {
+      "epoch": 19.937888198757765,
+      "grad_norm": 0.141464963555336,
+      "learning_rate": 3.738127403480507e-07,
+      "loss": 0.0052,
+      "step": 9630
+    },
+    {
+      "epoch": 19.958592132505174,
+      "grad_norm": 0.08023510873317719,
+      "learning_rate": 3.5390325045304706e-07,
+      "loss": 0.0055,
+      "step": 9640
+    },
+    {
+      "epoch": 19.979296066252587,
+      "grad_norm": 0.09788880497217178,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.0086,
+      "step": 9650
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.2991919219493866,
+      "learning_rate": 3.157132477328628e-07,
+      "loss": 0.0117,
+      "step": 9660
+    },
+    {
+      "epoch": 20.020703933747413,
+      "grad_norm": 0.152107372879982,
+      "learning_rate": 2.9743315254743833e-07,
+      "loss": 0.0212,
+      "step": 9670
+    },
+    {
+      "epoch": 20.041407867494826,
+      "grad_norm": 0.1049988642334938,
+      "learning_rate": 2.796965966699927e-07,
+      "loss": 0.012,
+      "step": 9680
+    },
+    {
+      "epoch": 20.062111801242235,
+      "grad_norm": 0.28902608156204224,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 0.0103,
+      "step": 9690
+    },
+    {
+      "epoch": 20.082815734989648,
+      "grad_norm": 0.2800842821598053,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.0067,
+      "step": 9700
+    },
+    {
+      "epoch": 20.10351966873706,
+      "grad_norm": 0.18260431289672852,
+      "learning_rate": 2.2975007479397738e-07,
+      "loss": 0.0049,
+      "step": 9710
+    },
+    {
+      "epoch": 20.124223602484474,
+      "grad_norm": 0.19015970826148987,
+      "learning_rate": 2.1418955631781202e-07,
+      "loss": 0.0117,
+      "step": 9720
+    },
+    {
+      "epoch": 20.144927536231883,
+      "grad_norm": 0.1346769630908966,
+      "learning_rate": 1.9917348748826335e-07,
+      "loss": 0.0065,
+      "step": 9730
+    },
+    {
+      "epoch": 20.165631469979296,
+      "grad_norm": 0.12038490176200867,
+      "learning_rate": 1.847020325186577e-07,
+      "loss": 0.0078,
+      "step": 9740
+    },
+    {
+      "epoch": 20.18633540372671,
+      "grad_norm": 0.2004089653491974,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.0096,
+      "step": 9750
+    },
+    {
+      "epoch": 20.20703933747412,
+      "grad_norm": 0.19906413555145264,
+      "learning_rate": 1.5739359123178587e-07,
+      "loss": 0.0078,
+      "step": 9760
+    },
+    {
+      "epoch": 20.22774327122153,
+      "grad_norm": 0.15226063132286072,
+      "learning_rate": 1.4455690355525964e-07,
+      "loss": 0.0049,
+      "step": 9770
+    },
+    {
+      "epoch": 20.248447204968944,
+      "grad_norm": 0.12389522045850754,
+      "learning_rate": 1.3226542701689215e-07,
+      "loss": 0.0062,
+      "step": 9780
+    },
+    {
+      "epoch": 20.269151138716357,
+      "grad_norm": 0.10260294377803802,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 0.0131,
+      "step": 9790
+    },
+    {
+      "epoch": 20.28985507246377,
+      "grad_norm": 0.059663962572813034,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.0059,
+      "step": 9800
+    },
+    {
+      "epoch": 20.31055900621118,
+      "grad_norm": 0.05520065873861313,
+      "learning_rate": 9.866357858642205e-08,
+      "loss": 0.0074,
+      "step": 9810
+    },
+    {
+      "epoch": 20.33126293995859,
+      "grad_norm": 0.21085630357265472,
+      "learning_rate": 8.855423113177664e-08,
+      "loss": 0.0072,
+      "step": 9820
+    },
+    {
+      "epoch": 20.351966873706004,
+      "grad_norm": 0.1336776465177536,
+      "learning_rate": 7.899070725153613e-08,
+      "loss": 0.0054,
+      "step": 9830
+    },
+    {
+      "epoch": 20.372670807453417,
+      "grad_norm": 0.19630035758018494,
+      "learning_rate": 6.997311153086883e-08,
+      "loss": 0.0046,
+      "step": 9840
+    },
+    {
+      "epoch": 20.393374741200827,
+      "grad_norm": 0.20599442720413208,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.0081,
+      "step": 9850
+    },
+    {
+      "epoch": 20.41407867494824,
+      "grad_norm": 0.22885958850383759,
+      "learning_rate": 5.3576093056922906e-08,
+      "loss": 0.0076,
+      "step": 9860
+    },
+    {
+      "epoch": 20.434782608695652,
+      "grad_norm": 0.07992696017026901,
+      "learning_rate": 4.619684961881254e-08,
+      "loss": 0.0071,
+      "step": 9870
+    },
+    {
+      "epoch": 20.455486542443065,
+      "grad_norm": 0.2755813002586365,
+      "learning_rate": 3.936389296864129e-08,
+      "loss": 0.0064,
+      "step": 9880
+    },
+    {
+      "epoch": 20.476190476190474,
+      "grad_norm": 0.1255924105644226,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 0.0075,
+      "step": 9890
+    },
+    {
+      "epoch": 20.496894409937887,
+      "grad_norm": 0.10394856333732605,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.0072,
+      "step": 9900
+    },
+    {
+      "epoch": 20.5175983436853,
+      "grad_norm": 0.11971770226955414,
+      "learning_rate": 2.214346111164556e-08,
+      "loss": 0.007,
+      "step": 9910
+    },
+    {
+      "epoch": 20.538302277432713,
+      "grad_norm": 0.20464111864566803,
+      "learning_rate": 1.749633910153592e-08,
+      "loss": 0.0046,
+      "step": 9920
+    },
+    {
+      "epoch": 20.559006211180126,
+      "grad_norm": 0.12219670414924622,
+      "learning_rate": 1.3395817743561134e-08,
+      "loss": 0.0082,
+      "step": 9930
+    },
+    {
+      "epoch": 20.579710144927535,
+      "grad_norm": 0.2536000907421112,
+      "learning_rate": 9.841941880361916e-09,
+      "loss": 0.0152,
+      "step": 9940
+    },
+    {
+      "epoch": 20.600414078674948,
+      "grad_norm": 0.16146942973136902,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.0055,
+      "step": 9950
+    },
+    {
+      "epoch": 20.62111801242236,
+      "grad_norm": 0.1295255571603775,
+      "learning_rate": 4.3742761183018784e-09,
+      "loss": 0.0118,
+      "step": 9960
+    },
+    {
+      "epoch": 20.641821946169774,
+      "grad_norm": 0.19239592552185059,
+      "learning_rate": 2.4605460129556445e-09,
+      "loss": 0.0065,
+      "step": 9970
+    },
+    {
+      "epoch": 20.662525879917183,
+      "grad_norm": 0.21993553638458252,
+      "learning_rate": 1.0935809887702154e-09,
+      "loss": 0.0065,
+      "step": 9980
+    },
+    {
+      "epoch": 20.683229813664596,
+      "grad_norm": 0.15056583285331726,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 0.0062,
+      "step": 9990
+    },
+    {
+      "epoch": 20.70393374741201,
+      "grad_norm": 0.24346260726451874,
+      "learning_rate": 0.0,
+      "loss": 0.0084,
+      "step": 10000
     }
   ],
   "logging_steps": 10,
@@ -6671,12 +7021,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.4453830668706816e+17,
+  "total_flos": 3.6267190177586125e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null