Training in progress, step 500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 349243752
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77e076f7213cd541801b56e598732e42d7aaa49f322189b3825f935f5e1a9284
|
3 |
size 349243752
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 177909253
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47e1d75dde1eaa357bbd53fa9529a45fd976f9322d969b161d305861e4d4d4f6
|
3 |
size 177909253
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96e967965c983a20a302f5bd4e11508247b969959098eb0b66c2fdc8d23296fa
|
3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7258fe24b7785a5fa76614aef57f913158962a01367bf1fe11174ca5bb4f2704
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -708,6 +708,181 @@
|
|
708 |
"learning_rate": 7.68782851756094e-06,
|
709 |
"loss": 1.3148,
|
710 |
"step": 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
711 |
}
|
712 |
],
|
713 |
"logging_steps": 4,
|
@@ -727,7 +902,7 @@
|
|
727 |
"attributes": {}
|
728 |
}
|
729 |
},
|
730 |
-
"total_flos":
|
731 |
"train_batch_size": 24,
|
732 |
"trial_name": null,
|
733 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.4975124378109453,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
708 |
"learning_rate": 7.68782851756094e-06,
|
709 |
"loss": 1.3148,
|
710 |
"step": 400
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 0.4019900497512438,
|
714 |
+
"grad_norm": 0.17020876705646515,
|
715 |
+
"learning_rate": 7.633781445683757e-06,
|
716 |
+
"loss": 1.4201,
|
717 |
+
"step": 404
|
718 |
+
},
|
719 |
+
{
|
720 |
+
"epoch": 0.4059701492537313,
|
721 |
+
"grad_norm": 0.17484496533870697,
|
722 |
+
"learning_rate": 7.578917304062244e-06,
|
723 |
+
"loss": 1.3405,
|
724 |
+
"step": 408
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"epoch": 0.4099502487562189,
|
728 |
+
"grad_norm": 0.1632368564605713,
|
729 |
+
"learning_rate": 7.523250629339467e-06,
|
730 |
+
"loss": 1.4,
|
731 |
+
"step": 412
|
732 |
+
},
|
733 |
+
{
|
734 |
+
"epoch": 0.41393034825870645,
|
735 |
+
"grad_norm": 0.17625893652439117,
|
736 |
+
"learning_rate": 7.4667961707953255e-06,
|
737 |
+
"loss": 1.4348,
|
738 |
+
"step": 416
|
739 |
+
},
|
740 |
+
{
|
741 |
+
"epoch": 0.417910447761194,
|
742 |
+
"grad_norm": 0.15959715843200684,
|
743 |
+
"learning_rate": 7.409568886438621e-06,
|
744 |
+
"loss": 1.4332,
|
745 |
+
"step": 420
|
746 |
+
},
|
747 |
+
{
|
748 |
+
"epoch": 0.4218905472636816,
|
749 |
+
"grad_norm": 0.17473378777503967,
|
750 |
+
"learning_rate": 7.351583939043828e-06,
|
751 |
+
"loss": 1.4602,
|
752 |
+
"step": 424
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 0.42587064676616915,
|
756 |
+
"grad_norm": 0.18552260100841522,
|
757 |
+
"learning_rate": 7.292856692133618e-06,
|
758 |
+
"loss": 1.3272,
|
759 |
+
"step": 428
|
760 |
+
},
|
761 |
+
{
|
762 |
+
"epoch": 0.4298507462686567,
|
763 |
+
"grad_norm": 0.15143436193466187,
|
764 |
+
"learning_rate": 7.233402705908171e-06,
|
765 |
+
"loss": 1.2945,
|
766 |
+
"step": 432
|
767 |
+
},
|
768 |
+
{
|
769 |
+
"epoch": 0.4338308457711443,
|
770 |
+
"grad_norm": 0.17919780313968658,
|
771 |
+
"learning_rate": 7.173237733122405e-06,
|
772 |
+
"loss": 1.3907,
|
773 |
+
"step": 436
|
774 |
+
},
|
775 |
+
{
|
776 |
+
"epoch": 0.43781094527363185,
|
777 |
+
"grad_norm": 0.18790380656719208,
|
778 |
+
"learning_rate": 7.11237771491216e-06,
|
779 |
+
"loss": 1.3891,
|
780 |
+
"step": 440
|
781 |
+
},
|
782 |
+
{
|
783 |
+
"epoch": 0.4417910447761194,
|
784 |
+
"grad_norm": 0.17522069811820984,
|
785 |
+
"learning_rate": 7.050838776570487e-06,
|
786 |
+
"loss": 1.4066,
|
787 |
+
"step": 444
|
788 |
+
},
|
789 |
+
{
|
790 |
+
"epoch": 0.445771144278607,
|
791 |
+
"grad_norm": 0.13809643685817719,
|
792 |
+
"learning_rate": 6.9886372232751395e-06,
|
793 |
+
"loss": 1.3792,
|
794 |
+
"step": 448
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 0.44975124378109455,
|
798 |
+
"grad_norm": 0.16660109162330627,
|
799 |
+
"learning_rate": 6.925789535768393e-06,
|
800 |
+
"loss": 1.4093,
|
801 |
+
"step": 452
|
802 |
+
},
|
803 |
+
{
|
804 |
+
"epoch": 0.4537313432835821,
|
805 |
+
"grad_norm": 0.1820070743560791,
|
806 |
+
"learning_rate": 6.862312365990363e-06,
|
807 |
+
"loss": 1.38,
|
808 |
+
"step": 456
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"epoch": 0.4577114427860697,
|
812 |
+
"grad_norm": 0.13574036955833435,
|
813 |
+
"learning_rate": 6.798222532666956e-06,
|
814 |
+
"loss": 1.3845,
|
815 |
+
"step": 460
|
816 |
+
},
|
817 |
+
{
|
818 |
+
"epoch": 0.4616915422885572,
|
819 |
+
"grad_norm": 0.18035098910331726,
|
820 |
+
"learning_rate": 6.73353701685362e-06,
|
821 |
+
"loss": 1.4684,
|
822 |
+
"step": 464
|
823 |
+
},
|
824 |
+
{
|
825 |
+
"epoch": 0.46567164179104475,
|
826 |
+
"grad_norm": 0.13899867236614227,
|
827 |
+
"learning_rate": 6.668272957436101e-06,
|
828 |
+
"loss": 1.4925,
|
829 |
+
"step": 468
|
830 |
+
},
|
831 |
+
{
|
832 |
+
"epoch": 0.4696517412935323,
|
833 |
+
"grad_norm": 0.16560381650924683,
|
834 |
+
"learning_rate": 6.602447646589379e-06,
|
835 |
+
"loss": 1.4543,
|
836 |
+
"step": 472
|
837 |
+
},
|
838 |
+
{
|
839 |
+
"epoch": 0.4736318407960199,
|
840 |
+
"grad_norm": 0.19061513245105743,
|
841 |
+
"learning_rate": 6.536078525195966e-06,
|
842 |
+
"loss": 1.4835,
|
843 |
+
"step": 476
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 0.47761194029850745,
|
847 |
+
"grad_norm": 0.20418591797351837,
|
848 |
+
"learning_rate": 6.46918317822484e-06,
|
849 |
+
"loss": 1.3522,
|
850 |
+
"step": 480
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 0.481592039800995,
|
854 |
+
"grad_norm": 0.14897631108760834,
|
855 |
+
"learning_rate": 6.401779330072171e-06,
|
856 |
+
"loss": 1.4015,
|
857 |
+
"step": 484
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 0.4855721393034826,
|
861 |
+
"grad_norm": 0.15127280354499817,
|
862 |
+
"learning_rate": 6.33388483986512e-06,
|
863 |
+
"loss": 1.3304,
|
864 |
+
"step": 488
|
865 |
+
},
|
866 |
+
{
|
867 |
+
"epoch": 0.48955223880597015,
|
868 |
+
"grad_norm": 0.16975510120391846,
|
869 |
+
"learning_rate": 6.265517696729937e-06,
|
870 |
+
"loss": 1.4004,
|
871 |
+
"step": 492
|
872 |
+
},
|
873 |
+
{
|
874 |
+
"epoch": 0.4935323383084577,
|
875 |
+
"grad_norm": 0.17138828337192535,
|
876 |
+
"learning_rate": 6.196696015025615e-06,
|
877 |
+
"loss": 1.3479,
|
878 |
+
"step": 496
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 0.4975124378109453,
|
882 |
+
"grad_norm": 0.16889625787734985,
|
883 |
+
"learning_rate": 6.1274380295443624e-06,
|
884 |
+
"loss": 1.345,
|
885 |
+
"step": 500
|
886 |
}
|
887 |
],
|
888 |
"logging_steps": 4,
|
|
|
902 |
"attributes": {}
|
903 |
}
|
904 |
},
|
905 |
+
"total_flos": 3.748818411375821e+17,
|
906 |
"train_batch_size": 24,
|
907 |
"trial_name": null,
|
908 |
"trial_params": null
|