apriasmoro committed
Commit bf03a68 · verified · 1 parent: 65ffd93

Training in progress, step 600, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:77e076f7213cd541801b56e598732e42d7aaa49f322189b3825f935f5e1a9284
+ oid sha256:170b9839a6f9cb4849a79d372fdd8978c15d9f959a62f6fc4739c2fab1a62cb9
  size 349243752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:47e1d75dde1eaa357bbd53fa9529a45fd976f9322d969b161d305861e4d4d4f6
+ oid sha256:fde78758d4bfbd2f73a3973efb9b2f5d957019b5c9fc4ad6115838e371515762
  size 177909253
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96e967965c983a20a302f5bd4e11508247b969959098eb0b66c2fdc8d23296fa
+ oid sha256:996933921c4deed1d34ddceb89eb427578e0472ba88129c76719f873c95cb898
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7258fe24b7785a5fa76614aef57f913158962a01367bf1fe11174ca5bb4f2704
+ oid sha256:6557e2f348eac233d3d621679f2b4c7e01f4a6a61c82ffe28816c1229ea4afae
  size 1465
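
The four binary files above are tracked with Git LFS, so each diff only rewrites the pointer: the oid sha256 line (the SHA-256 digest of the blob's contents) changes while the size stays the same. As a minimal sketch, assuming a locally downloaded copy of the checkpoint, a file can be checked against its pointer; the local path below is illustrative, and the expected digest is the new adapter oid from the first diff:

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file through SHA-256 so a large checkpoint never sits in RAM.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the checkpoint; expected oid taken from the diff above.
expected = "170b9839a6f9cb4849a79d372fdd8978c15d9f959a62f6fc4739c2fab1a62cb9"
actual = sha256_of(Path("last-checkpoint/adapter_model.safetensors"))
print("oid match" if actual == expected else f"oid mismatch: {actual}")
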
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.4975124378109453,
+ "epoch": 0.5970149253731343,
  "eval_steps": 500,
- "global_step": 500,
+ "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -883,6 +883,181 @@
  "learning_rate": 6.1274380295443624e-06,
  "loss": 1.345,
  "step": 500
+ },
+ {
+ "epoch": 0.5014925373134328,
+ "grad_norm": 0.16558654606342316,
+ "learning_rate": 6.057762090680162e-06,
+ "loss": 1.3696,
+ "step": 504
+ },
+ {
+ "epoch": 0.5054726368159204,
+ "grad_norm": 0.17274197936058044,
+ "learning_rate": 5.9876866595667085e-06,
+ "loss": 1.4371,
+ "step": 508
+ },
+ {
+ "epoch": 0.5094527363184079,
+ "grad_norm": 0.18550808727741241,
+ "learning_rate": 5.917230303186e-06,
+ "loss": 1.491,
+ "step": 512
+ },
+ {
+ "epoch": 0.5134328358208955,
+ "grad_norm": 0.16486109793186188,
+ "learning_rate": 5.84641168944888e-06,
+ "loss": 1.3596,
+ "step": 516
+ },
+ {
+ "epoch": 0.5174129353233831,
+ "grad_norm": 0.15757621824741364,
+ "learning_rate": 5.775249582248848e-06,
+ "loss": 1.3464,
+ "step": 520
+ },
+ {
+ "epoch": 0.5213930348258706,
+ "grad_norm": 0.1419944018125534,
+ "learning_rate": 5.703762836490429e-06,
+ "loss": 1.3415,
+ "step": 524
+ },
+ {
+ "epoch": 0.5253731343283582,
+ "grad_norm": 0.24455907940864563,
+ "learning_rate": 5.631970393093435e-06,
+ "loss": 1.4322,
+ "step": 528
+ },
+ {
+ "epoch": 0.5293532338308458,
+ "grad_norm": 0.15628467500209808,
+ "learning_rate": 5.559891273974433e-06,
+ "loss": 1.4357,
+ "step": 532
+ },
+ {
+ "epoch": 0.5333333333333333,
+ "grad_norm": 0.15226367115974426,
+ "learning_rate": 5.487544577006756e-06,
+ "loss": 1.3864,
+ "step": 536
+ },
+ {
+ "epoch": 0.5373134328358209,
+ "grad_norm": 0.15544599294662476,
+ "learning_rate": 5.414949470960377e-06,
+ "loss": 1.3954,
+ "step": 540
+ },
+ {
+ "epoch": 0.5412935323383085,
+ "grad_norm": 0.14596706628799438,
+ "learning_rate": 5.3421251904230245e-06,
+ "loss": 1.2908,
+ "step": 544
+ },
+ {
+ "epoch": 0.545273631840796,
+ "grad_norm": 0.14396768808364868,
+ "learning_rate": 5.269091030703825e-06,
+ "loss": 1.3339,
+ "step": 548
+ },
+ {
+ "epoch": 0.5492537313432836,
+ "grad_norm": 0.14690382778644562,
+ "learning_rate": 5.195866342720883e-06,
+ "loss": 1.4412,
+ "step": 552
+ },
+ {
+ "epoch": 0.5532338308457712,
+ "grad_norm": 0.164686918258667,
+ "learning_rate": 5.122470527874117e-06,
+ "loss": 1.3025,
+ "step": 556
+ },
+ {
+ "epoch": 0.5572139303482587,
+ "grad_norm": 0.15687449276447296,
+ "learning_rate": 5.0489230329047216e-06,
+ "loss": 1.4286,
+ "step": 560
+ },
+ {
+ "epoch": 0.5611940298507463,
+ "grad_norm": 0.16417358815670013,
+ "learning_rate": 4.975243344742612e-06,
+ "loss": 1.3747,
+ "step": 564
+ },
+ {
+ "epoch": 0.5651741293532339,
+ "grad_norm": 0.140806183218956,
+ "learning_rate": 4.90145098534323e-06,
+ "loss": 1.3345,
+ "step": 568
+ },
+ {
+ "epoch": 0.5691542288557214,
+ "grad_norm": 0.14276030659675598,
+ "learning_rate": 4.827565506515056e-06,
+ "loss": 1.3701,
+ "step": 572
+ },
+ {
+ "epoch": 0.573134328358209,
+ "grad_norm": 0.18743367493152618,
+ "learning_rate": 4.753606484739227e-06,
+ "loss": 1.323,
+ "step": 576
+ },
+ {
+ "epoch": 0.5771144278606966,
+ "grad_norm": 0.13849185407161713,
+ "learning_rate": 4.679593515982602e-06,
+ "loss": 1.3634,
+ "step": 580
+ },
+ {
+ "epoch": 0.5810945273631841,
+ "grad_norm": 0.16938801109790802,
+ "learning_rate": 4.605546210505675e-06,
+ "loss": 1.4166,
+ "step": 584
+ },
+ {
+ "epoch": 0.5850746268656717,
+ "grad_norm": 0.1600942611694336,
+ "learning_rate": 4.531484187666699e-06,
+ "loss": 1.3556,
+ "step": 588
+ },
+ {
+ "epoch": 0.5890547263681593,
+ "grad_norm": 0.18938744068145752,
+ "learning_rate": 4.457427070723396e-06,
+ "loss": 1.3305,
+ "step": 592
+ },
+ {
+ "epoch": 0.5930348258706468,
+ "grad_norm": 0.1494676023721695,
+ "learning_rate": 4.383394481633633e-06,
+ "loss": 1.3438,
+ "step": 596
+ },
+ {
+ "epoch": 0.5970149253731343,
+ "grad_norm": 0.17030707001686096,
+ "learning_rate": 4.309406035856457e-06,
+ "loss": 1.3414,
+ "step": 600
  }
  ],
  "logging_steps": 4,
@@ -902,7 +1077,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.748818411375821e+17,
+ "total_flos": 4.505167220095058e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null