apriasmoro committed on
Commit 965f0f5 · verified · 1 Parent(s): 144e7ca

Training in progress, step 1200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:385c6700aae0cc7ebcd189dac2c66ebfc53f7fec408316a5242d59c2e3c9f07e
+oid sha256:0130799898020a53b14cabd263a81e22f27ec0696f688e0cd14317bbc408dba0
 size 349243752
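
Each of the binary checkpoint files in this commit is stored as a Git LFS pointer: a small text stub recording the spec version, the SHA-256 of the real blob, and its byte size, which is exactly what the hunks above and below change. A minimal sketch of checking a downloaded blob against its pointer, assuming hypothetical local paths (the pointer text itself is what the diff shows) and using only the Python standard library:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Split the key/value lines of a Git LFS pointer file into a dict."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check that a downloaded blob matches the oid and size in its pointer."""
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Hypothetical paths for illustration only:
# verify_blob("adapter_model.pointer", "adapter_model.safetensors")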
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76b7cd9825c01711620bb5dcc0b9e19811d636fc9094641303dd9fc13e79fedc
+oid sha256:af84095ea6d9f6f4e13aeeea1737265b67d0775777ae4ab57423d3ed644c4081
 size 177909253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a273191378e295e79d8e7b721ac898d977266d9fc45a22812d9173affa953415
+oid sha256:4944f7f3f8d9cc24a998faf94675bbb1d591022b8f0394f9e1700a104dca058b
 size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:524fe1b39d0e74a74202e97fd363b331b733ce7e41558e0e4b19a0ac27092713
+oid sha256:8f89c09364acfd91102561379138b88af0a420d76fde4e419b7a16abec655975
 size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72a73aacd895d114c25edf917acb63200c1b0df74b52c8f0b257b638ca866429
+oid sha256:dda3a1cdc480778259cfeb010764235ae8da3862f43cb398867dec097dc012ae
 size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9333333333333336,
+  "epoch": 3.2,
   "eval_steps": 500,
-  "global_step": 1100,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -778,6 +778,76 @@
       "learning_rate": 1.7102549540480392e-05,
       "loss": 1.1838,
       "step": 1100
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 0.2694341838359833,
+      "learning_rate": 1.706160768693108e-05,
+      "loss": 1.1659,
+      "step": 1110
+    },
+    {
+      "epoch": 2.986666666666667,
+      "grad_norm": 0.21248756349086761,
+      "learning_rate": 1.7020278943499707e-05,
+      "loss": 1.1724,
+      "step": 1120
+    },
+    {
+      "epoch": 3.013333333333333,
+      "grad_norm": 0.2432037740945816,
+      "learning_rate": 1.6978565424772645e-05,
+      "loss": 1.1635,
+      "step": 1130
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 0.23135386407375336,
+      "learning_rate": 1.693646926502333e-05,
+      "loss": 1.2128,
+      "step": 1140
+    },
+    {
+      "epoch": 3.066666666666667,
+      "grad_norm": 0.24014094471931458,
+      "learning_rate": 1.6893992618103017e-05,
+      "loss": 1.1835,
+      "step": 1150
+    },
+    {
+      "epoch": 3.0933333333333333,
+      "grad_norm": 0.24849995970726013,
+      "learning_rate": 1.6851137657330615e-05,
+      "loss": 1.1567,
+      "step": 1160
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 0.2538585364818573,
+      "learning_rate": 1.6807906575381463e-05,
+      "loss": 1.1868,
+      "step": 1170
+    },
+    {
+      "epoch": 3.1466666666666665,
+      "grad_norm": 0.24177521467208862,
+      "learning_rate": 1.6764301584175155e-05,
+      "loss": 1.1521,
+      "step": 1180
+    },
+    {
+      "epoch": 3.1733333333333333,
+      "grad_norm": 0.2552991807460785,
+      "learning_rate": 1.672032491476236e-05,
+      "loss": 1.1725,
+      "step": 1190
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.23876914381980896,
+      "learning_rate": 1.667597881721068e-05,
+      "loss": 1.1542,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
@@ -797,7 +867,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5952939212425134e+18,
+  "total_flos": 1.740213740302762e+18,
   "train_batch_size": 36,
   "trial_name": null,
   "trial_params": null