sandernotenbaert commited on
Commit
ca731a2
·
verified ·
1 Parent(s): 51b8898

Training in progress, step 1900, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fae1cea3ea7df00885daa2c80562b06d2938657f1003b0973b4a324cf8e2ec2
3
  size 86625328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3281ea58f21e8e23189595bfa79850858fbba3783269108bb5bca8b74fb39a5
3
  size 86625328
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1383b56211f527a8b7604383361ff10967007162e52f15611fd72d31d3ccbbd
3
  size 173285114
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554ea62f74181baa086e6cb6b5a596623c8aaf0559d81263e36e2fe654a0eeb7
3
  size 173285114
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b45f2e2a4ebd0b55652f3b63a8641a715f567044ea627bf2570ef0b9f9acb9f
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ac6cc9f96cb23f38dad14b3c8b94857940a2c54443bb178825363cab7260c0
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:792c38b8e737901a2e9fd09ebf8a2facdd16f9761ed18e93b60f3d9f70040b97
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2375bb704062abe009d16c81c5208b4a77486ea2b7e1b6be5a4f624a4401587f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 800,
3
  "best_metric": 9.927443504333496,
4
  "best_model_checkpoint": "./models/v-004/checkpoint-800",
5
- "epoch": 58.06521739130435,
6
  "eval_steps": 100,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -800,6 +800,50 @@
800
  "eval_samples_per_second": 11.261,
801
  "eval_steps_per_second": 0.708,
802
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  }
804
  ],
805
  "logging_steps": 20,
@@ -819,7 +863,7 @@
819
  "attributes": {}
820
  }
821
  },
822
- "total_flos": 3648688009529856.0,
823
  "train_batch_size": 8,
824
  "trial_name": null,
825
  "trial_params": null
 
2
  "best_global_step": 800,
3
  "best_metric": 9.927443504333496,
4
  "best_model_checkpoint": "./models/v-004/checkpoint-800",
5
+ "epoch": 61.29347826086956,
6
  "eval_steps": 100,
7
+ "global_step": 1900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
800
  "eval_samples_per_second": 11.261,
801
  "eval_steps_per_second": 0.708,
802
  "step": 1800
803
+ },
804
+ {
805
+ "epoch": 58.71739130434783,
806
+ "grad_norm": 0.9569380879402161,
807
+ "learning_rate": 4.067824398141701e-06,
808
+ "loss": 2.7457,
809
+ "step": 1820
810
+ },
811
+ {
812
+ "epoch": 59.358695652173914,
813
+ "grad_norm": 0.9395641088485718,
814
+ "learning_rate": 3.2277984585066366e-06,
815
+ "loss": 2.7463,
816
+ "step": 1840
817
+ },
818
+ {
819
+ "epoch": 60.0,
820
+ "grad_norm": 1.2450119256973267,
821
+ "learning_rate": 2.4819655082085835e-06,
822
+ "loss": 2.7273,
823
+ "step": 1860
824
+ },
825
+ {
826
+ "epoch": 60.65217391304348,
827
+ "grad_norm": 1.0745036602020264,
828
+ "learning_rate": 1.8318275555520237e-06,
829
+ "loss": 2.6887,
830
+ "step": 1880
831
+ },
832
+ {
833
+ "epoch": 61.29347826086956,
834
+ "grad_norm": 0.9726008176803589,
835
+ "learning_rate": 1.2786938918515568e-06,
836
+ "loss": 2.7324,
837
+ "step": 1900
838
+ },
839
+ {
840
+ "epoch": 61.29347826086956,
841
+ "eval_accuracy": 0.0003037594225373504,
842
+ "eval_loss": 10.290609359741211,
843
+ "eval_runtime": 42.7369,
844
+ "eval_samples_per_second": 7.067,
845
+ "eval_steps_per_second": 0.445,
846
+ "step": 1900
847
  }
848
  ],
849
  "logging_steps": 20,
 
863
  "attributes": {}
864
  }
865
  },
866
+ "total_flos": 3851454299490816.0,
867
  "train_batch_size": 8,
868
  "trial_name": null,
869
  "trial_params": null