error577 commited on
Commit
5afa1cd
·
verified ·
1 Parent(s): c936a44

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5c9d0ed52b3043466a6377a3eae8328ec07305d72478a84ed821b4d68db79c4
3
  size 377528296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da8950123b30f3fa3e0e8ee83f72f3b2f414436b088d27ee084dc11189dcdd69
3
  size 377528296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ef8324a5f781ac2c15d9f45dbdbff76de3d0825b6104a1c35acb40eb3205233
3
  size 100950454
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3bc56991baff6e9adc069cbe70c304d9e293c6e1a94dcc9e124de7f02e2ef9e
3
  size 100950454
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7e1c9864391f7a9e1741c01173d2b910d5d1c311252162fc450d4c6d83da4d0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241b88bd5e666af9478e0e5eb8cb4359a74fa33fddc106039ac6f6d66064cba9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c62f4fdc2df8e5e8bf6e891c16e6620dda6d955eaaffc2b1ff18df9d1875d5d
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfdef5a028ffb71f0f119eaf380178e35f4f8c0cf1e1810bd40a5706a8894a4d
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.4196394681930542,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
- "epoch": 0.5144326950557302,
5
  "eval_steps": 100,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -717,6 +717,84 @@
717
  "eval_samples_per_second": 2.846,
718
  "eval_steps_per_second": 2.846,
719
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
  }
721
  ],
722
  "logging_steps": 10,
@@ -731,7 +809,7 @@
731
  "early_stopping_threshold": 0.0
732
  },
733
  "attributes": {
734
- "early_stopping_patience_counter": 2
735
  }
736
  },
737
  "TrainerControl": {
@@ -740,12 +818,12 @@
740
  "should_evaluate": false,
741
  "should_log": false,
742
  "should_save": true,
743
- "should_training_stop": false
744
  },
745
  "attributes": {}
746
  }
747
  },
748
- "total_flos": 3.29086365990912e+16,
749
  "train_batch_size": 1,
750
  "trial_name": null,
751
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4196394681930542,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-700",
4
+ "epoch": 0.5715918833952558,
5
  "eval_steps": 100,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
717
  "eval_samples_per_second": 2.846,
718
  "eval_steps_per_second": 2.846,
719
  "step": 900
720
+ },
721
+ {
722
+ "epoch": 0.5201486138896828,
723
+ "grad_norm": 11.08961296081543,
724
+ "learning_rate": 0.00018628465477377176,
725
+ "loss": 1.4101,
726
+ "step": 910
727
+ },
728
+ {
729
+ "epoch": 0.5258645327236353,
730
+ "grad_norm": 7.305054664611816,
731
+ "learning_rate": 0.00018597871530801058,
732
+ "loss": 1.1836,
733
+ "step": 920
734
+ },
735
+ {
736
+ "epoch": 0.5315804515575879,
737
+ "grad_norm": 11.424978256225586,
738
+ "learning_rate": 0.00018566966173239052,
739
+ "loss": 1.7394,
740
+ "step": 930
741
+ },
742
+ {
743
+ "epoch": 0.5372963703915404,
744
+ "grad_norm": 9.069324493408203,
745
+ "learning_rate": 0.00018535747949499637,
746
+ "loss": 1.6682,
747
+ "step": 940
748
+ },
749
+ {
750
+ "epoch": 0.543012289225493,
751
+ "grad_norm": 11.527304649353027,
752
+ "learning_rate": 0.0001850422122515738,
753
+ "loss": 1.5007,
754
+ "step": 950
755
+ },
756
+ {
757
+ "epoch": 0.5487282080594456,
758
+ "grad_norm": 7.424556255340576,
759
+ "learning_rate": 0.00018472386000212282,
760
+ "loss": 1.4391,
761
+ "step": 960
762
+ },
763
+ {
764
+ "epoch": 0.5544441268933981,
765
+ "grad_norm": 7.527375221252441,
766
+ "learning_rate": 0.00018440245185047388,
767
+ "loss": 1.3857,
768
+ "step": 970
769
+ },
770
+ {
771
+ "epoch": 0.5601600457273507,
772
+ "grad_norm": 6.9273295402526855,
773
+ "learning_rate": 0.00018407795869279653,
774
+ "loss": 1.4611,
775
+ "step": 980
776
+ },
777
+ {
778
+ "epoch": 0.5658759645613032,
779
+ "grad_norm": 8.494791984558105,
780
+ "learning_rate": 0.00018375043873675168,
781
+ "loss": 1.5067,
782
+ "step": 990
783
+ },
784
+ {
785
+ "epoch": 0.5715918833952558,
786
+ "grad_norm": 33.15937042236328,
787
+ "learning_rate": 0.00018341986287850887,
788
+ "loss": 2.3622,
789
+ "step": 1000
790
+ },
791
+ {
792
+ "epoch": 0.5715918833952558,
793
+ "eval_loss": 0.5082448720932007,
794
+ "eval_runtime": 12.6257,
795
+ "eval_samples_per_second": 2.851,
796
+ "eval_steps_per_second": 2.851,
797
+ "step": 1000
798
  }
799
  ],
800
  "logging_steps": 10,
 
809
  "early_stopping_threshold": 0.0
810
  },
811
  "attributes": {
812
+ "early_stopping_patience_counter": 3
813
  }
814
  },
815
  "TrainerControl": {
 
818
  "should_evaluate": false,
819
  "should_log": false,
820
  "should_save": true,
821
+ "should_training_stop": true
822
  },
823
  "attributes": {}
824
  }
825
  },
826
+ "total_flos": 3.655020281856e+16,
827
  "train_batch_size": 1,
828
  "trial_name": null,
829
  "trial_params": null