afull05 commited on
Commit
08f430e
·
verified ·
1 Parent(s): 007394f

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11db584e072af7f1b6f514cdaa124b844cedc94c8b97ada1c825241d18c240cc
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4771d40822f2232ea1fc66bbb86799fa46e7f703da704326d615ffa6101825b9
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed1a4d75b88c9c80e2c24d116905c056e83ed305fde161d122a1ed7add4dcfb
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1b14213d8f993ec42c615c8027597e3bf7668a27fbdb9035698a8baae177c92
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c02b4da51e8241d879990840aae408c8cee00a0a61e4693c55f47d7ad7007e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ec0b50a82217eefa34c5afd3f9b449cdce6ba7ebf5cff729c1fc131f10aad0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b5bf190dc871967c45091d9f1ab233b2d2ed62baca21fee5dfedb5718ffa5d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.026069615239017005,
5
  "eval_steps": 334,
6
- "global_step": 835,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -612,6 +612,125 @@
612
  "learning_rate": 1.4201658676502294e-05,
613
  "loss": 0.7033,
614
  "step": 830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  }
616
  ],
617
  "logging_steps": 10,
@@ -626,12 +745,12 @@
626
  "should_evaluate": false,
627
  "should_log": false,
628
  "should_save": true,
629
- "should_training_stop": false
630
  },
631
  "attributes": {}
632
  }
633
  },
634
- "total_flos": 6.372527049214525e+17,
635
  "train_batch_size": 1,
636
  "trial_name": null,
637
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.031221096094631143,
5
  "eval_steps": 334,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
612
  "learning_rate": 1.4201658676502294e-05,
613
  "loss": 0.7033,
614
  "step": 830
615
+ },
616
+ {
617
+ "epoch": 0.02622572071949016,
618
+ "grad_norm": 1.3929728269577026,
619
+ "learning_rate": 1.2615062293021507e-05,
620
+ "loss": 0.852,
621
+ "step": 840
622
+ },
623
+ {
624
+ "epoch": 0.026537931680436472,
625
+ "grad_norm": 0.782926619052887,
626
+ "learning_rate": 1.1116455134507664e-05,
627
+ "loss": 1.0648,
628
+ "step": 850
629
+ },
630
+ {
631
+ "epoch": 0.02685014264138278,
632
+ "grad_norm": 1.1986958980560303,
633
+ "learning_rate": 9.707346171337894e-06,
634
+ "loss": 0.9374,
635
+ "step": 860
636
+ },
637
+ {
638
+ "epoch": 0.027162353602329093,
639
+ "grad_norm": 1.0474522113800049,
640
+ "learning_rate": 8.38915425679304e-06,
641
+ "loss": 0.5091,
642
+ "step": 870
643
+ },
644
+ {
645
+ "epoch": 0.027474564563275405,
646
+ "grad_norm": 1.569217562675476,
647
+ "learning_rate": 7.163206698392744e-06,
648
+ "loss": 0.6196,
649
+ "step": 880
650
+ },
651
+ {
652
+ "epoch": 0.027786775524221718,
653
+ "grad_norm": 2.387129306793213,
654
+ "learning_rate": 6.030737921409169e-06,
655
+ "loss": 0.8911,
656
+ "step": 890
657
+ },
658
+ {
659
+ "epoch": 0.028098986485168027,
660
+ "grad_norm": 1.0116970539093018,
661
+ "learning_rate": 4.992888225905468e-06,
662
+ "loss": 0.893,
663
+ "step": 900
664
+ },
665
+ {
666
+ "epoch": 0.02841119744611434,
667
+ "grad_norm": 1.056663155555725,
668
+ "learning_rate": 4.050702638550275e-06,
669
+ "loss": 0.9925,
670
+ "step": 910
671
+ },
672
+ {
673
+ "epoch": 0.02872340840706065,
674
+ "grad_norm": 1.0909359455108643,
675
+ "learning_rate": 3.2051298603643753e-06,
676
+ "loss": 0.634,
677
+ "step": 920
678
+ },
679
+ {
680
+ "epoch": 0.029035619368006964,
681
+ "grad_norm": 1.5931735038757324,
682
+ "learning_rate": 2.4570213114592954e-06,
683
+ "loss": 0.6653,
684
+ "step": 930
685
+ },
686
+ {
687
+ "epoch": 0.029347830328953273,
688
+ "grad_norm": 1.73000967502594,
689
+ "learning_rate": 1.8071302737293295e-06,
690
+ "loss": 0.7891,
691
+ "step": 940
692
+ },
693
+ {
694
+ "epoch": 0.029660041289899585,
695
+ "grad_norm": 0.5561469197273254,
696
+ "learning_rate": 1.2561111323605712e-06,
697
+ "loss": 0.9679,
698
+ "step": 950
699
+ },
700
+ {
701
+ "epoch": 0.029972252250845897,
702
+ "grad_norm": 1.1528609991073608,
703
+ "learning_rate": 8.04518716920466e-07,
704
+ "loss": 0.9362,
705
+ "step": 960
706
+ },
707
+ {
708
+ "epoch": 0.030284463211792206,
709
+ "grad_norm": 0.6554343104362488,
710
+ "learning_rate": 4.5280774269154115e-07,
711
+ "loss": 0.3348,
712
+ "step": 970
713
+ },
714
+ {
715
+ "epoch": 0.03059667417273852,
716
+ "grad_norm": 1.3619601726531982,
717
+ "learning_rate": 2.0133235281156736e-07,
718
+ "loss": 0.5258,
719
+ "step": 980
720
+ },
721
+ {
722
+ "epoch": 0.03090888513368483,
723
+ "grad_norm": 1.2936415672302246,
724
+ "learning_rate": 5.0345761681491746e-08,
725
+ "loss": 0.9126,
726
+ "step": 990
727
+ },
728
+ {
729
+ "epoch": 0.031221096094631143,
730
+ "grad_norm": 1.708081841468811,
731
+ "learning_rate": 0.0,
732
+ "loss": 1.0155,
733
+ "step": 1000
734
  }
735
  ],
736
  "logging_steps": 10,
 
745
  "should_evaluate": false,
746
  "should_log": false,
747
  "should_save": true,
748
+ "should_training_stop": true
749
  },
750
  "attributes": {}
751
  }
752
  },
753
+ "total_flos": 7.641564097334477e+17,
754
  "train_batch_size": 1,
755
  "trial_name": null,
756
  "trial_params": null