neuralwonderland committed (verified)
Commit 25fd21b · 1 Parent(s): 5af5ad5

Training in progress, step 900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1c23281588fe7914d4d4f44fa96d1bebe41c212bf719c33b5fdf7b40eeecca6
+oid sha256:7c90d78e887cabe5092648a5f2500eff4360533a5506f69f490047c60938c223
 size 239135488
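These entries are Git LFS pointers: the repository tracks only the SHA-256 object id and byte size, while the actual weights live in LFS storage. A minimal sketch (assuming the file has already been downloaded locally; the path is illustrative) of checking a file against its pointer:

```python
import hashlib
import os

def verify_lfs_pointer(local_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(local_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size come from the new pointer above; the local path is an assumption.
ok = verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "7c90d78e887cabe5092648a5f2500eff4360533a5506f69f490047c60938c223",
    239135488,
)
print("pointer matches" if ok else "pointer mismatch")
```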
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:192b8e53e5187f343994adcd574d24d72b38e9908a8d4af0ec0e2eb7e0a4cf96
+oid sha256:dcf770adef12f056ea3ee3225ff7c3425f43be1f98c6994a9ff81521a6b19aff
 size 478529298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2586975e1d3b683969aff70c9424e21fce04e276125109d1dca17ca7667d0c91
+oid sha256:4abc4dbe90bb5ad444fc5d07c6c8a7deecdcbcc42c2db53b12a4ccd3e4d07913
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d9acb300ee3481a499d71232bda2cc6822a3d265890998459ac4ab1f2a7a58f
+oid sha256:19edf8f1957a4ee4a7ad7c15c95d0e40ddbc2262c2a8b3d331d09ae113e648a2
 size 1256
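Alongside the adapter weights, the checkpoint stores the optimizer, RNG, and learning-rate-scheduler state, which is what `transformers.Trainer` reads when continuing a run. A hedged sketch of resuming from this directory; the model, datasets, and most argument values are placeholders, not taken from this repo:

```python
from transformers import Trainer, TrainingArguments

def resume_training(model, train_ds, eval_ds, checkpoint_dir: str = "last-checkpoint"):
    """Hypothetical wrapper: resume a Trainer run from a saved checkpoint directory."""
    args = TrainingArguments(
        output_dir="./output",
        per_device_train_batch_size=16,  # mirrors "train_batch_size" in trainer_state.json
        logging_steps=10,                # mirrors "logging_steps"
        eval_steps=150,                  # mirrors "eval_steps"
        save_steps=150,
    )
    trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)
    # train() reloads optimizer.pt, scheduler.pt, rng_state.pth and the saved weights,
    # then continues from the recorded global_step (900 in this checkpoint).
    return trainer.train(resume_from_checkpoint=checkpoint_dir)
```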
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.05326759070158005,
-  "best_model_checkpoint": "./output/checkpoint-750",
-  "epoch": 0.7560483870967742,
+  "best_metric": 0.05111026018857956,
+  "best_model_checkpoint": "./output/checkpoint-900",
+  "epoch": 0.907258064516129,
   "eval_steps": 150,
-  "global_step": 750,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -572,6 +572,119 @@
       "eval_samples_per_second": 8.66,
       "eval_steps_per_second": 8.66,
       "step": 750
+    },
+    {
+      "epoch": 0.7661290322580645,
+      "grad_norm": 0.4265230894088745,
+      "learning_rate": 0.00011948741333828481,
+      "loss": 0.0847,
+      "step": 760
+    },
+    {
+      "epoch": 0.7762096774193549,
+      "grad_norm": 0.8098424077033997,
+      "learning_rate": 0.00011932169520872344,
+      "loss": 0.0822,
+      "step": 770
+    },
+    {
+      "epoch": 0.7862903225806451,
+      "grad_norm": 0.8245271444320679,
+      "learning_rate": 0.00011915364136182738,
+      "loss": 0.1158,
+      "step": 780
+    },
+    {
+      "epoch": 0.7963709677419355,
+      "grad_norm": 0.22349853813648224,
+      "learning_rate": 0.0001189832587056321,
+      "loss": 0.0921,
+      "step": 790
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 0.7429900169372559,
+      "learning_rate": 0.00011881055424390119,
+      "loss": 0.0817,
+      "step": 800
+    },
+    {
+      "epoch": 0.8165322580645161,
+      "grad_norm": 0.5830271244049072,
+      "learning_rate": 0.00011863553507583869,
+      "loss": 0.0938,
+      "step": 810
+    },
+    {
+      "epoch": 0.8266129032258065,
+      "grad_norm": 0.2562120258808136,
+      "learning_rate": 0.00011845820839579708,
+      "loss": 0.0856,
+      "step": 820
+    },
+    {
+      "epoch": 0.8366935483870968,
+      "grad_norm": 0.6078082919120789,
+      "learning_rate": 0.00011827858149298162,
+      "loss": 0.0998,
+      "step": 830
+    },
+    {
+      "epoch": 0.8467741935483871,
+      "grad_norm": 0.48718827962875366,
+      "learning_rate": 0.00011809666175115075,
+      "loss": 0.0794,
+      "step": 840
+    },
+    {
+      "epoch": 0.8568548387096774,
+      "grad_norm": 0.9318081736564636,
+      "learning_rate": 0.00011791245664831251,
+      "loss": 0.0562,
+      "step": 850
+    },
+    {
+      "epoch": 0.8669354838709677,
+      "grad_norm": 0.5518208146095276,
+      "learning_rate": 0.0001177259737564172,
+      "loss": 0.0759,
+      "step": 860
+    },
+    {
+      "epoch": 0.8770161290322581,
+      "grad_norm": 0.8083676099777222,
+      "learning_rate": 0.00011753722074104613,
+      "loss": 0.0876,
+      "step": 870
+    },
+    {
+      "epoch": 0.8870967741935484,
+      "grad_norm": 0.3032991886138916,
+      "learning_rate": 0.00011734620536109644,
+      "loss": 0.0737,
+      "step": 880
+    },
+    {
+      "epoch": 0.8971774193548387,
+      "grad_norm": 0.1460338681936264,
+      "learning_rate": 0.00011715293546846223,
+      "loss": 0.0784,
+      "step": 890
+    },
+    {
+      "epoch": 0.907258064516129,
+      "grad_norm": 1.3422744274139404,
+      "learning_rate": 0.00011695741900771184,
+      "loss": 0.1411,
+      "step": 900
+    },
+    {
+      "epoch": 0.907258064516129,
+      "eval_loss": 0.05111026018857956,
+      "eval_runtime": 58.3136,
+      "eval_samples_per_second": 8.574,
+      "eval_steps_per_second": 8.574,
+      "step": 900
     }
   ],
   "logging_steps": 10,
@@ -591,7 +704,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.769083236135731e+16,
+  "total_flos": 8.093945859843686e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null