sandernotenbaert committed
Commit 1b8703d · verified · 1 Parent(s): 1c3c95a

Training in progress, step 10500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:234a74342f029ed78afc7504951e30209937ebfc40da414b5c0ddba808050cde
+ oid sha256:860eca3961053936b241a5a2d4fa22d5d55c591199aaa9b78de4fdc667354710
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:edf3bd7c510bb22c9bef6a5be4eb2c5e53373f8b91d20a894a89d9aca7dcee5f
+ oid sha256:0908b4e68e646f4581a0026b2c4fa54184efb4bcbd7553998a1c4a4c12cd731e
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:12bdf3459ffab5d32ec375231a2eebdf56e9dbd56f588155f39eb4841c87ad7b
+ oid sha256:79f81c651a86e496a8a0683bb40cfa7b5abd415344f225e4b6cf5b7f3bf0d148
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:536bf64114e88301429208f737493c4ae0d118f5c27c3327e4079f245e1d631e
+ oid sha256:d094517def3bab9b1b376a801fd8dd8681180f5ccdb5bd9fd6776c1cd25c2968
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c477b290e5f0acc03a821a7eef064a5961781fd60d9d238c571a7e7c070565f7
+ oid sha256:cd17e6a2456b591e82434cf85c6c3687643ac9b76a0a0c080b341aa852e38872
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": 10000,
  "best_metric": 1.5091972351074219,
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-10000",
- "epoch": 4.452817587310421,
+ "epoch": 4.6754417698622515,
  "eval_steps": 500,
- "global_step": 10000,
+ "global_step": 10500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1568,6 +1568,84 @@
  "eval_samples_per_second": 393.855,
  "eval_steps_per_second": 49.247,
  "step": 10000
+ },
+ {
+ "epoch": 4.475080005565605,
+ "grad_norm": 1.0181940793991089,
+ "learning_rate": 5e-05,
+ "loss": 1.5594,
+ "step": 10050
+ },
+ {
+ "epoch": 4.497342423820788,
+ "grad_norm": 1.0538172721862793,
+ "learning_rate": 5e-05,
+ "loss": 1.5523,
+ "step": 10100
+ },
+ {
+ "epoch": 4.519604842075971,
+ "grad_norm": 0.936060905456543,
+ "learning_rate": 5e-05,
+ "loss": 1.547,
+ "step": 10150
+ },
+ {
+ "epoch": 4.541867260331154,
+ "grad_norm": 1.225715160369873,
+ "learning_rate": 5e-05,
+ "loss": 1.5491,
+ "step": 10200
+ },
+ {
+ "epoch": 4.564129678586337,
+ "grad_norm": 1.2574198246002197,
+ "learning_rate": 5e-05,
+ "loss": 1.5496,
+ "step": 10250
+ },
+ {
+ "epoch": 4.58639209684152,
+ "grad_norm": 1.2122540473937988,
+ "learning_rate": 5e-05,
+ "loss": 1.5327,
+ "step": 10300
+ },
+ {
+ "epoch": 4.608654515096703,
+ "grad_norm": 1.1094001531600952,
+ "learning_rate": 5e-05,
+ "loss": 1.5375,
+ "step": 10350
+ },
+ {
+ "epoch": 4.630916933351886,
+ "grad_norm": 1.0384974479675293,
+ "learning_rate": 5e-05,
+ "loss": 1.555,
+ "step": 10400
+ },
+ {
+ "epoch": 4.6531793516070685,
+ "grad_norm": 1.0797594785690308,
+ "learning_rate": 5e-05,
+ "loss": 1.5621,
+ "step": 10450
+ },
+ {
+ "epoch": 4.6754417698622515,
+ "grad_norm": 1.0724256038665771,
+ "learning_rate": 5e-05,
+ "loss": 1.547,
+ "step": 10500
+ },
+ {
+ "epoch": 4.6754417698622515,
+ "eval_loss": 1.5108764171600342,
+ "eval_runtime": 40.4882,
+ "eval_samples_per_second": 394.461,
+ "eval_steps_per_second": 49.323,
+ "step": 10500
  }
  ],
  "logging_steps": 50,
@@ -1582,7 +1660,7 @@
  "early_stopping_threshold": 0.0
  },
  "attributes": {
- "early_stopping_patience_counter": 0
+ "early_stopping_patience_counter": 1
  }
  },
  "TrainerControl": {
@@ -1596,7 +1674,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.945897129811661e+16,
+ "total_flos": 4.37287432012032e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null