sandernotenbaert committed
Commit 525af10 · verified · 1 Parent(s): eab31eb

Training in progress, step 10000, checkpoint
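The files touched here are the standard Hugging Face Trainer checkpoint contents: model weights (model.safetensors), optimizer and learning-rate-scheduler state (optimizer.pt, scheduler.pt), the mixed-precision gradient scaler (scaler.pt), RNG state (rng_state.pth), and the training log in trainer_state.json. As a minimal sketch, assuming the checkpoint directory has been pulled locally and that the run is the T5-style seq2seq fine-tune its name suggests, the saved weights can be loaded directly; resuming training would instead go through Trainer.train(resume_from_checkpoint=...) with the original training script.

```python
# Minimal sketch, not the repository's training code: load the weights stored in
# this checkpoint. The model class is an assumption based on the run name
# ("hierarchical_music_t5_small_finetune"); the path is the directory shown below.
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("last-checkpoint")
print(f"loaded {sum(p.numel() for p in model.parameters()):,} parameters")
```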
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9117ec4f950dea6754224554f5e2c755a6836a7961b4ded5f21c63c21c445fb9
+oid sha256:234a74342f029ed78afc7504951e30209937ebfc40da414b5c0ddba808050cde
 size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5cc484e5182c4715a2a6109e3cc2b4d706785d329d27d5874fca6c049423045
+oid sha256:edf3bd7c510bb22c9bef6a5be4eb2c5e53373f8b91d20a894a89d9aca7dcee5f
 size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77350fb64cb2dda91c46d447fdf9444b805615f96cc40b39ffa14f4a21adc432
+oid sha256:12bdf3459ffab5d32ec375231a2eebdf56e9dbd56f588155f39eb4841c87ad7b
 size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3b82fa79f11f9a89c3adc8c26018f9952557f53a7145deb6860a551470206d5
+oid sha256:536bf64114e88301429208f737493c4ae0d118f5c27c3327e4079f245e1d631e
 size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e301c4e597049f65c33ebf04fcf53823c852f1e3ca861cecc7edcee425492837
+oid sha256:c477b290e5f0acc03a821a7eef064a5961781fd60d9d238c571a7e7c070565f7
 size 1064
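Each binary file above is stored as a Git LFS pointer: the oid sha256 is the SHA-256 digest of the real blob and size is its length in bytes, so a download can be checked against its pointer. A small sketch, assuming the resolved files (not the pointer stubs) sit at the paths used below:

```python
import hashlib
import os

def verify_lfs_blob(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the model.safetensors pointer in this commit.
print(verify_lfs_blob(
    "last-checkpoint/model.safetensors",
    "234a74342f029ed78afc7504951e30209937ebfc40da414b5c0ddba808050cde",
    30214176,
))
```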
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
 {
-  "best_global_step": 9500,
-  "best_metric": 1.5118227005004883,
-  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-9500",
-  "epoch": 4.230193404758592,
+  "best_global_step": 10000,
+  "best_metric": 1.5091972351074219,
+  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-10000",
+  "epoch": 4.452817587310421,
   "eval_steps": 500,
-  "global_step": 9500,
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1490,6 +1490,84 @@
       "eval_samples_per_second": 396.989,
       "eval_steps_per_second": 49.639,
       "step": 9500
+    },
+    {
+      "epoch": 4.252455823013775,
+      "grad_norm": 1.1394270658493042,
+      "learning_rate": 5e-05,
+      "loss": 1.6229,
+      "step": 9550
+    },
+    {
+      "epoch": 4.274718241268958,
+      "grad_norm": 1.0398465394973755,
+      "learning_rate": 5e-05,
+      "loss": 1.625,
+      "step": 9600
+    },
+    {
+      "epoch": 4.296980659524141,
+      "grad_norm": 1.1344504356384277,
+      "learning_rate": 5e-05,
+      "loss": 1.6113,
+      "step": 9650
+    },
+    {
+      "epoch": 4.319243077779324,
+      "grad_norm": 0.9889805316925049,
+      "learning_rate": 5e-05,
+      "loss": 1.6195,
+      "step": 9700
+    },
+    {
+      "epoch": 4.3415054960345065,
+      "grad_norm": 1.2321630716323853,
+      "learning_rate": 5e-05,
+      "loss": 1.6133,
+      "step": 9750
+    },
+    {
+      "epoch": 4.3637679142896895,
+      "grad_norm": 1.0766791105270386,
+      "learning_rate": 5e-05,
+      "loss": 1.587,
+      "step": 9800
+    },
+    {
+      "epoch": 4.386030332544872,
+      "grad_norm": 0.9230866432189941,
+      "learning_rate": 5e-05,
+      "loss": 1.5747,
+      "step": 9850
+    },
+    {
+      "epoch": 4.408292750800055,
+      "grad_norm": 1.036097526550293,
+      "learning_rate": 5e-05,
+      "loss": 1.5673,
+      "step": 9900
+    },
+    {
+      "epoch": 4.430555169055238,
+      "grad_norm": 1.0321383476257324,
+      "learning_rate": 5e-05,
+      "loss": 1.5641,
+      "step": 9950
+    },
+    {
+      "epoch": 4.452817587310421,
+      "grad_norm": 0.9865553379058838,
+      "learning_rate": 5e-05,
+      "loss": 1.5481,
+      "step": 10000
+    },
+    {
+      "epoch": 4.452817587310421,
+      "eval_loss": 1.5091972351074219,
+      "eval_runtime": 40.5505,
+      "eval_samples_per_second": 393.855,
+      "eval_steps_per_second": 49.247,
+      "step": 10000
     }
   ],
   "logging_steps": 50,
@@ -1518,7 +1596,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.492920965883597e+16,
+  "total_flos": 3.945897129811661e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null