sandernotenbaert committed on
Commit
585dda8
·
verified ·
1 Parent(s): f475f96

Training in progress, step 9500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bbad73333d3880772336a625e35504a2ee30e02405a6dc0acdda9e638172d21
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9117ec4f950dea6754224554f5e2c755a6836a7961b4ded5f21c63c21c445fb9
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed1405f2590bc8a2add99042e9e49fb8954369bbfb3b18ea111e4bab55d4aef4
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5cc484e5182c4715a2a6109e3cc2b4d706785d329d27d5874fca6c049423045
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afb634ec7f171d96ec4db3077b1dfe85e8869023201bf15be33fad6a0ce2ffda
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77350fb64cb2dda91c46d447fdf9444b805615f96cc40b39ffa14f4a21adc432
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96abe9f8a68e6d0c748eb46e8d20fb4fad6bc47b204e7f7e5c0da3fbe14595be
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b82fa79f11f9a89c3adc8c26018f9952557f53a7145deb6860a551470206d5
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdd7639db17312a4ecd7d288107da009f92352ae931ff71ff9086e467c2a7e1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e301c4e597049f65c33ebf04fcf53823c852f1e3ca861cecc7edcee425492837
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 8500,
3
- "best_metric": 1.516330361366272,
4
- "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-8500",
5
- "epoch": 4.0075692222067625,
6
  "eval_steps": 500,
7
- "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1412,6 +1412,84 @@
1412
  "eval_samples_per_second": 395.778,
1413
  "eval_steps_per_second": 49.488,
1414
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1415
  }
1416
  ],
1417
  "logging_steps": 50,
@@ -1426,7 +1504,7 @@
1426
  "early_stopping_threshold": 0.0
1427
  },
1428
  "attributes": {
1429
- "early_stopping_patience_counter": 1
1430
  }
1431
  },
1432
  "TrainerControl": {
@@ -1440,7 +1518,7 @@
1440
  "attributes": {}
1441
  }
1442
  },
1443
- "total_flos": 3.444898329997824e+16,
1444
  "train_batch_size": 4,
1445
  "trial_name": null,
1446
  "trial_params": null
 
1
  {
2
+ "best_global_step": 9500,
3
+ "best_metric": 1.5118227005004883,
4
+ "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-9500",
5
+ "epoch": 4.230193404758592,
6
  "eval_steps": 500,
7
+ "global_step": 9500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1412
  "eval_samples_per_second": 395.778,
1413
  "eval_steps_per_second": 49.488,
1414
  "step": 9000
1415
+ },
1416
+ {
1417
+ "epoch": 4.029831640461945,
1418
+ "grad_norm": 1.2552838325500488,
1419
+ "learning_rate": 5e-05,
1420
+ "loss": 1.6371,
1421
+ "step": 9050
1422
+ },
1423
+ {
1424
+ "epoch": 4.052094058717128,
1425
+ "grad_norm": 1.3415331840515137,
1426
+ "learning_rate": 5e-05,
1427
+ "loss": 1.6345,
1428
+ "step": 9100
1429
+ },
1430
+ {
1431
+ "epoch": 4.074356476972311,
1432
+ "grad_norm": 1.029757022857666,
1433
+ "learning_rate": 5e-05,
1434
+ "loss": 1.629,
1435
+ "step": 9150
1436
+ },
1437
+ {
1438
+ "epoch": 4.096618895227494,
1439
+ "grad_norm": 1.1435120105743408,
1440
+ "learning_rate": 5e-05,
1441
+ "loss": 1.6287,
1442
+ "step": 9200
1443
+ },
1444
+ {
1445
+ "epoch": 4.118881313482677,
1446
+ "grad_norm": 1.385100245475769,
1447
+ "learning_rate": 5e-05,
1448
+ "loss": 1.6335,
1449
+ "step": 9250
1450
+ },
1451
+ {
1452
+ "epoch": 4.14114373173786,
1453
+ "grad_norm": 1.062818169593811,
1454
+ "learning_rate": 5e-05,
1455
+ "loss": 1.6184,
1456
+ "step": 9300
1457
+ },
1458
+ {
1459
+ "epoch": 4.163406149993043,
1460
+ "grad_norm": 1.3703244924545288,
1461
+ "learning_rate": 5e-05,
1462
+ "loss": 1.631,
1463
+ "step": 9350
1464
+ },
1465
+ {
1466
+ "epoch": 4.185668568248226,
1467
+ "grad_norm": 1.1130529642105103,
1468
+ "learning_rate": 5e-05,
1469
+ "loss": 1.6284,
1470
+ "step": 9400
1471
+ },
1472
+ {
1473
+ "epoch": 4.207930986503409,
1474
+ "grad_norm": 1.189207911491394,
1475
+ "learning_rate": 5e-05,
1476
+ "loss": 1.619,
1477
+ "step": 9450
1478
+ },
1479
+ {
1480
+ "epoch": 4.230193404758592,
1481
+ "grad_norm": 1.0979055166244507,
1482
+ "learning_rate": 5e-05,
1483
+ "loss": 1.6324,
1484
+ "step": 9500
1485
+ },
1486
+ {
1487
+ "epoch": 4.230193404758592,
1488
+ "eval_loss": 1.5118227005004883,
1489
+ "eval_runtime": 40.2303,
1490
+ "eval_samples_per_second": 396.989,
1491
+ "eval_steps_per_second": 49.639,
1492
+ "step": 9500
1493
  }
1494
  ],
1495
  "logging_steps": 50,
 
1504
  "early_stopping_threshold": 0.0
1505
  },
1506
  "attributes": {
1507
+ "early_stopping_patience_counter": 0
1508
  }
1509
  },
1510
  "TrainerControl": {
 
1518
  "attributes": {}
1519
  }
1520
  },
1521
+ "total_flos": 3.492920965883597e+16,
1522
  "train_batch_size": 4,
1523
  "trial_name": null,
1524
  "trial_params": null