Training in progress, step 4000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 30214176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:761ed6751920843cffb7ea195e9fda4cc5b52e9c232bfc7dee3bb45f6619637e
|
3 |
size 30214176
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 291962
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1202b871306e5a47b46fb1bcf989facc74951cc8514d655f034a9ea5ed1ee58
|
3 |
size 291962
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:288381390ef2c4e9c9473d5ee077feaf5f5cefbdb2b7cf1c96b6907ae793487e
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37caeb1bb49fdf577e04e454fc0c1ffb92bd891d0dbc0b5a4547415a9f259799
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50ac02c40c90996d4ad6bb7daa053a54b85676d36aaa3b4b892740a97de5c357
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": 3000,
|
3 |
"best_metric": 1.6028199195861816,
|
4 |
"best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -554,6 +554,84 @@
|
|
554 |
"eval_samples_per_second": 393.896,
|
555 |
"eval_steps_per_second": 49.252,
|
556 |
"step": 3500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
}
|
558 |
],
|
559 |
"logging_steps": 50,
|
@@ -568,7 +646,7 @@
|
|
568 |
"early_stopping_threshold": 0.0
|
569 |
},
|
570 |
"attributes": {
|
571 |
-
"early_stopping_patience_counter":
|
572 |
}
|
573 |
},
|
574 |
"TrainerControl": {
|
@@ -582,7 +660,7 @@
|
|
582 |
"attributes": {}
|
583 |
}
|
584 |
},
|
585 |
-
"total_flos": 1.
|
586 |
"train_batch_size": 4,
|
587 |
"trial_name": null,
|
588 |
"trial_params": null
|
|
|
2 |
"best_global_step": 3000,
|
3 |
"best_metric": 1.6028199195861816,
|
4 |
"best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
|
5 |
+
"epoch": 1.7809656323918186,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 4000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
554 |
"eval_samples_per_second": 393.896,
|
555 |
"eval_steps_per_second": 49.252,
|
556 |
"step": 3500
|
557 |
+
},
|
558 |
+
{
|
559 |
+
"epoch": 1.5806038680951717,
|
560 |
+
"grad_norm": 1.0579187870025635,
|
561 |
+
"learning_rate": 5e-05,
|
562 |
+
"loss": 1.611,
|
563 |
+
"step": 3550
|
564 |
+
},
|
565 |
+
{
|
566 |
+
"epoch": 1.6028662863503547,
|
567 |
+
"grad_norm": 0.8839408159255981,
|
568 |
+
"learning_rate": 5e-05,
|
569 |
+
"loss": 1.6106,
|
570 |
+
"step": 3600
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"epoch": 1.6251287046055378,
|
574 |
+
"grad_norm": 1.048997402191162,
|
575 |
+
"learning_rate": 5e-05,
|
576 |
+
"loss": 1.6075,
|
577 |
+
"step": 3650
|
578 |
+
},
|
579 |
+
{
|
580 |
+
"epoch": 1.6473911228607208,
|
581 |
+
"grad_norm": 1.201557993888855,
|
582 |
+
"learning_rate": 5e-05,
|
583 |
+
"loss": 1.621,
|
584 |
+
"step": 3700
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"epoch": 1.6696535411159037,
|
588 |
+
"grad_norm": 0.9804443717002869,
|
589 |
+
"learning_rate": 5e-05,
|
590 |
+
"loss": 1.6079,
|
591 |
+
"step": 3750
|
592 |
+
},
|
593 |
+
{
|
594 |
+
"epoch": 1.6919159593710866,
|
595 |
+
"grad_norm": 0.9969685077667236,
|
596 |
+
"learning_rate": 5e-05,
|
597 |
+
"loss": 1.6281,
|
598 |
+
"step": 3800
|
599 |
+
},
|
600 |
+
{
|
601 |
+
"epoch": 1.7141783776262698,
|
602 |
+
"grad_norm": 1.0730953216552734,
|
603 |
+
"learning_rate": 5e-05,
|
604 |
+
"loss": 1.6235,
|
605 |
+
"step": 3850
|
606 |
+
},
|
607 |
+
{
|
608 |
+
"epoch": 1.7364407958814527,
|
609 |
+
"grad_norm": 1.1014162302017212,
|
610 |
+
"learning_rate": 5e-05,
|
611 |
+
"loss": 1.6349,
|
612 |
+
"step": 3900
|
613 |
+
},
|
614 |
+
{
|
615 |
+
"epoch": 1.7587032141366357,
|
616 |
+
"grad_norm": 0.9518324732780457,
|
617 |
+
"learning_rate": 5e-05,
|
618 |
+
"loss": 1.6271,
|
619 |
+
"step": 3950
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"epoch": 1.7809656323918186,
|
623 |
+
"grad_norm": 1.0745582580566406,
|
624 |
+
"learning_rate": 5e-05,
|
625 |
+
"loss": 1.6217,
|
626 |
+
"step": 4000
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"epoch": 1.7809656323918186,
|
630 |
+
"eval_loss": 1.6218018531799316,
|
631 |
+
"eval_runtime": 41.2037,
|
632 |
+
"eval_samples_per_second": 387.611,
|
633 |
+
"eval_steps_per_second": 48.466,
|
634 |
+
"step": 4000
|
635 |
}
|
636 |
],
|
637 |
"logging_steps": 50,
|
|
|
646 |
"early_stopping_threshold": 0.0
|
647 |
},
|
648 |
"attributes": {
|
649 |
+
"early_stopping_patience_counter": 2
|
650 |
}
|
651 |
},
|
652 |
"TrainerControl": {
|
|
|
660 |
"attributes": {}
|
661 |
}
|
662 |
},
|
663 |
+
"total_flos": 1.8688429934168064e+16,
|
664 |
"train_batch_size": 4,
|
665 |
"trial_name": null,
|
666 |
"trial_params": null
|