Training in progress, step 3500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 30214176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59598fbdd1c5e2f583902dedafc3608ce33bbba310616bf6dc87ed894499216c
|
3 |
size 30214176
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 291962
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c36ca00300f8149041f48da61b5fca592e064fd49ac67d29a37c6bcaba238d3e
|
3 |
size 291962
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b337ce57e0e628ac23a51821ba9829dc5591967e2ff7b7d7b32c74e1aad03b79
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:018616c73745e54f08c106f2414258a1870a189c7fffafe7f792b5acbfd19fdc
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a164f17f191d714fcd6bbbbbba58c57fe70c8ab069a5df01c4e86e5fc119f19d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": 3000,
|
3 |
"best_metric": 1.6028199195861816,
|
4 |
"best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -476,6 +476,84 @@
|
|
476 |
"eval_samples_per_second": 386.954,
|
477 |
"eval_steps_per_second": 48.384,
|
478 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
}
|
480 |
],
|
481 |
"logging_steps": 50,
|
@@ -490,7 +568,7 @@
|
|
490 |
"early_stopping_threshold": 0.0
|
491 |
},
|
492 |
"attributes": {
|
493 |
-
"early_stopping_patience_counter":
|
494 |
}
|
495 |
},
|
496 |
"TrainerControl": {
|
@@ -504,7 +582,7 @@
|
|
504 |
"attributes": {}
|
505 |
}
|
506 |
},
|
507 |
-
"total_flos": 1.
|
508 |
"train_batch_size": 4,
|
509 |
"trial_name": null,
|
510 |
"trial_params": null
|
|
|
2 |
"best_global_step": 3000,
|
3 |
"best_metric": 1.6028199195861816,
|
4 |
"best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
|
5 |
+
"epoch": 1.5583414498399888,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 3500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
476 |
"eval_samples_per_second": 386.954,
|
477 |
"eval_steps_per_second": 48.384,
|
478 |
"step": 3000
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 1.3579796855433421,
|
482 |
+
"grad_norm": 0.904662549495697,
|
483 |
+
"learning_rate": 5e-05,
|
484 |
+
"loss": 1.6656,
|
485 |
+
"step": 3050
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 1.380242103798525,
|
489 |
+
"grad_norm": 1.2054646015167236,
|
490 |
+
"learning_rate": 5e-05,
|
491 |
+
"loss": 1.6409,
|
492 |
+
"step": 3100
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 1.402504522053708,
|
496 |
+
"grad_norm": 0.8623887300491333,
|
497 |
+
"learning_rate": 5e-05,
|
498 |
+
"loss": 1.6378,
|
499 |
+
"step": 3150
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"epoch": 1.4247669403088912,
|
503 |
+
"grad_norm": 0.931481659412384,
|
504 |
+
"learning_rate": 5e-05,
|
505 |
+
"loss": 1.6395,
|
506 |
+
"step": 3200
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"epoch": 1.447029358564074,
|
510 |
+
"grad_norm": 0.8971887826919556,
|
511 |
+
"learning_rate": 5e-05,
|
512 |
+
"loss": 1.6338,
|
513 |
+
"step": 3250
|
514 |
+
},
|
515 |
+
{
|
516 |
+
"epoch": 1.469291776819257,
|
517 |
+
"grad_norm": 0.9754030704498291,
|
518 |
+
"learning_rate": 5e-05,
|
519 |
+
"loss": 1.6341,
|
520 |
+
"step": 3300
|
521 |
+
},
|
522 |
+
{
|
523 |
+
"epoch": 1.49155419507444,
|
524 |
+
"grad_norm": 0.9373458027839661,
|
525 |
+
"learning_rate": 5e-05,
|
526 |
+
"loss": 1.6214,
|
527 |
+
"step": 3350
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 1.513816613329623,
|
531 |
+
"grad_norm": 1.1765072345733643,
|
532 |
+
"learning_rate": 5e-05,
|
533 |
+
"loss": 1.622,
|
534 |
+
"step": 3400
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 1.5360790315848059,
|
538 |
+
"grad_norm": 0.9341714382171631,
|
539 |
+
"learning_rate": 5e-05,
|
540 |
+
"loss": 1.6242,
|
541 |
+
"step": 3450
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"epoch": 1.5583414498399888,
|
545 |
+
"grad_norm": 0.8690816164016724,
|
546 |
+
"learning_rate": 5e-05,
|
547 |
+
"loss": 1.6238,
|
548 |
+
"step": 3500
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 1.5583414498399888,
|
552 |
+
"eval_loss": 1.6130000352859497,
|
553 |
+
"eval_runtime": 40.5463,
|
554 |
+
"eval_samples_per_second": 393.896,
|
555 |
+
"eval_steps_per_second": 49.252,
|
556 |
+
"step": 3500
|
557 |
}
|
558 |
],
|
559 |
"logging_steps": 50,
|
|
|
568 |
"early_stopping_threshold": 0.0
|
569 |
},
|
570 |
"attributes": {
|
571 |
+
"early_stopping_patience_counter": 1
|
572 |
}
|
573 |
},
|
574 |
"TrainerControl": {
|
|
|
582 |
"attributes": {}
|
583 |
}
|
584 |
},
|
585 |
+
"total_flos": 1.6653272703277056e+16,
|
586 |
"train_batch_size": 4,
|
587 |
"trial_name": null,
|
588 |
"trial_params": null
|