sandernotenbaert commited on
Commit
d5742c1
·
verified ·
1 Parent(s): e97be52

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f97a88fe7f1551daa9480177b00b35e40e856a0d9c9d91d43e2560efb86dee1
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59598fbdd1c5e2f583902dedafc3608ce33bbba310616bf6dc87ed894499216c
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b8042297e02ae9d3215844c9e256c5210db3f5a2e0699113654e0cb79f59db
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c36ca00300f8149041f48da61b5fca592e064fd49ac67d29a37c6bcaba238d3e
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2bf650a618e96844362d12ee82d70a5f8affd2f2e50a31dc12be324f0ee9e79
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b337ce57e0e628ac23a51821ba9829dc5591967e2ff7b7d7b32c74e1aad03b79
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ade71fa77b3bec5df0f87e5afbe1254f30e68aa4fe3c4885f1c8c1ab0dff354
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:018616c73745e54f08c106f2414258a1870a189c7fffafe7f792b5acbfd19fdc
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f27936a64077570bbaa5a635febe904df73463b4153ec77db6495bfeb53a907d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a164f17f191d714fcd6bbbbbba58c57fe70c8ab069a5df01c4e86e5fc119f19d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 3000,
3
  "best_metric": 1.6028199195861816,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
5
- "epoch": 1.3357172672881592,
6
  "eval_steps": 500,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -476,6 +476,84 @@
476
  "eval_samples_per_second": 386.954,
477
  "eval_steps_per_second": 48.384,
478
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  }
480
  ],
481
  "logging_steps": 50,
@@ -490,7 +568,7 @@
490
  "early_stopping_threshold": 0.0
491
  },
492
  "attributes": {
493
- "early_stopping_patience_counter": 0
494
  }
495
  },
496
  "TrainerControl": {
@@ -504,7 +582,7 @@
504
  "attributes": {}
505
  }
506
  },
507
- "total_flos": 1.0474381397673984e+16,
508
  "train_batch_size": 4,
509
  "trial_name": null,
510
  "trial_params": null
 
2
  "best_global_step": 3000,
3
  "best_metric": 1.6028199195861816,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
5
+ "epoch": 1.5583414498399888,
6
  "eval_steps": 500,
7
+ "global_step": 3500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
476
  "eval_samples_per_second": 386.954,
477
  "eval_steps_per_second": 48.384,
478
  "step": 3000
479
+ },
480
+ {
481
+ "epoch": 1.3579796855433421,
482
+ "grad_norm": 0.904662549495697,
483
+ "learning_rate": 5e-05,
484
+ "loss": 1.6656,
485
+ "step": 3050
486
+ },
487
+ {
488
+ "epoch": 1.380242103798525,
489
+ "grad_norm": 1.2054646015167236,
490
+ "learning_rate": 5e-05,
491
+ "loss": 1.6409,
492
+ "step": 3100
493
+ },
494
+ {
495
+ "epoch": 1.402504522053708,
496
+ "grad_norm": 0.8623887300491333,
497
+ "learning_rate": 5e-05,
498
+ "loss": 1.6378,
499
+ "step": 3150
500
+ },
501
+ {
502
+ "epoch": 1.4247669403088912,
503
+ "grad_norm": 0.931481659412384,
504
+ "learning_rate": 5e-05,
505
+ "loss": 1.6395,
506
+ "step": 3200
507
+ },
508
+ {
509
+ "epoch": 1.447029358564074,
510
+ "grad_norm": 0.8971887826919556,
511
+ "learning_rate": 5e-05,
512
+ "loss": 1.6338,
513
+ "step": 3250
514
+ },
515
+ {
516
+ "epoch": 1.469291776819257,
517
+ "grad_norm": 0.9754030704498291,
518
+ "learning_rate": 5e-05,
519
+ "loss": 1.6341,
520
+ "step": 3300
521
+ },
522
+ {
523
+ "epoch": 1.49155419507444,
524
+ "grad_norm": 0.9373458027839661,
525
+ "learning_rate": 5e-05,
526
+ "loss": 1.6214,
527
+ "step": 3350
528
+ },
529
+ {
530
+ "epoch": 1.513816613329623,
531
+ "grad_norm": 1.1765072345733643,
532
+ "learning_rate": 5e-05,
533
+ "loss": 1.622,
534
+ "step": 3400
535
+ },
536
+ {
537
+ "epoch": 1.5360790315848059,
538
+ "grad_norm": 0.9341714382171631,
539
+ "learning_rate": 5e-05,
540
+ "loss": 1.6242,
541
+ "step": 3450
542
+ },
543
+ {
544
+ "epoch": 1.5583414498399888,
545
+ "grad_norm": 0.8690816164016724,
546
+ "learning_rate": 5e-05,
547
+ "loss": 1.6238,
548
+ "step": 3500
549
+ },
550
+ {
551
+ "epoch": 1.5583414498399888,
552
+ "eval_loss": 1.6130000352859497,
553
+ "eval_runtime": 40.5463,
554
+ "eval_samples_per_second": 393.896,
555
+ "eval_steps_per_second": 49.252,
556
+ "step": 3500
557
  }
558
  ],
559
  "logging_steps": 50,
 
568
  "early_stopping_threshold": 0.0
569
  },
570
  "attributes": {
571
+ "early_stopping_patience_counter": 1
572
  }
573
  },
574
  "TrainerControl": {
 
582
  "attributes": {}
583
  }
584
  },
585
+ "total_flos": 1.6653272703277056e+16,
586
  "train_batch_size": 4,
587
  "trial_name": null,
588
  "trial_params": null