sandernotenbaert commited on
Commit
00a3e19
·
verified ·
1 Parent(s): 25de14a

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf6b21136738d464237c49291ff492715ac0f778d29877772de0b737cf2d2640
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f97a88fe7f1551daa9480177b00b35e40e856a0d9c9d91d43e2560efb86dee1
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e9083055cf358cde08cffa8e089e39a4e8e3d9798eb13a83732e0956151494
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b8042297e02ae9d3215844c9e256c5210db3f5a2e0699113654e0cb79f59db
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05bf623617defaefad1aba1b3887b66f9a371c45bd4aa888efe87b4d5bd11431
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bf650a618e96844362d12ee82d70a5f8affd2f2e50a31dc12be324f0ee9e79
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6789c572dce92a1a63d5943c701f294de1d60d1ba0534cc546d8d898ea5a0067
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ade71fa77b3bec5df0f87e5afbe1254f30e68aa4fe3c4885f1c8c1ab0dff354
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f0b34b2ac94b6ae1b1e612c27fa9cd4fd3034532b792dc74af68839fa9ffe62
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27936a64077570bbaa5a635febe904df73463b4153ec77db6495bfeb53a907d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 2500,
3
- "best_metric": 1.6233899593353271,
4
- "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-2500",
5
- "epoch": 1.1130930847363294,
6
  "eval_steps": 500,
7
- "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -398,6 +398,84 @@
398
  "eval_samples_per_second": 388.882,
399
  "eval_steps_per_second": 48.626,
400
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  }
402
  ],
403
  "logging_steps": 50,
@@ -426,7 +504,7 @@
426
  "attributes": {}
427
  }
428
  },
429
- "total_flos": 9917592675207168.0,
430
  "train_batch_size": 4,
431
  "trial_name": null,
432
  "trial_params": null
 
1
  {
2
+ "best_global_step": 3000,
3
+ "best_metric": 1.6028199195861816,
4
+ "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-3000",
5
+ "epoch": 1.3357172672881592,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
398
  "eval_samples_per_second": 388.882,
399
  "eval_steps_per_second": 48.626,
400
  "step": 2500
401
+ },
402
+ {
403
+ "epoch": 1.1353555029915126,
404
+ "grad_norm": 1.172264575958252,
405
+ "learning_rate": 5e-05,
406
+ "loss": 1.7348,
407
+ "step": 2550
408
+ },
409
+ {
410
+ "epoch": 1.1576179212466955,
411
+ "grad_norm": 1.076794981956482,
412
+ "learning_rate": 5e-05,
413
+ "loss": 1.7463,
414
+ "step": 2600
415
+ },
416
+ {
417
+ "epoch": 1.1798803395018784,
418
+ "grad_norm": 1.0754376649856567,
419
+ "learning_rate": 5e-05,
420
+ "loss": 1.7378,
421
+ "step": 2650
422
+ },
423
+ {
424
+ "epoch": 1.2021427577570614,
425
+ "grad_norm": 1.3081718683242798,
426
+ "learning_rate": 5e-05,
427
+ "loss": 1.7251,
428
+ "step": 2700
429
+ },
430
+ {
431
+ "epoch": 1.2244051760122443,
432
+ "grad_norm": 1.0483145713806152,
433
+ "learning_rate": 5e-05,
434
+ "loss": 1.7414,
435
+ "step": 2750
436
+ },
437
+ {
438
+ "epoch": 1.2466675942674272,
439
+ "grad_norm": 1.2890243530273438,
440
+ "learning_rate": 5e-05,
441
+ "loss": 1.7254,
442
+ "step": 2800
443
+ },
444
+ {
445
+ "epoch": 1.2689300125226102,
446
+ "grad_norm": 1.0999932289123535,
447
+ "learning_rate": 5e-05,
448
+ "loss": 1.7333,
449
+ "step": 2850
450
+ },
451
+ {
452
+ "epoch": 1.291192430777793,
453
+ "grad_norm": 1.0996226072311401,
454
+ "learning_rate": 5e-05,
455
+ "loss": 1.7151,
456
+ "step": 2900
457
+ },
458
+ {
459
+ "epoch": 1.3134548490329763,
460
+ "grad_norm": 1.3446428775787354,
461
+ "learning_rate": 5e-05,
462
+ "loss": 1.7088,
463
+ "step": 2950
464
+ },
465
+ {
466
+ "epoch": 1.3357172672881592,
467
+ "grad_norm": 0.9657168388366699,
468
+ "learning_rate": 5e-05,
469
+ "loss": 1.7171,
470
+ "step": 3000
471
+ },
472
+ {
473
+ "epoch": 1.3357172672881592,
474
+ "eval_loss": 1.6028199195861816,
475
+ "eval_runtime": 41.2736,
476
+ "eval_samples_per_second": 386.954,
477
+ "eval_steps_per_second": 48.384,
478
+ "step": 3000
479
  }
480
  ],
481
  "logging_steps": 50,
 
504
  "attributes": {}
505
  }
506
  },
507
+ "total_flos": 1.0474381397673984e+16,
508
  "train_batch_size": 4,
509
  "trial_name": null,
510
  "trial_params": null