adammandic87 committed on
Commit
f09bfa9
·
verified ·
1 Parent(s): 56288e9

Training in progress, step 76, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b838e37c6acc3226cb23a05e8da8e401052de7ec8d63320c4dea86d5b9791f8
3
  size 50899792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376fe44127e1c619ce7f8f48e03b3be70c6112912963efdb83b45b7157116c70
3
  size 50899792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44ab86fafce39c2b8346177e57d4e4368a643eb8bc217de77945ce8f27c78395
3
  size 26231300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bcd67acebb815dafe6c18e5d183a0c4822a4089bbe094b44777eed010429c78
3
  size 26231300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f489b33e4ce6b4592438133f0f8030528235eaffcbc7196d436c439dd611d61
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7abb5da1a5f6ed0ff2bc01b41aa9d26ec6e6330291297067e0c80e58907c54d6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be205ee2b4780d487b513e30ac9c545724d6f8bbbacf91c0e0793417bf18e789
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff804e87c79ef1262057e5db83d54644262c8d880a0b60fd300d8713fc4bac67
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7574750830564784,
5
  "eval_steps": 19,
6
- "global_step": 57,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -438,6 +438,147 @@
438
  "eval_samples_per_second": 33.96,
439
  "eval_steps_per_second": 16.98,
440
  "step": 57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  }
442
  ],
443
  "logging_steps": 1,
@@ -452,12 +593,12 @@
452
  "should_evaluate": false,
453
  "should_log": false,
454
  "should_save": true,
455
- "should_training_stop": false
456
  },
457
  "attributes": {}
458
  }
459
  },
460
- "total_flos": 4715279823667200.0,
461
  "train_batch_size": 2,
462
  "trial_name": null,
463
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0099667774086378,
5
  "eval_steps": 19,
6
+ "global_step": 76,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
438
  "eval_samples_per_second": 33.96,
439
  "eval_steps_per_second": 16.98,
440
  "step": 57
441
+ },
442
+ {
443
+ "epoch": 0.770764119601329,
444
+ "grad_norm": 0.47311338782310486,
445
+ "learning_rate": 3.45139266054715e-05,
446
+ "loss": 0.6306,
447
+ "step": 58
448
+ },
449
+ {
450
+ "epoch": 0.7840531561461794,
451
+ "grad_norm": 0.5218926668167114,
452
+ "learning_rate": 3.099209885178882e-05,
453
+ "loss": 0.6573,
454
+ "step": 59
455
+ },
456
+ {
457
+ "epoch": 0.7973421926910299,
458
+ "grad_norm": 0.48611611127853394,
459
+ "learning_rate": 2.7626596189492983e-05,
460
+ "loss": 0.6417,
461
+ "step": 60
462
+ },
463
+ {
464
+ "epoch": 0.8106312292358804,
465
+ "grad_norm": 0.48212239146232605,
466
+ "learning_rate": 2.4425042564574184e-05,
467
+ "loss": 0.5932,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 0.8239202657807309,
472
+ "grad_norm": 0.4338783919811249,
473
+ "learning_rate": 2.139469052572127e-05,
474
+ "loss": 0.3378,
475
+ "step": 62
476
+ },
477
+ {
478
+ "epoch": 0.8372093023255814,
479
+ "grad_norm": 0.4200206995010376,
480
+ "learning_rate": 1.854240479496643e-05,
481
+ "loss": 0.5499,
482
+ "step": 63
483
+ },
484
+ {
485
+ "epoch": 0.8504983388704319,
486
+ "grad_norm": 0.5050457715988159,
487
+ "learning_rate": 1.587464671688187e-05,
488
+ "loss": 0.4441,
489
+ "step": 64
490
+ },
491
+ {
492
+ "epoch": 0.8637873754152824,
493
+ "grad_norm": 0.552359938621521,
494
+ "learning_rate": 1.339745962155613e-05,
495
+ "loss": 0.667,
496
+ "step": 65
497
+ },
498
+ {
499
+ "epoch": 0.8770764119601329,
500
+ "grad_norm": 0.4609893262386322,
501
+ "learning_rate": 1.1116455134507664e-05,
502
+ "loss": 0.6238,
503
+ "step": 66
504
+ },
505
+ {
506
+ "epoch": 0.8903654485049833,
507
+ "grad_norm": 0.5870749950408936,
508
+ "learning_rate": 9.036800464548157e-06,
509
+ "loss": 0.7167,
510
+ "step": 67
511
+ },
512
+ {
513
+ "epoch": 0.9036544850498339,
514
+ "grad_norm": 0.42279618978500366,
515
+ "learning_rate": 7.163206698392744e-06,
516
+ "loss": 0.4892,
517
+ "step": 68
518
+ },
519
+ {
520
+ "epoch": 0.9169435215946844,
521
+ "grad_norm": 0.6572357416152954,
522
+ "learning_rate": 5.499918128533155e-06,
523
+ "loss": 0.6284,
524
+ "step": 69
525
+ },
526
+ {
527
+ "epoch": 0.9302325581395349,
528
+ "grad_norm": 0.46049225330352783,
529
+ "learning_rate": 4.050702638550275e-06,
530
+ "loss": 0.4833,
531
+ "step": 70
532
+ },
533
+ {
534
+ "epoch": 0.9435215946843853,
535
+ "grad_norm": 0.4801192283630371,
536
+ "learning_rate": 2.818843167645835e-06,
537
+ "loss": 0.7583,
538
+ "step": 71
539
+ },
540
+ {
541
+ "epoch": 0.9568106312292359,
542
+ "grad_norm": 0.518804132938385,
543
+ "learning_rate": 1.8071302737293295e-06,
544
+ "loss": 0.7252,
545
+ "step": 72
546
+ },
547
+ {
548
+ "epoch": 0.9700996677740864,
549
+ "grad_norm": 0.4595305919647217,
550
+ "learning_rate": 1.0178558119067315e-06,
551
+ "loss": 0.5154,
552
+ "step": 73
553
+ },
554
+ {
555
+ "epoch": 0.9833887043189369,
556
+ "grad_norm": 0.49823713302612305,
557
+ "learning_rate": 4.5280774269154115e-07,
558
+ "loss": 0.6968,
559
+ "step": 74
560
+ },
561
+ {
562
+ "epoch": 0.9966777408637874,
563
+ "grad_norm": 0.485995888710022,
564
+ "learning_rate": 1.1326608169920372e-07,
565
+ "loss": 0.6319,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 1.0099667774086378,
570
+ "grad_norm": 1.4272960424423218,
571
+ "learning_rate": 0.0,
572
+ "loss": 1.1565,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.0099667774086378,
577
+ "eval_loss": 0.6746536493301392,
578
+ "eval_runtime": 0.9405,
579
+ "eval_samples_per_second": 34.026,
580
+ "eval_steps_per_second": 17.013,
581
+ "step": 76
582
  }
583
  ],
584
  "logging_steps": 1,
 
593
  "should_evaluate": false,
594
  "should_log": false,
595
  "should_save": true,
596
+ "should_training_stop": true
597
  },
598
  "attributes": {}
599
  }
600
  },
601
+ "total_flos": 6273372287139840.0,
602
  "train_batch_size": 2,
603
  "trial_name": null,
604
  "trial_params": null