afull05 committed
Commit 4eb9d39 · verified · 1 Parent(s): 72fb4cb

Training in progress, step 835, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39d6b881d4987bf9d5bcfe8a064b139932733ad3477682bd61058412eb62fa96
+oid sha256:5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2e153fff5053f18208466756773d29461ab60badd4fa692e62339d7396cbe33
+oid sha256:4771d40822f2232ea1fc66bbb86799fa46e7f703da704326d615ffa6101825b9
 size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d9c63bd0232e7980e2988292e37bfbeecbd77efd303e088b8b34aac11fef8b3
+oid sha256:c1b14213d8f993ec42c615c8027597e3bf7668a27fbdb9035698a8baae177c92
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:156aadf840372c36f9ad329437d53efbaab7f6ea8e90a535b104c634fa163d21
+oid sha256:a2ec0b50a82217eefa34c5afd3f9b449cdce6ba7ebf5cff729c1fc131f10aad0
 size 1064
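
The four binary files above are tracked with Git LFS, so each diff only swaps the pointer's oid (the file's SHA-256) and restates its size. As a minimal sketch of how one could confirm that a locally downloaded checkpoint file matches its pointer, using only the standard library (the path, hash, and size are taken from the updated adapter pointer above; the helper name `verify_lfs_pointer` is made up for illustration):

```python
import hashlib
import os

def verify_lfs_pointer(path, expected_oid, expected_size):
    """Check a downloaded file against the oid/size recorded in its Git LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoint files don't need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Values taken from the updated adapter_model.safetensors pointer above.
ok = verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86",
    671149168,
)
print("adapter_model.safetensors matches its LFS pointer:", ok)
```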
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.020855692191213603,
+  "epoch": 0.026069615239017005,
   "eval_steps": 334,
-  "global_step": 668,
+  "global_step": 835,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -493,6 +493,125 @@
       "eval_samples_per_second": 2.461,
       "eval_steps_per_second": 2.461,
       "step": 668
+    },
+    {
+      "epoch": 0.020918134383402866,
+      "grad_norm": 1.3406473398208618,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.4458,
+      "step": 670
+    },
+    {
+      "epoch": 0.021230345344349175,
+      "grad_norm": 1.6482555866241455,
+      "learning_rate": 4.727745323894976e-05,
+      "loss": 0.8605,
+      "step": 680
+    },
+    {
+      "epoch": 0.021542556305295488,
+      "grad_norm": 2.3034768104553223,
+      "learning_rate": 4.4607993613388976e-05,
+      "loss": 0.8842,
+      "step": 690
+    },
+    {
+      "epoch": 0.0218547672662418,
+      "grad_norm": 2.2205710411071777,
+      "learning_rate": 4.19943090428802e-05,
+      "loss": 1.0044,
+      "step": 700
+    },
+    {
+      "epoch": 0.022166978227188112,
+      "grad_norm": 1.0776264667510986,
+      "learning_rate": 3.943903128623335e-05,
+      "loss": 0.9987,
+      "step": 710
+    },
+    {
+      "epoch": 0.02247918918813442,
+      "grad_norm": 0.7974869012832642,
+      "learning_rate": 3.694473329154778e-05,
+      "loss": 0.4341,
+      "step": 720
+    },
+    {
+      "epoch": 0.022791400149080734,
+      "grad_norm": 1.410180926322937,
+      "learning_rate": 3.45139266054715e-05,
+      "loss": 0.7361,
+      "step": 730
+    },
+    {
+      "epoch": 0.023103611110027046,
+      "grad_norm": 1.5520089864730835,
+      "learning_rate": 3.21490588442868e-05,
+      "loss": 0.8486,
+      "step": 740
+    },
+    {
+      "epoch": 0.023415822070973355,
+      "grad_norm": 4.628422260284424,
+      "learning_rate": 2.9852511229367865e-05,
+      "loss": 1.1191,
+      "step": 750
+    },
+    {
+      "epoch": 0.023728033031919667,
+      "grad_norm": 1.1060361862182617,
+      "learning_rate": 2.7626596189492983e-05,
+      "loss": 1.018,
+      "step": 760
+    },
+    {
+      "epoch": 0.02404024399286598,
+      "grad_norm": 1.108621597290039,
+      "learning_rate": 2.5473555032424533e-05,
+      "loss": 0.4265,
+      "step": 770
+    },
+    {
+      "epoch": 0.024352454953812292,
+      "grad_norm": 1.4980049133300781,
+      "learning_rate": 2.339555568810221e-05,
+      "loss": 0.6406,
+      "step": 780
+    },
+    {
+      "epoch": 0.0246646659147586,
+      "grad_norm": 1.4313726425170898,
+      "learning_rate": 2.139469052572127e-05,
+      "loss": 0.9413,
+      "step": 790
+    },
+    {
+      "epoch": 0.024976876875704913,
+      "grad_norm": 1.4710307121276855,
+      "learning_rate": 1.947297424689414e-05,
+      "loss": 1.0135,
+      "step": 800
+    },
+    {
+      "epoch": 0.025289087836651226,
+      "grad_norm": 0.8172721266746521,
+      "learning_rate": 1.763234185701673e-05,
+      "loss": 0.9491,
+      "step": 810
+    },
+    {
+      "epoch": 0.025601298797597538,
+      "grad_norm": 1.1474344730377197,
+      "learning_rate": 1.587464671688187e-05,
+      "loss": 0.5334,
+      "step": 820
+    },
+    {
+      "epoch": 0.025913509758543847,
+      "grad_norm": 1.4124549627304077,
+      "learning_rate": 1.4201658676502294e-05,
+      "loss": 0.7033,
+      "step": 830
     }
   ],
   "logging_steps": 10,
@@ -512,7 +631,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.0997187171477094e+17,
+  "total_flos": 6.372527049214525e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null