Ubuntu commited on
Commit
cc48966
·
1 Parent(s): 40ab862
Files changed (2) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +273 -3
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4369b6ee7ce36d31577f5b231b5dd09049ddf21563da9ef7997fa05a8d273818
3
  size 24673403925
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:188b012cec3039d8440f0980b517a0c0e7c8993f8a4f07b2854d3b2f700b2494
3
  size 24673403925
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07628928898382667,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -418,11 +418,281 @@
418
  "eval_samples_per_second": 3.188,
419
  "eval_steps_per_second": 0.407,
420
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  }
422
  ],
423
  "max_steps": 13108,
424
  "num_train_epochs": 1,
425
- "total_flos": 14310696812544.0,
426
  "trial_name": null,
427
  "trial_params": null
428
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12664021971315229,
5
+ "global_step": 1660,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
418
  "eval_samples_per_second": 3.188,
419
  "eval_steps_per_second": 0.407,
420
  "step": 1000
421
+ },
422
+ {
423
+ "epoch": 0.08,
424
+ "eval_loss": 2.1528055667877197,
425
+ "eval_runtime": 96.9725,
426
+ "eval_samples_per_second": 3.228,
427
+ "eval_steps_per_second": 0.412,
428
+ "step": 1020
429
+ },
430
+ {
431
+ "epoch": 0.08,
432
+ "eval_loss": 2.167107582092285,
433
+ "eval_runtime": 96.6542,
434
+ "eval_samples_per_second": 3.238,
435
+ "eval_steps_per_second": 0.414,
436
+ "step": 1040
437
+ },
438
+ {
439
+ "epoch": 0.08,
440
+ "eval_loss": 2.1584465503692627,
441
+ "eval_runtime": 98.4331,
442
+ "eval_samples_per_second": 3.18,
443
+ "eval_steps_per_second": 0.406,
444
+ "step": 1060
445
+ },
446
+ {
447
+ "epoch": 0.08,
448
+ "eval_loss": 2.152431011199951,
449
+ "eval_runtime": 95.2227,
450
+ "eval_samples_per_second": 3.287,
451
+ "eval_steps_per_second": 0.42,
452
+ "step": 1080
453
+ },
454
+ {
455
+ "epoch": 0.08,
456
+ "eval_loss": 2.1513078212738037,
457
+ "eval_runtime": 95.2983,
458
+ "eval_samples_per_second": 3.284,
459
+ "eval_steps_per_second": 0.42,
460
+ "step": 1100
461
+ },
462
+ {
463
+ "epoch": 0.09,
464
+ "eval_loss": 2.1465654373168945,
465
+ "eval_runtime": 96.5284,
466
+ "eval_samples_per_second": 3.243,
467
+ "eval_steps_per_second": 0.414,
468
+ "step": 1120
469
+ },
470
+ {
471
+ "epoch": 0.09,
472
+ "eval_loss": 2.155775785446167,
473
+ "eval_runtime": 95.918,
474
+ "eval_samples_per_second": 3.263,
475
+ "eval_steps_per_second": 0.417,
476
+ "step": 1140
477
+ },
478
+ {
479
+ "epoch": 0.09,
480
+ "eval_loss": 2.148512363433838,
481
+ "eval_runtime": 96.3942,
482
+ "eval_samples_per_second": 3.247,
483
+ "eval_steps_per_second": 0.415,
484
+ "step": 1160
485
+ },
486
+ {
487
+ "epoch": 0.09,
488
+ "eval_loss": 2.1571736335754395,
489
+ "eval_runtime": 94.8278,
490
+ "eval_samples_per_second": 3.301,
491
+ "eval_steps_per_second": 0.422,
492
+ "step": 1180
493
+ },
494
+ {
495
+ "epoch": 0.09,
496
+ "eval_loss": 2.1482129096984863,
497
+ "eval_runtime": 95.7915,
498
+ "eval_samples_per_second": 3.268,
499
+ "eval_steps_per_second": 0.418,
500
+ "step": 1200
501
+ },
502
+ {
503
+ "epoch": 0.09,
504
+ "eval_loss": 2.1445436477661133,
505
+ "eval_runtime": 97.0007,
506
+ "eval_samples_per_second": 3.227,
507
+ "eval_steps_per_second": 0.412,
508
+ "step": 1220
509
+ },
510
+ {
511
+ "epoch": 0.09,
512
+ "eval_loss": 2.1457667350769043,
513
+ "eval_runtime": 93.8614,
514
+ "eval_samples_per_second": 3.335,
515
+ "eval_steps_per_second": 0.426,
516
+ "step": 1240
517
+ },
518
+ {
519
+ "epoch": 0.1,
520
+ "eval_loss": 2.155850648880005,
521
+ "eval_runtime": 94.8073,
522
+ "eval_samples_per_second": 3.301,
523
+ "eval_steps_per_second": 0.422,
524
+ "step": 1260
525
+ },
526
+ {
527
+ "epoch": 0.1,
528
+ "eval_loss": 2.1380791664123535,
529
+ "eval_runtime": 95.9912,
530
+ "eval_samples_per_second": 3.261,
531
+ "eval_steps_per_second": 0.417,
532
+ "step": 1280
533
+ },
534
+ {
535
+ "epoch": 0.1,
536
+ "eval_loss": 2.1424720287323,
537
+ "eval_runtime": 94.1052,
538
+ "eval_samples_per_second": 3.326,
539
+ "eval_steps_per_second": 0.425,
540
+ "step": 1300
541
+ },
542
+ {
543
+ "epoch": 0.1,
544
+ "eval_loss": 2.1322383880615234,
545
+ "eval_runtime": 95.6595,
546
+ "eval_samples_per_second": 3.272,
547
+ "eval_steps_per_second": 0.418,
548
+ "step": 1320
549
+ },
550
+ {
551
+ "epoch": 0.1,
552
+ "eval_loss": 2.1327874660491943,
553
+ "eval_runtime": 95.3348,
554
+ "eval_samples_per_second": 3.283,
555
+ "eval_steps_per_second": 0.42,
556
+ "step": 1340
557
+ },
558
+ {
559
+ "epoch": 0.1,
560
+ "eval_loss": 2.1295926570892334,
561
+ "eval_runtime": 94.6735,
562
+ "eval_samples_per_second": 3.306,
563
+ "eval_steps_per_second": 0.423,
564
+ "step": 1360
565
+ },
566
+ {
567
+ "epoch": 0.11,
568
+ "eval_loss": 2.1335363388061523,
569
+ "eval_runtime": 94.2932,
570
+ "eval_samples_per_second": 3.319,
571
+ "eval_steps_per_second": 0.424,
572
+ "step": 1380
573
+ },
574
+ {
575
+ "epoch": 0.11,
576
+ "eval_loss": 2.126971960067749,
577
+ "eval_runtime": 92.6523,
578
+ "eval_samples_per_second": 3.378,
579
+ "eval_steps_per_second": 0.432,
580
+ "step": 1400
581
+ },
582
+ {
583
+ "epoch": 0.11,
584
+ "eval_loss": 2.1153903007507324,
585
+ "eval_runtime": 96.6374,
586
+ "eval_samples_per_second": 3.239,
587
+ "eval_steps_per_second": 0.414,
588
+ "step": 1420
589
+ },
590
+ {
591
+ "epoch": 0.11,
592
+ "eval_loss": 2.1179113388061523,
593
+ "eval_runtime": 96.4386,
594
+ "eval_samples_per_second": 3.246,
595
+ "eval_steps_per_second": 0.415,
596
+ "step": 1440
597
+ },
598
+ {
599
+ "epoch": 0.11,
600
+ "eval_loss": 2.1170127391815186,
601
+ "eval_runtime": 94.461,
602
+ "eval_samples_per_second": 3.314,
603
+ "eval_steps_per_second": 0.423,
604
+ "step": 1460
605
+ },
606
+ {
607
+ "epoch": 0.11,
608
+ "eval_loss": 2.1248252391815186,
609
+ "eval_runtime": 95.6005,
610
+ "eval_samples_per_second": 3.274,
611
+ "eval_steps_per_second": 0.418,
612
+ "step": 1480
613
+ },
614
+ {
615
+ "epoch": 0.11,
616
+ "learning_rate": 5e-05,
617
+ "loss": 1.9893,
618
+ "step": 1500
619
+ },
620
+ {
621
+ "epoch": 0.11,
622
+ "eval_loss": 2.117586851119995,
623
+ "eval_runtime": 95.6648,
624
+ "eval_samples_per_second": 3.272,
625
+ "eval_steps_per_second": 0.418,
626
+ "step": 1500
627
+ },
628
+ {
629
+ "epoch": 0.12,
630
+ "eval_loss": 2.1058804988861084,
631
+ "eval_runtime": 95.0515,
632
+ "eval_samples_per_second": 3.293,
633
+ "eval_steps_per_second": 0.421,
634
+ "step": 1520
635
+ },
636
+ {
637
+ "epoch": 0.12,
638
+ "eval_loss": 2.1127195358276367,
639
+ "eval_runtime": 95.0085,
640
+ "eval_samples_per_second": 3.294,
641
+ "eval_steps_per_second": 0.421,
642
+ "step": 1540
643
+ },
644
+ {
645
+ "epoch": 0.12,
646
+ "eval_loss": 2.115964412689209,
647
+ "eval_runtime": 36.4716,
648
+ "eval_samples_per_second": 8.582,
649
+ "eval_steps_per_second": 0.384,
650
+ "step": 1560
651
+ },
652
+ {
653
+ "epoch": 0.12,
654
+ "eval_loss": 2.1093251705169678,
655
+ "eval_runtime": 37.1698,
656
+ "eval_samples_per_second": 8.421,
657
+ "eval_steps_per_second": 0.377,
658
+ "step": 1580
659
+ },
660
+ {
661
+ "epoch": 0.12,
662
+ "eval_loss": 2.1045827865600586,
663
+ "eval_runtime": 36.7758,
664
+ "eval_samples_per_second": 8.511,
665
+ "eval_steps_per_second": 0.381,
666
+ "step": 1600
667
+ },
668
+ {
669
+ "epoch": 0.12,
670
+ "eval_loss": 2.1027355194091797,
671
+ "eval_runtime": 35.9986,
672
+ "eval_samples_per_second": 8.695,
673
+ "eval_steps_per_second": 0.389,
674
+ "step": 1620
675
+ },
676
+ {
677
+ "epoch": 0.13,
678
+ "eval_loss": 2.1164636611938477,
679
+ "eval_runtime": 36.3352,
680
+ "eval_samples_per_second": 8.614,
681
+ "eval_steps_per_second": 0.385,
682
+ "step": 1640
683
+ },
684
+ {
685
+ "epoch": 0.13,
686
+ "eval_loss": 2.105306625366211,
687
+ "eval_runtime": 36.3036,
688
+ "eval_samples_per_second": 8.622,
689
+ "eval_steps_per_second": 0.386,
690
+ "step": 1660
691
  }
692
  ],
693
  "max_steps": 13108,
694
  "num_train_epochs": 1,
695
+ "total_flos": 24131860955136.0,
696
  "trial_name": null,
697
  "trial_params": null
698
  }