schnell committed on
Commit a9e5457 · 1 Parent(s): c2f0eee

Training in progress, epoch 12

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f44636504040a9fe37d5ec8b73bf477488c6349532908e7e7c5745e9c87fab6
+oid sha256:ee64dad6d55d6511beb6ac0b9d53d67463b43f31b3c562840789a31fb7f3ab68
 size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6b220dffe788eeee034d91defc492a9c11ba0f487f1986a58d0f98f1f60692f
+oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
 size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98322a81ece37e3bf5ff43ccd16232bff326c95ba3fa380814651b4f39d92dcb
+oid sha256:da377c1b16e0291f1d5c1d80cc85b79a3d05b723665175d2784787456eb041f1
 size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e067e394b31201ee662e0efb7aa9ace6ffb85863b29345c17e44b5569a9edb7b
+oid sha256:82531253b364cd83aa3acd17616fa6b687da8d3637e43c753c627947983b45a3
 size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d3857d0b2b9b6f8235233bdbcca9f4b5c05924f73a043fff2894c1c07b88152
+oid sha256:2bfe7c18952ddc5f5995dc7a21d9ebb622f29d27e920708e7de9b94f5fbe46f3
 size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5120852572a1c53c5d4a9bea69364d9478aa18cfba1490631abfa19738b6569
+oid sha256:e8373a1388c2e6c546588d79c969bd60bd5ee55c12004454fea447aefb01b11a
 size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05f1555f6724f28a2a3b4295c20cd66b71553e6792dd4ea4bc53c845f04d557f
+oid sha256:31d6c65356079d9822a9fd489b5c7372ee32f1df23966bba618ffdc93860f84c
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 11.0,
-  "global_step": 764203,
+  "epoch": 12.0,
+  "global_step": 833676,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9273,11 +9273,854 @@
   "eval_samples_per_second": 832.052,
   "eval_steps_per_second": 34.669,
   "step": 764203
9276
+ },
9277
+ {
9278
+ "epoch": 11.0,
9279
+ "learning_rate": 2.1649089464583368e-05,
9280
+ "loss": 1.837,
9281
+ "step": 764500
9282
+ },
9283
+ {
9284
+ "epoch": 11.01,
9285
+ "learning_rate": 2.1597266576314137e-05,
9286
+ "loss": 1.8367,
9287
+ "step": 765000
9288
+ },
9289
+ {
9290
+ "epoch": 11.02,
9291
+ "learning_rate": 2.1545339834561402e-05,
9292
+ "loss": 1.8372,
9293
+ "step": 765500
9294
+ },
9295
+ {
9296
+ "epoch": 11.03,
9297
+ "learning_rate": 2.1493413092808667e-05,
9298
+ "loss": 1.8376,
9299
+ "step": 766000
9300
+ },
9301
+ {
9302
+ "epoch": 11.03,
9303
+ "learning_rate": 2.1441486351055932e-05,
9304
+ "loss": 1.8358,
9305
+ "step": 766500
9306
+ },
9307
+ {
9308
+ "epoch": 11.04,
9309
+ "learning_rate": 2.13896634627867e-05,
9310
+ "loss": 1.8376,
9311
+ "step": 767000
9312
+ },
9313
+ {
9314
+ "epoch": 11.05,
9315
+ "learning_rate": 2.1337736721033966e-05,
9316
+ "loss": 1.8382,
9317
+ "step": 767500
9318
+ },
9319
+ {
9320
+ "epoch": 11.05,
9321
+ "learning_rate": 2.128580997928123e-05,
9322
+ "loss": 1.8359,
9323
+ "step": 768000
9324
+ },
9325
+ {
9326
+ "epoch": 11.06,
9327
+ "learning_rate": 2.1233883237528496e-05,
9328
+ "loss": 1.8354,
9329
+ "step": 768500
9330
+ },
9331
+ {
9332
+ "epoch": 11.07,
9333
+ "learning_rate": 2.1181956495775758e-05,
9334
+ "loss": 1.8305,
9335
+ "step": 769000
9336
+ },
9337
+ {
9338
+ "epoch": 11.08,
9339
+ "learning_rate": 2.1130029754023026e-05,
9340
+ "loss": 1.8408,
9341
+ "step": 769500
9342
+ },
9343
+ {
9344
+ "epoch": 11.08,
9345
+ "learning_rate": 2.107810301227029e-05,
9346
+ "loss": 1.8375,
9347
+ "step": 770000
9348
+ },
9349
+ {
9350
+ "epoch": 11.09,
9351
+ "learning_rate": 2.1026176270517556e-05,
9352
+ "loss": 1.8367,
9353
+ "step": 770500
9354
+ },
9355
+ {
9356
+ "epoch": 11.1,
9357
+ "learning_rate": 2.0974353382248325e-05,
9358
+ "loss": 1.8384,
9359
+ "step": 771000
9360
+ },
9361
+ {
9362
+ "epoch": 11.11,
9363
+ "learning_rate": 2.092242664049559e-05,
9364
+ "loss": 1.8399,
9365
+ "step": 771500
9366
+ },
9367
+ {
9368
+ "epoch": 11.11,
9369
+ "learning_rate": 2.0870499898742855e-05,
9370
+ "loss": 1.8376,
9371
+ "step": 772000
9372
+ },
9373
+ {
9374
+ "epoch": 11.12,
9375
+ "learning_rate": 2.081857315699012e-05,
9376
+ "loss": 1.8338,
9377
+ "step": 772500
9378
+ },
9379
+ {
9380
+ "epoch": 11.13,
9381
+ "learning_rate": 2.0766646415237385e-05,
9382
+ "loss": 1.839,
9383
+ "step": 773000
9384
+ },
9385
+ {
9386
+ "epoch": 11.13,
9387
+ "learning_rate": 2.0714823526968155e-05,
9388
+ "loss": 1.8359,
9389
+ "step": 773500
9390
+ },
9391
+ {
9392
+ "epoch": 11.14,
9393
+ "learning_rate": 2.066289678521542e-05,
9394
+ "loss": 1.8366,
9395
+ "step": 774000
9396
+ },
9397
+ {
9398
+ "epoch": 11.15,
9399
+ "learning_rate": 2.0610970043462685e-05,
9400
+ "loss": 1.8367,
9401
+ "step": 774500
9402
+ },
9403
+ {
9404
+ "epoch": 11.16,
9405
+ "learning_rate": 2.055904330170995e-05,
9406
+ "loss": 1.8327,
9407
+ "step": 775000
9408
+ },
9409
+ {
9410
+ "epoch": 11.16,
9411
+ "learning_rate": 2.050711655995721e-05,
9412
+ "loss": 1.8384,
9413
+ "step": 775500
9414
+ },
9415
+ {
9416
+ "epoch": 11.17,
9417
+ "learning_rate": 2.0455293671687984e-05,
9418
+ "loss": 1.839,
9419
+ "step": 776000
9420
+ },
9421
+ {
9422
+ "epoch": 11.18,
9423
+ "learning_rate": 2.0403366929935245e-05,
9424
+ "loss": 1.8345,
9425
+ "step": 776500
9426
+ },
9427
+ {
9428
+ "epoch": 11.18,
9429
+ "learning_rate": 2.0351440188182514e-05,
9430
+ "loss": 1.8379,
9431
+ "step": 777000
9432
+ },
9433
+ {
9434
+ "epoch": 11.19,
9435
+ "learning_rate": 2.029951344642978e-05,
9436
+ "loss": 1.834,
9437
+ "step": 777500
9438
+ },
9439
+ {
9440
+ "epoch": 11.2,
9441
+ "learning_rate": 2.0247586704677044e-05,
9442
+ "loss": 1.8389,
9443
+ "step": 778000
9444
+ },
9445
+ {
9446
+ "epoch": 11.21,
9447
+ "learning_rate": 2.0195763816407813e-05,
9448
+ "loss": 1.8309,
9449
+ "step": 778500
9450
+ },
9451
+ {
9452
+ "epoch": 11.21,
9453
+ "learning_rate": 2.0143837074655078e-05,
9454
+ "loss": 1.8367,
9455
+ "step": 779000
9456
+ },
9457
+ {
9458
+ "epoch": 11.22,
9459
+ "learning_rate": 2.0091910332902343e-05,
9460
+ "loss": 1.8367,
9461
+ "step": 779500
9462
+ },
9463
+ {
9464
+ "epoch": 11.23,
9465
+ "learning_rate": 2.0039983591149608e-05,
9466
+ "loss": 1.834,
9467
+ "step": 780000
9468
+ },
9469
+ {
9470
+ "epoch": 11.23,
9471
+ "learning_rate": 1.9988056849396873e-05,
9472
+ "loss": 1.837,
9473
+ "step": 780500
9474
+ },
9475
+ {
9476
+ "epoch": 11.24,
9477
+ "learning_rate": 1.9936130107644134e-05,
9478
+ "loss": 1.8344,
9479
+ "step": 781000
9480
+ },
9481
+ {
9482
+ "epoch": 11.25,
9483
+ "learning_rate": 1.9884203365891403e-05,
9484
+ "loss": 1.8388,
9485
+ "step": 781500
9486
+ },
9487
+ {
9488
+ "epoch": 11.26,
9489
+ "learning_rate": 1.9832380477622172e-05,
9490
+ "loss": 1.8317,
9491
+ "step": 782000
9492
+ },
9493
+ {
9494
+ "epoch": 11.26,
9495
+ "learning_rate": 1.9780453735869437e-05,
9496
+ "loss": 1.8334,
9497
+ "step": 782500
9498
+ },
9499
+ {
9500
+ "epoch": 11.27,
9501
+ "learning_rate": 1.97285269941167e-05,
9502
+ "loss": 1.8407,
9503
+ "step": 783000
9504
+ },
9505
+ {
9506
+ "epoch": 11.28,
9507
+ "learning_rate": 1.9676600252363967e-05,
9508
+ "loss": 1.8296,
9509
+ "step": 783500
9510
+ },
9511
+ {
9512
+ "epoch": 11.28,
9513
+ "learning_rate": 1.962467351061123e-05,
9514
+ "loss": 1.8319,
9515
+ "step": 784000
9516
+ },
9517
+ {
9518
+ "epoch": 11.29,
9519
+ "learning_rate": 1.9572850622342e-05,
9520
+ "loss": 1.8387,
9521
+ "step": 784500
9522
+ },
9523
+ {
9524
+ "epoch": 11.3,
9525
+ "learning_rate": 1.9520923880589266e-05,
9526
+ "loss": 1.8324,
9527
+ "step": 785000
9528
+ },
9529
+ {
9530
+ "epoch": 11.31,
9531
+ "learning_rate": 1.946899713883653e-05,
9532
+ "loss": 1.83,
9533
+ "step": 785500
9534
+ },
9535
+ {
9536
+ "epoch": 11.31,
9537
+ "learning_rate": 1.9417070397083796e-05,
9538
+ "loss": 1.8346,
9539
+ "step": 786000
9540
+ },
9541
+ {
9542
+ "epoch": 11.32,
9543
+ "learning_rate": 1.936514365533106e-05,
9544
+ "loss": 1.832,
9545
+ "step": 786500
9546
+ },
9547
+ {
9548
+ "epoch": 11.33,
9549
+ "learning_rate": 1.9313216913578326e-05,
9550
+ "loss": 1.8318,
9551
+ "step": 787000
9552
+ },
9553
+ {
9554
+ "epoch": 11.34,
9555
+ "learning_rate": 1.9261290171825588e-05,
9556
+ "loss": 1.8377,
9557
+ "step": 787500
9558
+ },
9559
+ {
9560
+ "epoch": 11.34,
9561
+ "learning_rate": 1.920946728355636e-05,
9562
+ "loss": 1.8335,
9563
+ "step": 788000
9564
+ },
9565
+ {
9566
+ "epoch": 11.35,
9567
+ "learning_rate": 1.9157540541803622e-05,
9568
+ "loss": 1.8374,
9569
+ "step": 788500
9570
+ },
9571
+ {
9572
+ "epoch": 11.36,
9573
+ "learning_rate": 1.910561380005089e-05,
9574
+ "loss": 1.8371,
9575
+ "step": 789000
9576
+ },
9577
+ {
9578
+ "epoch": 11.36,
9579
+ "learning_rate": 1.9053687058298152e-05,
9580
+ "loss": 1.8323,
9581
+ "step": 789500
9582
+ },
9583
+ {
9584
+ "epoch": 11.37,
9585
+ "learning_rate": 1.900176031654542e-05,
9586
+ "loss": 1.835,
9587
+ "step": 790000
9588
+ },
9589
+ {
9590
+ "epoch": 11.38,
9591
+ "learning_rate": 1.8949937428276186e-05,
9592
+ "loss": 1.8342,
9593
+ "step": 790500
9594
+ },
9595
+ {
9596
+ "epoch": 11.39,
9597
+ "learning_rate": 1.8898010686523455e-05,
9598
+ "loss": 1.8323,
9599
+ "step": 791000
9600
+ },
9601
+ {
9602
+ "epoch": 11.39,
9603
+ "learning_rate": 1.8846083944770716e-05,
9604
+ "loss": 1.8341,
9605
+ "step": 791500
9606
+ },
9607
+ {
9608
+ "epoch": 11.4,
9609
+ "learning_rate": 1.8794157203017984e-05,
9610
+ "loss": 1.8349,
9611
+ "step": 792000
9612
+ },
9613
+ {
9614
+ "epoch": 11.41,
9615
+ "learning_rate": 1.874223046126525e-05,
9616
+ "loss": 1.8314,
9617
+ "step": 792500
9618
+ },
9619
+ {
9620
+ "epoch": 11.41,
9621
+ "learning_rate": 1.869040757299602e-05,
9622
+ "loss": 1.8342,
9623
+ "step": 793000
9624
+ },
9625
+ {
9626
+ "epoch": 11.42,
9627
+ "learning_rate": 1.8638480831243284e-05,
9628
+ "loss": 1.8299,
9629
+ "step": 793500
9630
+ },
9631
+ {
9632
+ "epoch": 11.43,
9633
+ "learning_rate": 1.858655408949055e-05,
9634
+ "loss": 1.8336,
9635
+ "step": 794000
9636
+ },
9637
+ {
9638
+ "epoch": 11.44,
9639
+ "learning_rate": 1.8534627347737814e-05,
9640
+ "loss": 1.8342,
9641
+ "step": 794500
9642
+ },
9643
+ {
9644
+ "epoch": 11.44,
9645
+ "learning_rate": 1.8482700605985075e-05,
9646
+ "loss": 1.8341,
9647
+ "step": 795000
9648
+ },
9649
+ {
9650
+ "epoch": 11.45,
9651
+ "learning_rate": 1.8430877717715848e-05,
9652
+ "loss": 1.8299,
9653
+ "step": 795500
9654
+ },
9655
+ {
9656
+ "epoch": 11.46,
9657
+ "learning_rate": 1.8378950975963113e-05,
9658
+ "loss": 1.8331,
9659
+ "step": 796000
9660
+ },
9661
+ {
9662
+ "epoch": 11.46,
9663
+ "learning_rate": 1.8327024234210378e-05,
9664
+ "loss": 1.8351,
9665
+ "step": 796500
9666
+ },
9667
+ {
9668
+ "epoch": 11.47,
9669
+ "learning_rate": 1.827509749245764e-05,
9670
+ "loss": 1.8326,
9671
+ "step": 797000
9672
+ },
9673
+ {
9674
+ "epoch": 11.48,
9675
+ "learning_rate": 1.8223170750704908e-05,
9676
+ "loss": 1.8316,
9677
+ "step": 797500
9678
+ },
9679
+ {
9680
+ "epoch": 11.49,
9681
+ "learning_rate": 1.8171347862435674e-05,
9682
+ "loss": 1.8373,
9683
+ "step": 798000
9684
+ },
9685
+ {
9686
+ "epoch": 11.49,
9687
+ "learning_rate": 1.8119421120682942e-05,
9688
+ "loss": 1.8335,
9689
+ "step": 798500
9690
+ },
9691
+ {
9692
+ "epoch": 11.5,
9693
+ "learning_rate": 1.8067494378930204e-05,
9694
+ "loss": 1.8332,
9695
+ "step": 799000
9696
+ },
9697
+ {
9698
+ "epoch": 11.51,
9699
+ "learning_rate": 1.8015567637177472e-05,
9700
+ "loss": 1.8335,
9701
+ "step": 799500
9702
+ },
9703
+ {
9704
+ "epoch": 11.52,
9705
+ "learning_rate": 1.7963640895424734e-05,
9706
+ "loss": 1.834,
9707
+ "step": 800000
9708
+ },
9709
+ {
9710
+ "epoch": 11.52,
9711
+ "learning_rate": 1.7911818007155507e-05,
9712
+ "loss": 1.8336,
9713
+ "step": 800500
9714
+ },
9715
+ {
9716
+ "epoch": 11.53,
9717
+ "learning_rate": 1.785989126540277e-05,
9718
+ "loss": 1.8333,
9719
+ "step": 801000
9720
+ },
9721
+ {
9722
+ "epoch": 11.54,
9723
+ "learning_rate": 1.7807964523650037e-05,
9724
+ "loss": 1.8331,
9725
+ "step": 801500
9726
+ },
9727
+ {
9728
+ "epoch": 11.54,
9729
+ "learning_rate": 1.77560377818973e-05,
9730
+ "loss": 1.8332,
9731
+ "step": 802000
9732
+ },
9733
+ {
9734
+ "epoch": 11.55,
9735
+ "learning_rate": 1.7704111040144563e-05,
9736
+ "loss": 1.831,
9737
+ "step": 802500
9738
+ },
9739
+ {
9740
+ "epoch": 11.56,
9741
+ "learning_rate": 1.765218429839183e-05,
9742
+ "loss": 1.8317,
9743
+ "step": 803000
9744
+ },
9745
+ {
9746
+ "epoch": 11.57,
9747
+ "learning_rate": 1.7600257556639093e-05,
9748
+ "loss": 1.8344,
9749
+ "step": 803500
9750
+ },
9751
+ {
9752
+ "epoch": 11.57,
9753
+ "learning_rate": 1.7548434668369866e-05,
9754
+ "loss": 1.8334,
9755
+ "step": 804000
9756
+ },
9757
+ {
9758
+ "epoch": 11.58,
9759
+ "learning_rate": 1.7496507926617127e-05,
9760
+ "loss": 1.8333,
9761
+ "step": 804500
9762
+ },
9763
+ {
9764
+ "epoch": 11.59,
9765
+ "learning_rate": 1.7444581184864396e-05,
9766
+ "loss": 1.8317,
9767
+ "step": 805000
9768
+ },
9769
+ {
9770
+ "epoch": 11.59,
9771
+ "learning_rate": 1.7392654443111657e-05,
9772
+ "loss": 1.8368,
9773
+ "step": 805500
9774
+ },
9775
+ {
9776
+ "epoch": 11.6,
9777
+ "learning_rate": 1.734083155484243e-05,
9778
+ "loss": 1.8312,
9779
+ "step": 806000
9780
+ },
9781
+ {
9782
+ "epoch": 11.61,
9783
+ "learning_rate": 1.728890481308969e-05,
9784
+ "loss": 1.8325,
9785
+ "step": 806500
9786
+ },
9787
+ {
9788
+ "epoch": 11.62,
9789
+ "learning_rate": 1.723697807133696e-05,
9790
+ "loss": 1.8306,
9791
+ "step": 807000
9792
+ },
9793
+ {
9794
+ "epoch": 11.62,
9795
+ "learning_rate": 1.718505132958422e-05,
9796
+ "loss": 1.8335,
9797
+ "step": 807500
9798
+ },
9799
+ {
9800
+ "epoch": 11.63,
9801
+ "learning_rate": 1.7133228441314994e-05,
9802
+ "loss": 1.833,
9803
+ "step": 808000
9804
+ },
9805
+ {
9806
+ "epoch": 11.64,
9807
+ "learning_rate": 1.7081301699562256e-05,
9808
+ "loss": 1.8344,
9809
+ "step": 808500
9810
+ },
9811
+ {
9812
+ "epoch": 11.64,
9813
+ "learning_rate": 1.7029374957809524e-05,
9814
+ "loss": 1.8356,
9815
+ "step": 809000
9816
+ },
9817
+ {
9818
+ "epoch": 11.65,
9819
+ "learning_rate": 1.697744821605679e-05,
9820
+ "loss": 1.8329,
9821
+ "step": 809500
9822
+ },
9823
+ {
9824
+ "epoch": 11.66,
9825
+ "learning_rate": 1.692552147430405e-05,
9826
+ "loss": 1.8343,
9827
+ "step": 810000
9828
+ },
9829
+ {
9830
+ "epoch": 11.67,
9831
+ "learning_rate": 1.6873698586034824e-05,
9832
+ "loss": 1.8342,
9833
+ "step": 810500
9834
+ },
9835
+ {
9836
+ "epoch": 11.67,
9837
+ "learning_rate": 1.682177184428209e-05,
9838
+ "loss": 1.8328,
9839
+ "step": 811000
9840
+ },
9841
+ {
9842
+ "epoch": 11.68,
9843
+ "learning_rate": 1.6769845102529354e-05,
9844
+ "loss": 1.8328,
9845
+ "step": 811500
9846
+ },
9847
+ {
9848
+ "epoch": 11.69,
9849
+ "learning_rate": 1.6717918360776615e-05,
9850
+ "loss": 1.832,
9851
+ "step": 812000
9852
+ },
9853
+ {
9854
+ "epoch": 11.7,
9855
+ "learning_rate": 1.6665991619023883e-05,
9856
+ "loss": 1.8305,
9857
+ "step": 812500
9858
+ },
9859
+ {
9860
+ "epoch": 11.7,
9861
+ "learning_rate": 1.6614064877271145e-05,
9862
+ "loss": 1.8287,
9863
+ "step": 813000
9864
+ },
9865
+ {
9866
+ "epoch": 11.71,
9867
+ "learning_rate": 1.6562241989001918e-05,
9868
+ "loss": 1.8324,
9869
+ "step": 813500
9870
+ },
9871
+ {
9872
+ "epoch": 11.72,
9873
+ "learning_rate": 1.651031524724918e-05,
9874
+ "loss": 1.8325,
9875
+ "step": 814000
9876
+ },
9877
+ {
9878
+ "epoch": 11.72,
9879
+ "learning_rate": 1.6458388505496448e-05,
9880
+ "loss": 1.8304,
9881
+ "step": 814500
9882
+ },
9883
+ {
9884
+ "epoch": 11.73,
9885
+ "learning_rate": 1.640646176374371e-05,
9886
+ "loss": 1.8321,
9887
+ "step": 815000
9888
+ },
9889
+ {
9890
+ "epoch": 11.74,
9891
+ "learning_rate": 1.6354638875474482e-05,
9892
+ "loss": 1.8324,
9893
+ "step": 815500
9894
+ },
9895
+ {
9896
+ "epoch": 11.75,
9897
+ "learning_rate": 1.6302712133721744e-05,
9898
+ "loss": 1.8286,
9899
+ "step": 816000
9900
+ },
9901
+ {
9902
+ "epoch": 11.75,
9903
+ "learning_rate": 1.6250785391969012e-05,
9904
+ "loss": 1.8319,
9905
+ "step": 816500
9906
+ },
9907
+ {
9908
+ "epoch": 11.76,
9909
+ "learning_rate": 1.6198858650216277e-05,
9910
+ "loss": 1.8282,
9911
+ "step": 817000
9912
+ },
9913
+ {
9914
+ "epoch": 11.77,
9915
+ "learning_rate": 1.6147035761947047e-05,
9916
+ "loss": 1.8265,
9917
+ "step": 817500
9918
+ },
9919
+ {
9920
+ "epoch": 11.77,
9921
+ "learning_rate": 1.609510902019431e-05,
9922
+ "loss": 1.8288,
9923
+ "step": 818000
9924
+ },
9925
+ {
9926
+ "epoch": 11.78,
9927
+ "learning_rate": 1.6043182278441576e-05,
9928
+ "loss": 1.8334,
9929
+ "step": 818500
9930
+ },
9931
+ {
9932
+ "epoch": 11.79,
9933
+ "learning_rate": 1.599125553668884e-05,
9934
+ "loss": 1.8322,
9935
+ "step": 819000
9936
+ },
9937
+ {
9938
+ "epoch": 11.8,
9939
+ "learning_rate": 1.5939328794936103e-05,
9940
+ "loss": 1.8318,
9941
+ "step": 819500
9942
+ },
9943
+ {
9944
+ "epoch": 11.8,
9945
+ "learning_rate": 1.5887505906666876e-05,
9946
+ "loss": 1.8281,
9947
+ "step": 820000
9948
+ },
9949
+ {
9950
+ "epoch": 11.81,
9951
+ "learning_rate": 1.583557916491414e-05,
9952
+ "loss": 1.8279,
9953
+ "step": 820500
9954
+ },
9955
+ {
9956
+ "epoch": 11.82,
9957
+ "learning_rate": 1.5783652423161406e-05,
9958
+ "loss": 1.8319,
9959
+ "step": 821000
9960
+ },
9961
+ {
9962
+ "epoch": 11.82,
9963
+ "learning_rate": 1.5731725681408667e-05,
9964
+ "loss": 1.8281,
9965
+ "step": 821500
9966
+ },
9967
+ {
9968
+ "epoch": 11.83,
9969
+ "learning_rate": 1.5679798939655935e-05,
9970
+ "loss": 1.83,
9971
+ "step": 822000
9972
+ },
9973
+ {
9974
+ "epoch": 11.84,
9975
+ "learning_rate": 1.5627976051386705e-05,
9976
+ "loss": 1.831,
9977
+ "step": 822500
9978
+ },
9979
+ {
9980
+ "epoch": 11.85,
9981
+ "learning_rate": 1.557604930963397e-05,
9982
+ "loss": 1.8308,
9983
+ "step": 823000
9984
+ },
9985
+ {
9986
+ "epoch": 11.85,
9987
+ "learning_rate": 1.552412256788123e-05,
9988
+ "loss": 1.8302,
9989
+ "step": 823500
9990
+ },
9991
+ {
9992
+ "epoch": 11.86,
9993
+ "learning_rate": 1.54721958261285e-05,
9994
+ "loss": 1.8306,
9995
+ "step": 824000
9996
+ },
9997
+ {
9998
+ "epoch": 11.87,
9999
+ "learning_rate": 1.5420372937859266e-05,
10000
+ "loss": 1.828,
10001
+ "step": 824500
10002
+ },
10003
+ {
10004
+ "epoch": 11.88,
10005
+ "learning_rate": 1.5368446196106534e-05,
10006
+ "loss": 1.8306,
10007
+ "step": 825000
10008
+ },
10009
+ {
10010
+ "epoch": 11.88,
10011
+ "learning_rate": 1.53165194543538e-05,
10012
+ "loss": 1.8288,
10013
+ "step": 825500
10014
+ },
10015
+ {
10016
+ "epoch": 11.89,
10017
+ "learning_rate": 1.5264592712601064e-05,
10018
+ "loss": 1.8304,
10019
+ "step": 826000
10020
+ },
10021
+ {
10022
+ "epoch": 11.9,
10023
+ "learning_rate": 1.5212769824331832e-05,
10024
+ "loss": 1.8285,
10025
+ "step": 826500
10026
+ },
10027
+ {
10028
+ "epoch": 11.9,
10029
+ "learning_rate": 1.5160843082579099e-05,
10030
+ "loss": 1.8287,
10031
+ "step": 827000
10032
+ },
10033
+ {
10034
+ "epoch": 11.91,
10035
+ "learning_rate": 1.5108916340826362e-05,
10036
+ "loss": 1.8291,
10037
+ "step": 827500
10038
+ },
10039
+ {
10040
+ "epoch": 11.92,
10041
+ "learning_rate": 1.5056989599073628e-05,
10042
+ "loss": 1.8314,
10043
+ "step": 828000
10044
+ },
10045
+ {
10046
+ "epoch": 11.93,
10047
+ "learning_rate": 1.5005062857320892e-05,
10048
+ "loss": 1.8258,
10049
+ "step": 828500
10050
+ },
10051
+ {
10052
+ "epoch": 11.93,
10053
+ "learning_rate": 1.4953239969051663e-05,
10054
+ "loss": 1.8287,
10055
+ "step": 829000
10056
+ },
10057
+ {
10058
+ "epoch": 11.94,
10059
+ "learning_rate": 1.4901313227298926e-05,
10060
+ "loss": 1.8294,
10061
+ "step": 829500
10062
+ },
10063
+ {
10064
+ "epoch": 11.95,
10065
+ "learning_rate": 1.4849386485546193e-05,
10066
+ "loss": 1.8328,
10067
+ "step": 830000
10068
+ },
10069
+ {
10070
+ "epoch": 11.95,
10071
+ "learning_rate": 1.4797459743793458e-05,
10072
+ "loss": 1.8287,
10073
+ "step": 830500
10074
+ },
10075
+ {
10076
+ "epoch": 11.96,
10077
+ "learning_rate": 1.4745636855524227e-05,
10078
+ "loss": 1.8279,
10079
+ "step": 831000
10080
+ },
10081
+ {
10082
+ "epoch": 11.97,
10083
+ "learning_rate": 1.4693710113771492e-05,
10084
+ "loss": 1.8286,
10085
+ "step": 831500
10086
+ },
10087
+ {
10088
+ "epoch": 11.98,
10089
+ "learning_rate": 1.4641783372018755e-05,
10090
+ "loss": 1.8254,
10091
+ "step": 832000
10092
+ },
10093
+ {
10094
+ "epoch": 11.98,
10095
+ "learning_rate": 1.4589856630266022e-05,
10096
+ "loss": 1.8274,
10097
+ "step": 832500
10098
+ },
10099
+ {
10100
+ "epoch": 11.99,
10101
+ "learning_rate": 1.4538033741996793e-05,
10102
+ "loss": 1.8279,
10103
+ "step": 833000
10104
+ },
10105
+ {
10106
+ "epoch": 12.0,
10107
+ "learning_rate": 1.4486107000244057e-05,
10108
+ "loss": 1.8245,
10109
+ "step": 833500
10110
+ },
10111
+ {
10112
+ "epoch": 12.0,
10113
+ "eval_accuracy": 0.6503763935317559,
10114
+ "eval_loss": 1.6876965761184692,
10115
+ "eval_runtime": 647.389,
10116
+ "eval_samples_per_second": 832.478,
10117
+ "eval_steps_per_second": 34.687,
10118
+ "step": 833676
   }
  ],
  "max_steps": 972622,
  "num_train_epochs": 14,
-  "total_flos": 5.346162236077375e+18,
+  "total_flos": 5.832190387963298e+18,
  "trial_name": null,
  "trial_params": null
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6b220dffe788eeee034d91defc492a9c11ba0f487f1986a58d0f98f1f60692f
+oid sha256:30d2075d628d49b7e8ef70e8eea7597d65c9743427dd0d8f469ad8d7e3fe87e1
 size 118242180
runs/Aug02_00-40-05_user-SYS-5049A-TR/events.out.tfevents.1659368419.user-SYS-5049A-TR.4008140.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e79290da63e9866b28acce0c724c8b589df630071e9caff0591c4710cf7d0de1
-size 251429
+oid sha256:af05549d74148c78a30fded743641173f0fcc77b72e6135712a94988aef723ba
+size 273998