Mistral-7B-v0.1-lapt-ja / trainer_state.json
atsuki-yamaguchi's picture
Upload folder using huggingface_hub
6f092b0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.07860759742429106,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 1.8374,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 1.7918,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 1.8172,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 1.7902,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 1.8001,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 1.7861,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 1.7771,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 1.8049,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 4.5e-05,
"loss": 1.7976,
"step": 45
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 1.7851,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 5.500000000000001e-05,
"loss": 1.7493,
"step": 55
},
{
"epoch": 0.0,
"learning_rate": 6e-05,
"loss": 1.7739,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 6.500000000000001e-05,
"loss": 1.7704,
"step": 65
},
{
"epoch": 0.0,
"learning_rate": 7e-05,
"loss": 1.7362,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 7.500000000000001e-05,
"loss": 1.766,
"step": 75
},
{
"epoch": 0.0,
"learning_rate": 8e-05,
"loss": 1.7456,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 8.5e-05,
"loss": 1.776,
"step": 85
},
{
"epoch": 0.0,
"learning_rate": 9e-05,
"loss": 1.7375,
"step": 90
},
{
"epoch": 0.0,
"learning_rate": 9.5e-05,
"loss": 1.7463,
"step": 95
},
{
"epoch": 0.0,
"learning_rate": 0.0001,
"loss": 1.6943,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 9.999999893842493e-05,
"loss": 1.7681,
"step": 105
},
{
"epoch": 0.0,
"learning_rate": 9.999999575369978e-05,
"loss": 1.737,
"step": 110
},
{
"epoch": 0.0,
"learning_rate": 9.999999044582466e-05,
"loss": 1.7382,
"step": 115
},
{
"epoch": 0.0,
"learning_rate": 9.99999830147998e-05,
"loss": 1.6842,
"step": 120
},
{
"epoch": 0.0,
"learning_rate": 9.999997346062553e-05,
"loss": 1.7078,
"step": 125
},
{
"epoch": 0.0,
"learning_rate": 9.999996178330225e-05,
"loss": 1.6654,
"step": 130
},
{
"epoch": 0.0,
"learning_rate": 9.999994798283046e-05,
"loss": 1.6695,
"step": 135
},
{
"epoch": 0.0,
"learning_rate": 9.999993205921074e-05,
"loss": 1.7186,
"step": 140
},
{
"epoch": 0.0,
"learning_rate": 9.999991401244376e-05,
"loss": 1.7059,
"step": 145
},
{
"epoch": 0.0,
"learning_rate": 9.99998938425303e-05,
"loss": 1.6948,
"step": 150
},
{
"epoch": 0.0,
"learning_rate": 9.99998715494712e-05,
"loss": 1.6622,
"step": 155
},
{
"epoch": 0.0,
"learning_rate": 9.999984713326743e-05,
"loss": 1.6849,
"step": 160
},
{
"epoch": 0.0,
"learning_rate": 9.999982059392e-05,
"loss": 1.7106,
"step": 165
},
{
"epoch": 0.0,
"learning_rate": 9.999979193143005e-05,
"loss": 1.7238,
"step": 170
},
{
"epoch": 0.0,
"learning_rate": 9.99997611457988e-05,
"loss": 1.6935,
"step": 175
},
{
"epoch": 0.0,
"learning_rate": 9.999972823702757e-05,
"loss": 1.6893,
"step": 180
},
{
"epoch": 0.0,
"learning_rate": 9.999969320511773e-05,
"loss": 1.6889,
"step": 185
},
{
"epoch": 0.0,
"learning_rate": 9.999965605007078e-05,
"loss": 1.669,
"step": 190
},
{
"epoch": 0.0,
"learning_rate": 9.99996167718883e-05,
"loss": 1.6795,
"step": 195
},
{
"epoch": 0.0,
"learning_rate": 9.999957537057195e-05,
"loss": 1.7063,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 9.99995318461235e-05,
"loss": 1.6405,
"step": 205
},
{
"epoch": 0.0,
"learning_rate": 9.999948619854478e-05,
"loss": 1.6921,
"step": 210
},
{
"epoch": 0.0,
"learning_rate": 9.999943842783774e-05,
"loss": 1.7028,
"step": 215
},
{
"epoch": 0.0,
"learning_rate": 9.999938853400442e-05,
"loss": 1.6666,
"step": 220
},
{
"epoch": 0.0,
"learning_rate": 9.999933651704692e-05,
"loss": 1.6541,
"step": 225
},
{
"epoch": 0.0,
"learning_rate": 9.999928237696746e-05,
"loss": 1.6566,
"step": 230
},
{
"epoch": 0.0,
"learning_rate": 9.999922611376832e-05,
"loss": 1.7196,
"step": 235
},
{
"epoch": 0.0,
"learning_rate": 9.999916772745193e-05,
"loss": 1.6917,
"step": 240
},
{
"epoch": 0.0,
"learning_rate": 9.999910721802073e-05,
"loss": 1.6946,
"step": 245
},
{
"epoch": 0.0,
"learning_rate": 9.999904458547728e-05,
"loss": 1.6499,
"step": 250
},
{
"epoch": 0.0,
"learning_rate": 9.999897982982429e-05,
"loss": 1.5971,
"step": 255
},
{
"epoch": 0.0,
"learning_rate": 9.999891295106447e-05,
"loss": 1.6491,
"step": 260
},
{
"epoch": 0.0,
"learning_rate": 9.999884394920069e-05,
"loss": 1.6681,
"step": 265
},
{
"epoch": 0.0,
"learning_rate": 9.999877282423584e-05,
"loss": 1.7148,
"step": 270
},
{
"epoch": 0.0,
"learning_rate": 9.999869957617297e-05,
"loss": 1.6075,
"step": 275
},
{
"epoch": 0.0,
"learning_rate": 9.999862420501518e-05,
"loss": 1.6327,
"step": 280
},
{
"epoch": 0.0,
"learning_rate": 9.999854671076568e-05,
"loss": 1.6428,
"step": 285
},
{
"epoch": 0.0,
"learning_rate": 9.999846709342773e-05,
"loss": 1.621,
"step": 290
},
{
"epoch": 0.0,
"learning_rate": 9.999838535300476e-05,
"loss": 1.6684,
"step": 295
},
{
"epoch": 0.0,
"learning_rate": 9.99983014895002e-05,
"loss": 1.6614,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 9.999821550291762e-05,
"loss": 1.65,
"step": 305
},
{
"epoch": 0.0,
"learning_rate": 9.999812739326068e-05,
"loss": 1.6711,
"step": 310
},
{
"epoch": 0.0,
"learning_rate": 9.999803716053313e-05,
"loss": 1.6332,
"step": 315
},
{
"epoch": 0.0,
"learning_rate": 9.99979448047388e-05,
"loss": 1.6297,
"step": 320
},
{
"epoch": 0.0,
"learning_rate": 9.999785032588158e-05,
"loss": 1.6039,
"step": 325
},
{
"epoch": 0.0,
"learning_rate": 9.999775372396551e-05,
"loss": 1.6079,
"step": 330
},
{
"epoch": 0.0,
"learning_rate": 9.99976549989947e-05,
"loss": 1.6368,
"step": 335
},
{
"epoch": 0.0,
"learning_rate": 9.99975541509733e-05,
"loss": 1.6326,
"step": 340
},
{
"epoch": 0.0,
"learning_rate": 9.999745117990565e-05,
"loss": 1.6255,
"step": 345
},
{
"epoch": 0.0,
"learning_rate": 9.999734608579607e-05,
"loss": 1.6626,
"step": 350
},
{
"epoch": 0.0,
"learning_rate": 9.999723886864905e-05,
"loss": 1.686,
"step": 355
},
{
"epoch": 0.0,
"learning_rate": 9.999712952846914e-05,
"loss": 1.6388,
"step": 360
},
{
"epoch": 0.0,
"learning_rate": 9.999701806526098e-05,
"loss": 1.6302,
"step": 365
},
{
"epoch": 0.0,
"learning_rate": 9.999690447902931e-05,
"loss": 1.6025,
"step": 370
},
{
"epoch": 0.0,
"learning_rate": 9.999678876977894e-05,
"loss": 1.6206,
"step": 375
},
{
"epoch": 0.0,
"learning_rate": 9.99966709375148e-05,
"loss": 1.6118,
"step": 380
},
{
"epoch": 0.01,
"learning_rate": 9.999655098224188e-05,
"loss": 1.6459,
"step": 385
},
{
"epoch": 0.01,
"learning_rate": 9.999642890396526e-05,
"loss": 1.6483,
"step": 390
},
{
"epoch": 0.01,
"learning_rate": 9.999630470269017e-05,
"loss": 1.6127,
"step": 395
},
{
"epoch": 0.01,
"learning_rate": 9.999617837842184e-05,
"loss": 1.623,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 9.999604993116564e-05,
"loss": 1.6142,
"step": 405
},
{
"epoch": 0.01,
"learning_rate": 9.999591936092705e-05,
"loss": 1.6299,
"step": 410
},
{
"epoch": 0.01,
"learning_rate": 9.99957866677116e-05,
"loss": 1.6445,
"step": 415
},
{
"epoch": 0.01,
"learning_rate": 9.999565185152491e-05,
"loss": 1.6258,
"step": 420
},
{
"epoch": 0.01,
"learning_rate": 9.999551491237273e-05,
"loss": 1.6338,
"step": 425
},
{
"epoch": 0.01,
"learning_rate": 9.999537585026085e-05,
"loss": 1.6513,
"step": 430
},
{
"epoch": 0.01,
"learning_rate": 9.99952346651952e-05,
"loss": 1.6468,
"step": 435
},
{
"epoch": 0.01,
"learning_rate": 9.999509135718176e-05,
"loss": 1.6704,
"step": 440
},
{
"epoch": 0.01,
"learning_rate": 9.999494592622661e-05,
"loss": 1.619,
"step": 445
},
{
"epoch": 0.01,
"learning_rate": 9.999479837233594e-05,
"loss": 1.6136,
"step": 450
},
{
"epoch": 0.01,
"learning_rate": 9.9994648695516e-05,
"loss": 1.6035,
"step": 455
},
{
"epoch": 0.01,
"learning_rate": 9.999449689577316e-05,
"loss": 1.5477,
"step": 460
},
{
"epoch": 0.01,
"learning_rate": 9.999434297311386e-05,
"loss": 1.6078,
"step": 465
},
{
"epoch": 0.01,
"learning_rate": 9.999418692754464e-05,
"loss": 1.5936,
"step": 470
},
{
"epoch": 0.01,
"learning_rate": 9.999402875907213e-05,
"loss": 1.6346,
"step": 475
},
{
"epoch": 0.01,
"learning_rate": 9.999386846770303e-05,
"loss": 1.6266,
"step": 480
},
{
"epoch": 0.01,
"learning_rate": 9.999370605344415e-05,
"loss": 1.5768,
"step": 485
},
{
"epoch": 0.01,
"learning_rate": 9.99935415163024e-05,
"loss": 1.6144,
"step": 490
},
{
"epoch": 0.01,
"learning_rate": 9.999337485628476e-05,
"loss": 1.6028,
"step": 495
},
{
"epoch": 0.01,
"learning_rate": 9.999320607339828e-05,
"loss": 1.6145,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 9.999303516765018e-05,
"loss": 1.6175,
"step": 505
},
{
"epoch": 0.01,
"learning_rate": 9.999286213904768e-05,
"loss": 1.6379,
"step": 510
},
{
"epoch": 0.01,
"learning_rate": 9.999268698759814e-05,
"loss": 1.5891,
"step": 515
},
{
"epoch": 0.01,
"learning_rate": 9.9992509713309e-05,
"loss": 1.615,
"step": 520
},
{
"epoch": 0.01,
"learning_rate": 9.999233031618777e-05,
"loss": 1.5685,
"step": 525
},
{
"epoch": 0.01,
"learning_rate": 9.999214879624209e-05,
"loss": 1.566,
"step": 530
},
{
"epoch": 0.01,
"learning_rate": 9.999196515347966e-05,
"loss": 1.6135,
"step": 535
},
{
"epoch": 0.01,
"learning_rate": 9.999177938790827e-05,
"loss": 1.5813,
"step": 540
},
{
"epoch": 0.01,
"learning_rate": 9.99915914995358e-05,
"loss": 1.6159,
"step": 545
},
{
"epoch": 0.01,
"learning_rate": 9.999140148837027e-05,
"loss": 1.5838,
"step": 550
},
{
"epoch": 0.01,
"learning_rate": 9.99912093544197e-05,
"loss": 1.614,
"step": 555
},
{
"epoch": 0.01,
"learning_rate": 9.999101509769229e-05,
"loss": 1.6117,
"step": 560
},
{
"epoch": 0.01,
"learning_rate": 9.999081871819625e-05,
"loss": 1.6063,
"step": 565
},
{
"epoch": 0.01,
"learning_rate": 9.999062021593994e-05,
"loss": 1.6472,
"step": 570
},
{
"epoch": 0.01,
"learning_rate": 9.999041959093178e-05,
"loss": 1.6316,
"step": 575
},
{
"epoch": 0.01,
"learning_rate": 9.99902168431803e-05,
"loss": 1.5762,
"step": 580
},
{
"epoch": 0.01,
"learning_rate": 9.999001197269411e-05,
"loss": 1.5819,
"step": 585
},
{
"epoch": 0.01,
"learning_rate": 9.99898049794819e-05,
"loss": 1.6106,
"step": 590
},
{
"epoch": 0.01,
"learning_rate": 9.998959586355246e-05,
"loss": 1.5718,
"step": 595
},
{
"epoch": 0.01,
"learning_rate": 9.998938462491468e-05,
"loss": 1.5914,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 9.998917126357752e-05,
"loss": 1.5472,
"step": 605
},
{
"epoch": 0.01,
"learning_rate": 9.998895577955005e-05,
"loss": 1.5999,
"step": 610
},
{
"epoch": 0.01,
"learning_rate": 9.998873817284141e-05,
"loss": 1.5769,
"step": 615
},
{
"epoch": 0.01,
"learning_rate": 9.998851844346082e-05,
"loss": 1.6246,
"step": 620
},
{
"epoch": 0.01,
"learning_rate": 9.998829659141765e-05,
"loss": 1.5725,
"step": 625
},
{
"epoch": 0.01,
"learning_rate": 9.99880726167213e-05,
"loss": 1.6058,
"step": 630
},
{
"epoch": 0.01,
"learning_rate": 9.998784651938129e-05,
"loss": 1.6074,
"step": 635
},
{
"epoch": 0.01,
"learning_rate": 9.998761829940721e-05,
"loss": 1.5732,
"step": 640
},
{
"epoch": 0.01,
"learning_rate": 9.998738795680877e-05,
"loss": 1.597,
"step": 645
},
{
"epoch": 0.01,
"learning_rate": 9.998715549159573e-05,
"loss": 1.5824,
"step": 650
},
{
"epoch": 0.01,
"learning_rate": 9.998692090377797e-05,
"loss": 1.5635,
"step": 655
},
{
"epoch": 0.01,
"learning_rate": 9.998668419336546e-05,
"loss": 1.6095,
"step": 660
},
{
"epoch": 0.01,
"learning_rate": 9.998644536036821e-05,
"loss": 1.564,
"step": 665
},
{
"epoch": 0.01,
"learning_rate": 9.998620440479641e-05,
"loss": 1.627,
"step": 670
},
{
"epoch": 0.01,
"learning_rate": 9.998596132666027e-05,
"loss": 1.5834,
"step": 675
},
{
"epoch": 0.01,
"learning_rate": 9.998571612597013e-05,
"loss": 1.5983,
"step": 680
},
{
"epoch": 0.01,
"learning_rate": 9.998546880273638e-05,
"loss": 1.5386,
"step": 685
},
{
"epoch": 0.01,
"learning_rate": 9.998521935696953e-05,
"loss": 1.6034,
"step": 690
},
{
"epoch": 0.01,
"learning_rate": 9.998496778868018e-05,
"loss": 1.5984,
"step": 695
},
{
"epoch": 0.01,
"learning_rate": 9.9984714097879e-05,
"loss": 1.6389,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 9.998445828457675e-05,
"loss": 1.5494,
"step": 705
},
{
"epoch": 0.01,
"learning_rate": 9.998420034878433e-05,
"loss": 1.5862,
"step": 710
},
{
"epoch": 0.01,
"learning_rate": 9.998394029051267e-05,
"loss": 1.5123,
"step": 715
},
{
"epoch": 0.01,
"learning_rate": 9.998367810977281e-05,
"loss": 1.5735,
"step": 720
},
{
"epoch": 0.01,
"learning_rate": 9.99834138065759e-05,
"loss": 1.5774,
"step": 725
},
{
"epoch": 0.01,
"learning_rate": 9.998314738093312e-05,
"loss": 1.5815,
"step": 730
},
{
"epoch": 0.01,
"learning_rate": 9.998287883285586e-05,
"loss": 1.5836,
"step": 735
},
{
"epoch": 0.01,
"learning_rate": 9.998260816235546e-05,
"loss": 1.5913,
"step": 740
},
{
"epoch": 0.01,
"learning_rate": 9.998233536944342e-05,
"loss": 1.5635,
"step": 745
},
{
"epoch": 0.01,
"learning_rate": 9.998206045413134e-05,
"loss": 1.5864,
"step": 750
},
{
"epoch": 0.01,
"learning_rate": 9.998178341643089e-05,
"loss": 1.5114,
"step": 755
},
{
"epoch": 0.01,
"learning_rate": 9.998150425635384e-05,
"loss": 1.5709,
"step": 760
},
{
"epoch": 0.01,
"learning_rate": 9.998122297391204e-05,
"loss": 1.5252,
"step": 765
},
{
"epoch": 0.01,
"learning_rate": 9.998093956911742e-05,
"loss": 1.5693,
"step": 770
},
{
"epoch": 0.01,
"learning_rate": 9.998065404198202e-05,
"loss": 1.598,
"step": 775
},
{
"epoch": 0.01,
"learning_rate": 9.998036639251797e-05,
"loss": 1.5664,
"step": 780
},
{
"epoch": 0.01,
"learning_rate": 9.99800766207375e-05,
"loss": 1.5442,
"step": 785
},
{
"epoch": 0.01,
"learning_rate": 9.997978472665289e-05,
"loss": 1.5822,
"step": 790
},
{
"epoch": 0.01,
"learning_rate": 9.997949071027654e-05,
"loss": 1.5764,
"step": 795
},
{
"epoch": 0.01,
"learning_rate": 9.997919457162095e-05,
"loss": 1.568,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 9.997889631069867e-05,
"loss": 1.5621,
"step": 805
},
{
"epoch": 0.01,
"learning_rate": 9.997859592752239e-05,
"loss": 1.5442,
"step": 810
},
{
"epoch": 0.01,
"learning_rate": 9.997829342210483e-05,
"loss": 1.5514,
"step": 815
},
{
"epoch": 0.01,
"learning_rate": 9.997798879445889e-05,
"loss": 1.5415,
"step": 820
},
{
"epoch": 0.01,
"learning_rate": 9.997768204459746e-05,
"loss": 1.5222,
"step": 825
},
{
"epoch": 0.01,
"learning_rate": 9.997737317253357e-05,
"loss": 1.5435,
"step": 830
},
{
"epoch": 0.01,
"learning_rate": 9.997706217828036e-05,
"loss": 1.5881,
"step": 835
},
{
"epoch": 0.01,
"learning_rate": 9.997674906185102e-05,
"loss": 1.5352,
"step": 840
},
{
"epoch": 0.01,
"learning_rate": 9.997643382325883e-05,
"loss": 1.5568,
"step": 845
},
{
"epoch": 0.01,
"learning_rate": 9.997611646251721e-05,
"loss": 1.5826,
"step": 850
},
{
"epoch": 0.01,
"learning_rate": 9.997579697963961e-05,
"loss": 1.5524,
"step": 855
},
{
"epoch": 0.01,
"learning_rate": 9.99754753746396e-05,
"loss": 1.5408,
"step": 860
},
{
"epoch": 0.01,
"learning_rate": 9.997515164753087e-05,
"loss": 1.5731,
"step": 865
},
{
"epoch": 0.01,
"learning_rate": 9.997482579832711e-05,
"loss": 1.5717,
"step": 870
},
{
"epoch": 0.01,
"learning_rate": 9.997449782704218e-05,
"loss": 1.5968,
"step": 875
},
{
"epoch": 0.01,
"learning_rate": 9.997416773369002e-05,
"loss": 1.5502,
"step": 880
},
{
"epoch": 0.01,
"learning_rate": 9.997383551828464e-05,
"loss": 1.5353,
"step": 885
},
{
"epoch": 0.01,
"learning_rate": 9.997350118084014e-05,
"loss": 1.5563,
"step": 890
},
{
"epoch": 0.01,
"learning_rate": 9.997316472137073e-05,
"loss": 1.5031,
"step": 895
},
{
"epoch": 0.01,
"learning_rate": 9.997282613989068e-05,
"loss": 1.5904,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 9.997248543641438e-05,
"loss": 1.6065,
"step": 905
},
{
"epoch": 0.01,
"learning_rate": 9.99721426109563e-05,
"loss": 1.5277,
"step": 910
},
{
"epoch": 0.01,
"learning_rate": 9.997179766353098e-05,
"loss": 1.5396,
"step": 915
},
{
"epoch": 0.01,
"learning_rate": 9.997145059415308e-05,
"loss": 1.5537,
"step": 920
},
{
"epoch": 0.01,
"learning_rate": 9.997110140283733e-05,
"loss": 1.5792,
"step": 925
},
{
"epoch": 0.01,
"learning_rate": 9.997075008959858e-05,
"loss": 1.5762,
"step": 930
},
{
"epoch": 0.01,
"learning_rate": 9.997039665445171e-05,
"loss": 1.5362,
"step": 935
},
{
"epoch": 0.01,
"learning_rate": 9.997004109741178e-05,
"loss": 1.5316,
"step": 940
},
{
"epoch": 0.01,
"learning_rate": 9.996968341849383e-05,
"loss": 1.529,
"step": 945
},
{
"epoch": 0.01,
"learning_rate": 9.996932361771308e-05,
"loss": 1.5221,
"step": 950
},
{
"epoch": 0.01,
"learning_rate": 9.99689616950848e-05,
"loss": 1.5904,
"step": 955
},
{
"epoch": 0.01,
"learning_rate": 9.996859765062437e-05,
"loss": 1.5704,
"step": 960
},
{
"epoch": 0.01,
"learning_rate": 9.996823148434724e-05,
"loss": 1.5806,
"step": 965
},
{
"epoch": 0.01,
"learning_rate": 9.996786319626897e-05,
"loss": 1.5288,
"step": 970
},
{
"epoch": 0.01,
"learning_rate": 9.996749278640516e-05,
"loss": 1.5374,
"step": 975
},
{
"epoch": 0.01,
"learning_rate": 9.996712025477157e-05,
"loss": 1.5549,
"step": 980
},
{
"epoch": 0.01,
"learning_rate": 9.996674560138404e-05,
"loss": 1.5357,
"step": 985
},
{
"epoch": 0.01,
"learning_rate": 9.996636882625843e-05,
"loss": 1.5692,
"step": 990
},
{
"epoch": 0.01,
"learning_rate": 9.996598992941078e-05,
"loss": 1.5926,
"step": 995
},
{
"epoch": 0.01,
"learning_rate": 9.996560891085714e-05,
"loss": 1.5619,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 9.996522577061372e-05,
"loss": 1.5692,
"step": 1005
},
{
"epoch": 0.01,
"learning_rate": 9.996484050869679e-05,
"loss": 1.5341,
"step": 1010
},
{
"epoch": 0.01,
"learning_rate": 9.996445312512269e-05,
"loss": 1.5455,
"step": 1015
},
{
"epoch": 0.01,
"learning_rate": 9.996406361990789e-05,
"loss": 1.5872,
"step": 1020
},
{
"epoch": 0.01,
"learning_rate": 9.99636719930689e-05,
"loss": 1.5254,
"step": 1025
},
{
"epoch": 0.01,
"learning_rate": 9.996327824462239e-05,
"loss": 1.5463,
"step": 1030
},
{
"epoch": 0.01,
"learning_rate": 9.996288237458505e-05,
"loss": 1.5395,
"step": 1035
},
{
"epoch": 0.01,
"learning_rate": 9.996248438297369e-05,
"loss": 1.5434,
"step": 1040
},
{
"epoch": 0.01,
"learning_rate": 9.996208426980522e-05,
"loss": 1.5404,
"step": 1045
},
{
"epoch": 0.01,
"learning_rate": 9.996168203509664e-05,
"loss": 1.5315,
"step": 1050
},
{
"epoch": 0.01,
"learning_rate": 9.996127767886501e-05,
"loss": 1.5123,
"step": 1055
},
{
"epoch": 0.01,
"learning_rate": 9.996087120112749e-05,
"loss": 1.5595,
"step": 1060
},
{
"epoch": 0.01,
"learning_rate": 9.996046260190138e-05,
"loss": 1.5484,
"step": 1065
},
{
"epoch": 0.01,
"learning_rate": 9.9960051881204e-05,
"loss": 1.5482,
"step": 1070
},
{
"epoch": 0.01,
"learning_rate": 9.99596390390528e-05,
"loss": 1.5349,
"step": 1075
},
{
"epoch": 0.01,
"learning_rate": 9.995922407546532e-05,
"loss": 1.5299,
"step": 1080
},
{
"epoch": 0.01,
"learning_rate": 9.995880699045916e-05,
"loss": 1.5362,
"step": 1085
},
{
"epoch": 0.01,
"learning_rate": 9.995838778405204e-05,
"loss": 1.5887,
"step": 1090
},
{
"epoch": 0.01,
"learning_rate": 9.995796645626177e-05,
"loss": 1.5472,
"step": 1095
},
{
"epoch": 0.01,
"learning_rate": 9.995754300710623e-05,
"loss": 1.5677,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 9.99571174366034e-05,
"loss": 1.536,
"step": 1105
},
{
"epoch": 0.01,
"learning_rate": 9.995668974477137e-05,
"loss": 1.5135,
"step": 1110
},
{
"epoch": 0.01,
"learning_rate": 9.995625993162827e-05,
"loss": 1.543,
"step": 1115
},
{
"epoch": 0.01,
"learning_rate": 9.995582799719236e-05,
"loss": 1.5059,
"step": 1120
},
{
"epoch": 0.01,
"learning_rate": 9.9955393941482e-05,
"loss": 1.5634,
"step": 1125
},
{
"epoch": 0.01,
"learning_rate": 9.99549577645156e-05,
"loss": 1.565,
"step": 1130
},
{
"epoch": 0.01,
"learning_rate": 9.995451946631171e-05,
"loss": 1.5575,
"step": 1135
},
{
"epoch": 0.01,
"learning_rate": 9.995407904688889e-05,
"loss": 1.5251,
"step": 1140
},
{
"epoch": 0.02,
"learning_rate": 9.99536365062659e-05,
"loss": 1.5097,
"step": 1145
},
{
"epoch": 0.02,
"learning_rate": 9.99531918444615e-05,
"loss": 1.5538,
"step": 1150
},
{
"epoch": 0.02,
"learning_rate": 9.995274506149456e-05,
"loss": 1.5223,
"step": 1155
},
{
"epoch": 0.02,
"learning_rate": 9.995229615738409e-05,
"loss": 1.5376,
"step": 1160
},
{
"epoch": 0.02,
"learning_rate": 9.995184513214912e-05,
"loss": 1.5544,
"step": 1165
},
{
"epoch": 0.02,
"learning_rate": 9.995139198580884e-05,
"loss": 1.5384,
"step": 1170
},
{
"epoch": 0.02,
"learning_rate": 9.995093671838243e-05,
"loss": 1.5414,
"step": 1175
},
{
"epoch": 0.02,
"learning_rate": 9.995047932988926e-05,
"loss": 1.5645,
"step": 1180
},
{
"epoch": 0.02,
"learning_rate": 9.995001982034875e-05,
"loss": 1.5133,
"step": 1185
},
{
"epoch": 0.02,
"learning_rate": 9.994955818978042e-05,
"loss": 1.4974,
"step": 1190
},
{
"epoch": 0.02,
"learning_rate": 9.994909443820385e-05,
"loss": 1.5204,
"step": 1195
},
{
"epoch": 0.02,
"learning_rate": 9.994862856563876e-05,
"loss": 1.5035,
"step": 1200
},
{
"epoch": 0.02,
"learning_rate": 9.994816057210491e-05,
"loss": 1.5576,
"step": 1205
},
{
"epoch": 0.02,
"learning_rate": 9.994769045762217e-05,
"loss": 1.5259,
"step": 1210
},
{
"epoch": 0.02,
"learning_rate": 9.994721822221054e-05,
"loss": 1.52,
"step": 1215
},
{
"epoch": 0.02,
"learning_rate": 9.994674386589002e-05,
"loss": 1.5427,
"step": 1220
},
{
"epoch": 0.02,
"learning_rate": 9.994626738868079e-05,
"loss": 1.5183,
"step": 1225
},
{
"epoch": 0.02,
"learning_rate": 9.994578879060306e-05,
"loss": 1.5181,
"step": 1230
},
{
"epoch": 0.02,
"learning_rate": 9.994530807167717e-05,
"loss": 1.5431,
"step": 1235
},
{
"epoch": 0.02,
"learning_rate": 9.994482523192352e-05,
"loss": 1.5278,
"step": 1240
},
{
"epoch": 0.02,
"learning_rate": 9.994434027136262e-05,
"loss": 1.5357,
"step": 1245
},
{
"epoch": 0.02,
"learning_rate": 9.994385319001507e-05,
"loss": 1.5197,
"step": 1250
},
{
"epoch": 0.02,
"learning_rate": 9.994336398790153e-05,
"loss": 1.5094,
"step": 1255
},
{
"epoch": 0.02,
"learning_rate": 9.994287266504281e-05,
"loss": 1.5351,
"step": 1260
},
{
"epoch": 0.02,
"learning_rate": 9.994237922145972e-05,
"loss": 1.5026,
"step": 1265
},
{
"epoch": 0.02,
"learning_rate": 9.994188365717327e-05,
"loss": 1.5355,
"step": 1270
},
{
"epoch": 0.02,
"learning_rate": 9.994138597220446e-05,
"loss": 1.4937,
"step": 1275
},
{
"epoch": 0.02,
"learning_rate": 9.994088616657444e-05,
"loss": 1.5431,
"step": 1280
},
{
"epoch": 0.02,
"learning_rate": 9.994038424030445e-05,
"loss": 1.5185,
"step": 1285
},
{
"epoch": 0.02,
"learning_rate": 9.993988019341576e-05,
"loss": 1.5246,
"step": 1290
},
{
"epoch": 0.02,
"learning_rate": 9.993937402592981e-05,
"loss": 1.4729,
"step": 1295
},
{
"epoch": 0.02,
"learning_rate": 9.993886573786809e-05,
"loss": 1.5439,
"step": 1300
},
{
"epoch": 0.02,
"learning_rate": 9.993835532925218e-05,
"loss": 1.5294,
"step": 1305
},
{
"epoch": 0.02,
"learning_rate": 9.993784280010374e-05,
"loss": 1.5325,
"step": 1310
},
{
"epoch": 0.02,
"learning_rate": 9.993732815044454e-05,
"loss": 1.4979,
"step": 1315
},
{
"epoch": 0.02,
"learning_rate": 9.993681138029645e-05,
"loss": 1.5358,
"step": 1320
},
{
"epoch": 0.02,
"learning_rate": 9.993629248968139e-05,
"loss": 1.5517,
"step": 1325
},
{
"epoch": 0.02,
"learning_rate": 9.993577147862141e-05,
"loss": 1.4693,
"step": 1330
},
{
"epoch": 0.02,
"learning_rate": 9.993524834713863e-05,
"loss": 1.5335,
"step": 1335
},
{
"epoch": 0.02,
"learning_rate": 9.993472309525525e-05,
"loss": 1.5092,
"step": 1340
},
{
"epoch": 0.02,
"learning_rate": 9.99341957229936e-05,
"loss": 1.5415,
"step": 1345
},
{
"epoch": 0.02,
"learning_rate": 9.993366623037605e-05,
"loss": 1.5203,
"step": 1350
},
{
"epoch": 0.02,
"learning_rate": 9.993313461742508e-05,
"loss": 1.5314,
"step": 1355
},
{
"epoch": 0.02,
"learning_rate": 9.99326008841633e-05,
"loss": 1.511,
"step": 1360
},
{
"epoch": 0.02,
"learning_rate": 9.993206503061335e-05,
"loss": 1.5579,
"step": 1365
},
{
"epoch": 0.02,
"learning_rate": 9.993152705679797e-05,
"loss": 1.5395,
"step": 1370
},
{
"epoch": 0.02,
"learning_rate": 9.993098696274004e-05,
"loss": 1.4999,
"step": 1375
},
{
"epoch": 0.02,
"learning_rate": 9.993044474846245e-05,
"loss": 1.5352,
"step": 1380
},
{
"epoch": 0.02,
"learning_rate": 9.992990041398827e-05,
"loss": 1.4827,
"step": 1385
},
{
"epoch": 0.02,
"learning_rate": 9.992935395934057e-05,
"loss": 1.5421,
"step": 1390
},
{
"epoch": 0.02,
"learning_rate": 9.992880538454258e-05,
"loss": 1.5061,
"step": 1395
},
{
"epoch": 0.02,
"learning_rate": 9.992825468961759e-05,
"loss": 1.4881,
"step": 1400
},
{
"epoch": 0.02,
"learning_rate": 9.992770187458899e-05,
"loss": 1.5124,
"step": 1405
},
{
"epoch": 0.02,
"learning_rate": 9.992714693948024e-05,
"loss": 1.5314,
"step": 1410
},
{
"epoch": 0.02,
"learning_rate": 9.992658988431492e-05,
"loss": 1.5324,
"step": 1415
},
{
"epoch": 0.02,
"learning_rate": 9.992603070911666e-05,
"loss": 1.533,
"step": 1420
},
{
"epoch": 0.02,
"learning_rate": 9.992546941390924e-05,
"loss": 1.5134,
"step": 1425
},
{
"epoch": 0.02,
"learning_rate": 9.992490599871645e-05,
"loss": 1.5139,
"step": 1430
},
{
"epoch": 0.02,
"learning_rate": 9.992434046356226e-05,
"loss": 1.5198,
"step": 1435
},
{
"epoch": 0.02,
"learning_rate": 9.992377280847066e-05,
"loss": 1.5882,
"step": 1440
},
{
"epoch": 0.02,
"learning_rate": 9.992320303346576e-05,
"loss": 1.507,
"step": 1445
},
{
"epoch": 0.02,
"learning_rate": 9.992263113857174e-05,
"loss": 1.5394,
"step": 1450
},
{
"epoch": 0.02,
"learning_rate": 9.99220571238129e-05,
"loss": 1.4546,
"step": 1455
},
{
"epoch": 0.02,
"learning_rate": 9.992148098921361e-05,
"loss": 1.5112,
"step": 1460
},
{
"epoch": 0.02,
"learning_rate": 9.992090273479835e-05,
"loss": 1.4993,
"step": 1465
},
{
"epoch": 0.02,
"learning_rate": 9.992032236059165e-05,
"loss": 1.5217,
"step": 1470
},
{
"epoch": 0.02,
"learning_rate": 9.991973986661816e-05,
"loss": 1.4895,
"step": 1475
},
{
"epoch": 0.02,
"learning_rate": 9.991915525290262e-05,
"loss": 1.4592,
"step": 1480
},
{
"epoch": 0.02,
"learning_rate": 9.991856851946985e-05,
"loss": 1.5216,
"step": 1485
},
{
"epoch": 0.02,
"learning_rate": 9.991797966634478e-05,
"loss": 1.4998,
"step": 1490
},
{
"epoch": 0.02,
"learning_rate": 9.991738869355239e-05,
"loss": 1.4802,
"step": 1495
},
{
"epoch": 0.02,
"learning_rate": 9.991679560111778e-05,
"loss": 1.5565,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 9.991620038906615e-05,
"loss": 1.5208,
"step": 1505
},
{
"epoch": 0.02,
"learning_rate": 9.991560305742276e-05,
"loss": 1.5521,
"step": 1510
},
{
"epoch": 0.02,
"learning_rate": 9.991500360621299e-05,
"loss": 1.5205,
"step": 1515
},
{
"epoch": 0.02,
"learning_rate": 9.991440203546227e-05,
"loss": 1.4971,
"step": 1520
},
{
"epoch": 0.02,
"learning_rate": 9.991379834519618e-05,
"loss": 1.5571,
"step": 1525
},
{
"epoch": 0.02,
"learning_rate": 9.991319253544031e-05,
"loss": 1.4808,
"step": 1530
},
{
"epoch": 0.02,
"learning_rate": 9.991258460622042e-05,
"loss": 1.4773,
"step": 1535
},
{
"epoch": 0.02,
"learning_rate": 9.991197455756232e-05,
"loss": 1.5252,
"step": 1540
},
{
"epoch": 0.02,
"learning_rate": 9.991136238949188e-05,
"loss": 1.4748,
"step": 1545
},
{
"epoch": 0.02,
"learning_rate": 9.991074810203515e-05,
"loss": 1.5176,
"step": 1550
},
{
"epoch": 0.02,
"learning_rate": 9.991013169521816e-05,
"loss": 1.5121,
"step": 1555
},
{
"epoch": 0.02,
"learning_rate": 9.990951316906713e-05,
"loss": 1.5259,
"step": 1560
},
{
"epoch": 0.02,
"learning_rate": 9.99088925236083e-05,
"loss": 1.4834,
"step": 1565
},
{
"epoch": 0.02,
"learning_rate": 9.990826975886803e-05,
"loss": 1.5123,
"step": 1570
},
{
"epoch": 0.02,
"learning_rate": 9.990764487487276e-05,
"loss": 1.5004,
"step": 1575
},
{
"epoch": 0.02,
"learning_rate": 9.990701787164902e-05,
"loss": 1.4957,
"step": 1580
},
{
"epoch": 0.02,
"learning_rate": 9.990638874922346e-05,
"loss": 1.4774,
"step": 1585
},
{
"epoch": 0.02,
"learning_rate": 9.990575750762276e-05,
"loss": 1.4859,
"step": 1590
},
{
"epoch": 0.02,
"learning_rate": 9.990512414687376e-05,
"loss": 1.5081,
"step": 1595
},
{
"epoch": 0.02,
"learning_rate": 9.990448866700333e-05,
"loss": 1.4969,
"step": 1600
},
{
"epoch": 0.02,
"learning_rate": 9.990385106803844e-05,
"loss": 1.5732,
"step": 1605
},
{
"epoch": 0.02,
"learning_rate": 9.99032113500062e-05,
"loss": 1.5039,
"step": 1610
},
{
"epoch": 0.02,
"learning_rate": 9.990256951293377e-05,
"loss": 1.5033,
"step": 1615
},
{
"epoch": 0.02,
"learning_rate": 9.990192555684837e-05,
"loss": 1.4919,
"step": 1620
},
{
"epoch": 0.02,
"learning_rate": 9.990127948177739e-05,
"loss": 1.5194,
"step": 1625
},
{
"epoch": 0.02,
"learning_rate": 9.990063128774822e-05,
"loss": 1.5121,
"step": 1630
},
{
"epoch": 0.02,
"learning_rate": 9.989998097478842e-05,
"loss": 1.5265,
"step": 1635
},
{
"epoch": 0.02,
"learning_rate": 9.989932854292559e-05,
"loss": 1.5256,
"step": 1640
},
{
"epoch": 0.02,
"learning_rate": 9.989867399218742e-05,
"loss": 1.4871,
"step": 1645
},
{
"epoch": 0.02,
"learning_rate": 9.989801732260174e-05,
"loss": 1.5305,
"step": 1650
},
{
"epoch": 0.02,
"learning_rate": 9.98973585341964e-05,
"loss": 1.545,
"step": 1655
},
{
"epoch": 0.02,
"learning_rate": 9.989669762699938e-05,
"loss": 1.51,
"step": 1660
},
{
"epoch": 0.02,
"learning_rate": 9.989603460103877e-05,
"loss": 1.5029,
"step": 1665
},
{
"epoch": 0.02,
"learning_rate": 9.989536945634271e-05,
"loss": 1.5314,
"step": 1670
},
{
"epoch": 0.02,
"learning_rate": 9.989470219293942e-05,
"loss": 1.5006,
"step": 1675
},
{
"epoch": 0.02,
"learning_rate": 9.989403281085725e-05,
"loss": 1.5217,
"step": 1680
},
{
"epoch": 0.02,
"learning_rate": 9.989336131012463e-05,
"loss": 1.5182,
"step": 1685
},
{
"epoch": 0.02,
"learning_rate": 9.989268769077006e-05,
"loss": 1.5025,
"step": 1690
},
{
"epoch": 0.02,
"learning_rate": 9.989201195282218e-05,
"loss": 1.5885,
"step": 1695
},
{
"epoch": 0.02,
"learning_rate": 9.989133409630967e-05,
"loss": 1.5305,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 9.989065412126127e-05,
"loss": 1.4977,
"step": 1705
},
{
"epoch": 0.02,
"learning_rate": 9.988997202770589e-05,
"loss": 1.5071,
"step": 1710
},
{
"epoch": 0.02,
"learning_rate": 9.98892878156725e-05,
"loss": 1.5344,
"step": 1715
},
{
"epoch": 0.02,
"learning_rate": 9.988860148519015e-05,
"loss": 1.5117,
"step": 1720
},
{
"epoch": 0.02,
"learning_rate": 9.988791303628797e-05,
"loss": 1.4549,
"step": 1725
},
{
"epoch": 0.02,
"learning_rate": 9.98872224689952e-05,
"loss": 1.5532,
"step": 1730
},
{
"epoch": 0.02,
"learning_rate": 9.988652978334116e-05,
"loss": 1.4858,
"step": 1735
},
{
"epoch": 0.02,
"learning_rate": 9.988583497935528e-05,
"loss": 1.5146,
"step": 1740
},
{
"epoch": 0.02,
"learning_rate": 9.988513805706705e-05,
"loss": 1.5064,
"step": 1745
},
{
"epoch": 0.02,
"learning_rate": 9.988443901650606e-05,
"loss": 1.5336,
"step": 1750
},
{
"epoch": 0.02,
"learning_rate": 9.9883737857702e-05,
"loss": 1.4996,
"step": 1755
},
{
"epoch": 0.02,
"learning_rate": 9.988303458068465e-05,
"loss": 1.4776,
"step": 1760
},
{
"epoch": 0.02,
"learning_rate": 9.988232918548385e-05,
"loss": 1.5227,
"step": 1765
},
{
"epoch": 0.02,
"learning_rate": 9.988162167212957e-05,
"loss": 1.4745,
"step": 1770
},
{
"epoch": 0.02,
"learning_rate": 9.988091204065186e-05,
"loss": 1.5247,
"step": 1775
},
{
"epoch": 0.02,
"learning_rate": 9.988020029108084e-05,
"loss": 1.48,
"step": 1780
},
{
"epoch": 0.02,
"learning_rate": 9.987948642344674e-05,
"loss": 1.5109,
"step": 1785
},
{
"epoch": 0.02,
"learning_rate": 9.987877043777987e-05,
"loss": 1.5164,
"step": 1790
},
{
"epoch": 0.02,
"learning_rate": 9.987805233411063e-05,
"loss": 1.5472,
"step": 1795
},
{
"epoch": 0.02,
"learning_rate": 9.987733211246952e-05,
"loss": 1.46,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 9.987660977288711e-05,
"loss": 1.5287,
"step": 1805
},
{
"epoch": 0.02,
"learning_rate": 9.987588531539409e-05,
"loss": 1.4857,
"step": 1810
},
{
"epoch": 0.02,
"learning_rate": 9.98751587400212e-05,
"loss": 1.4492,
"step": 1815
},
{
"epoch": 0.02,
"learning_rate": 9.987443004679933e-05,
"loss": 1.5026,
"step": 1820
},
{
"epoch": 0.02,
"learning_rate": 9.987369923575939e-05,
"loss": 1.4943,
"step": 1825
},
{
"epoch": 0.02,
"learning_rate": 9.987296630693242e-05,
"loss": 1.4948,
"step": 1830
},
{
"epoch": 0.02,
"learning_rate": 9.987223126034954e-05,
"loss": 1.4992,
"step": 1835
},
{
"epoch": 0.02,
"learning_rate": 9.987149409604197e-05,
"loss": 1.5003,
"step": 1840
},
{
"epoch": 0.02,
"learning_rate": 9.987075481404101e-05,
"loss": 1.4763,
"step": 1845
},
{
"epoch": 0.02,
"learning_rate": 9.987001341437805e-05,
"loss": 1.5161,
"step": 1850
},
{
"epoch": 0.02,
"learning_rate": 9.986926989708457e-05,
"loss": 1.5072,
"step": 1855
},
{
"epoch": 0.02,
"learning_rate": 9.986852426219216e-05,
"loss": 1.4651,
"step": 1860
},
{
"epoch": 0.02,
"learning_rate": 9.986777650973245e-05,
"loss": 1.5275,
"step": 1865
},
{
"epoch": 0.02,
"learning_rate": 9.986702663973722e-05,
"loss": 1.5004,
"step": 1870
},
{
"epoch": 0.02,
"learning_rate": 9.986627465223829e-05,
"loss": 1.4829,
"step": 1875
},
{
"epoch": 0.02,
"learning_rate": 9.986552054726758e-05,
"loss": 1.4671,
"step": 1880
},
{
"epoch": 0.02,
"learning_rate": 9.986476432485718e-05,
"loss": 1.4622,
"step": 1885
},
{
"epoch": 0.02,
"learning_rate": 9.986400598503913e-05,
"loss": 1.5068,
"step": 1890
},
{
"epoch": 0.02,
"learning_rate": 9.986324552784566e-05,
"loss": 1.4951,
"step": 1895
},
{
"epoch": 0.02,
"learning_rate": 9.986248295330906e-05,
"loss": 1.5005,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 9.98617182614617e-05,
"loss": 1.5222,
"step": 1905
},
{
"epoch": 0.03,
"learning_rate": 9.986095145233608e-05,
"loss": 1.5376,
"step": 1910
},
{
"epoch": 0.03,
"learning_rate": 9.986018252596472e-05,
"loss": 1.459,
"step": 1915
},
{
"epoch": 0.03,
"learning_rate": 9.98594114823803e-05,
"loss": 1.5325,
"step": 1920
},
{
"epoch": 0.03,
"learning_rate": 9.985863832161555e-05,
"loss": 1.4876,
"step": 1925
},
{
"epoch": 0.03,
"learning_rate": 9.98578630437033e-05,
"loss": 1.5044,
"step": 1930
},
{
"epoch": 0.03,
"learning_rate": 9.985708564867648e-05,
"loss": 1.5301,
"step": 1935
},
{
"epoch": 0.03,
"learning_rate": 9.98563061365681e-05,
"loss": 1.5258,
"step": 1940
},
{
"epoch": 0.03,
"learning_rate": 9.985552450741124e-05,
"loss": 1.4905,
"step": 1945
},
{
"epoch": 0.03,
"learning_rate": 9.98547407612391e-05,
"loss": 1.5043,
"step": 1950
},
{
"epoch": 0.03,
"learning_rate": 9.9853954898085e-05,
"loss": 1.4886,
"step": 1955
},
{
"epoch": 0.03,
"learning_rate": 9.985316691798225e-05,
"loss": 1.5062,
"step": 1960
},
{
"epoch": 0.03,
"learning_rate": 9.985237682096433e-05,
"loss": 1.4848,
"step": 1965
},
{
"epoch": 0.03,
"learning_rate": 9.985158460706481e-05,
"loss": 1.509,
"step": 1970
},
{
"epoch": 0.03,
"learning_rate": 9.985079027631731e-05,
"loss": 1.5631,
"step": 1975
},
{
"epoch": 0.03,
"learning_rate": 9.984999382875558e-05,
"loss": 1.5344,
"step": 1980
},
{
"epoch": 0.03,
"learning_rate": 9.98491952644134e-05,
"loss": 1.5326,
"step": 1985
},
{
"epoch": 0.03,
"learning_rate": 9.984839458332471e-05,
"loss": 1.5153,
"step": 1990
},
{
"epoch": 0.03,
"learning_rate": 9.984759178552352e-05,
"loss": 1.5083,
"step": 1995
},
{
"epoch": 0.03,
"learning_rate": 9.984678687104389e-05,
"loss": 1.4718,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 9.984597983992e-05,
"loss": 1.5,
"step": 2005
},
{
"epoch": 0.03,
"learning_rate": 9.984517069218615e-05,
"loss": 1.5103,
"step": 2010
},
{
"epoch": 0.03,
"learning_rate": 9.984435942787666e-05,
"loss": 1.4653,
"step": 2015
},
{
"epoch": 0.03,
"learning_rate": 9.984354604702602e-05,
"loss": 1.4717,
"step": 2020
},
{
"epoch": 0.03,
"learning_rate": 9.984273054966874e-05,
"loss": 1.5312,
"step": 2025
},
{
"epoch": 0.03,
"learning_rate": 9.984191293583946e-05,
"loss": 1.4629,
"step": 2030
},
{
"epoch": 0.03,
"learning_rate": 9.984109320557288e-05,
"loss": 1.4961,
"step": 2035
},
{
"epoch": 0.03,
"learning_rate": 9.984027135890382e-05,
"loss": 1.4446,
"step": 2040
},
{
"epoch": 0.03,
"learning_rate": 9.98394473958672e-05,
"loss": 1.526,
"step": 2045
},
{
"epoch": 0.03,
"learning_rate": 9.983862131649797e-05,
"loss": 1.4502,
"step": 2050
},
{
"epoch": 0.03,
"learning_rate": 9.983779312083123e-05,
"loss": 1.4823,
"step": 2055
},
{
"epoch": 0.03,
"learning_rate": 9.983696280890213e-05,
"loss": 1.5284,
"step": 2060
},
{
"epoch": 0.03,
"learning_rate": 9.983613038074598e-05,
"loss": 1.4806,
"step": 2065
},
{
"epoch": 0.03,
"learning_rate": 9.983529583639804e-05,
"loss": 1.4606,
"step": 2070
},
{
"epoch": 0.03,
"learning_rate": 9.983445917589382e-05,
"loss": 1.4843,
"step": 2075
},
{
"epoch": 0.03,
"learning_rate": 9.983362039926883e-05,
"loss": 1.4676,
"step": 2080
},
{
"epoch": 0.03,
"learning_rate": 9.983277950655867e-05,
"loss": 1.4959,
"step": 2085
},
{
"epoch": 0.03,
"learning_rate": 9.983193649779905e-05,
"loss": 1.5052,
"step": 2090
},
{
"epoch": 0.03,
"learning_rate": 9.983109137302578e-05,
"loss": 1.4806,
"step": 2095
},
{
"epoch": 0.03,
"learning_rate": 9.983024413227474e-05,
"loss": 1.4905,
"step": 2100
},
{
"epoch": 0.03,
"learning_rate": 9.98293947755819e-05,
"loss": 1.4859,
"step": 2105
},
{
"epoch": 0.03,
"learning_rate": 9.982854330298335e-05,
"loss": 1.4797,
"step": 2110
},
{
"epoch": 0.03,
"learning_rate": 9.982768971451521e-05,
"loss": 1.508,
"step": 2115
},
{
"epoch": 0.03,
"learning_rate": 9.982683401021376e-05,
"loss": 1.5084,
"step": 2120
},
{
"epoch": 0.03,
"learning_rate": 9.98259761901153e-05,
"loss": 1.4831,
"step": 2125
},
{
"epoch": 0.03,
"learning_rate": 9.982511625425628e-05,
"loss": 1.4555,
"step": 2130
},
{
"epoch": 0.03,
"learning_rate": 9.982425420267323e-05,
"loss": 1.5117,
"step": 2135
},
{
"epoch": 0.03,
"learning_rate": 9.982339003540271e-05,
"loss": 1.4715,
"step": 2140
},
{
"epoch": 0.03,
"learning_rate": 9.982252375248146e-05,
"loss": 1.4647,
"step": 2145
},
{
"epoch": 0.03,
"learning_rate": 9.982165535394623e-05,
"loss": 1.4422,
"step": 2150
},
{
"epoch": 0.03,
"learning_rate": 9.982078483983393e-05,
"loss": 1.4579,
"step": 2155
},
{
"epoch": 0.03,
"learning_rate": 9.98199122101815e-05,
"loss": 1.5093,
"step": 2160
},
{
"epoch": 0.03,
"learning_rate": 9.981903746502598e-05,
"loss": 1.4809,
"step": 2165
},
{
"epoch": 0.03,
"learning_rate": 9.981816060440457e-05,
"loss": 1.4496,
"step": 2170
},
{
"epoch": 0.03,
"learning_rate": 9.981728162835444e-05,
"loss": 1.4504,
"step": 2175
},
{
"epoch": 0.03,
"learning_rate": 9.981640053691297e-05,
"loss": 1.4608,
"step": 2180
},
{
"epoch": 0.03,
"learning_rate": 9.981551733011752e-05,
"loss": 1.5142,
"step": 2185
},
{
"epoch": 0.03,
"learning_rate": 9.981463200800564e-05,
"loss": 1.4844,
"step": 2190
},
{
"epoch": 0.03,
"learning_rate": 9.98137445706149e-05,
"loss": 1.4667,
"step": 2195
},
{
"epoch": 0.03,
"learning_rate": 9.981285501798297e-05,
"loss": 1.4898,
"step": 2200
},
{
"epoch": 0.03,
"learning_rate": 9.981196335014766e-05,
"loss": 1.4767,
"step": 2205
},
{
"epoch": 0.03,
"learning_rate": 9.98110695671468e-05,
"loss": 1.5053,
"step": 2210
},
{
"epoch": 0.03,
"learning_rate": 9.981017366901836e-05,
"loss": 1.4718,
"step": 2215
},
{
"epoch": 0.03,
"learning_rate": 9.980927565580038e-05,
"loss": 1.4377,
"step": 2220
},
{
"epoch": 0.03,
"learning_rate": 9.980837552753098e-05,
"loss": 1.4572,
"step": 2225
},
{
"epoch": 0.03,
"learning_rate": 9.98074732842484e-05,
"loss": 1.4839,
"step": 2230
},
{
"epoch": 0.03,
"learning_rate": 9.980656892599095e-05,
"loss": 1.5175,
"step": 2235
},
{
"epoch": 0.03,
"learning_rate": 9.980566245279702e-05,
"loss": 1.4939,
"step": 2240
},
{
"epoch": 0.03,
"learning_rate": 9.980475386470511e-05,
"loss": 1.4889,
"step": 2245
},
{
"epoch": 0.03,
"learning_rate": 9.98038431617538e-05,
"loss": 1.5334,
"step": 2250
},
{
"epoch": 0.03,
"learning_rate": 9.980293034398176e-05,
"loss": 1.4783,
"step": 2255
},
{
"epoch": 0.03,
"learning_rate": 9.980201541142775e-05,
"loss": 1.5181,
"step": 2260
},
{
"epoch": 0.03,
"learning_rate": 9.980109836413063e-05,
"loss": 1.4802,
"step": 2265
},
{
"epoch": 0.03,
"learning_rate": 9.980017920212932e-05,
"loss": 1.4891,
"step": 2270
},
{
"epoch": 0.03,
"learning_rate": 9.979925792546287e-05,
"loss": 1.5275,
"step": 2275
},
{
"epoch": 0.03,
"learning_rate": 9.979833453417041e-05,
"loss": 1.4996,
"step": 2280
},
{
"epoch": 0.03,
"learning_rate": 9.97974090282911e-05,
"loss": 1.4852,
"step": 2285
},
{
"epoch": 0.03,
"learning_rate": 9.979648140786429e-05,
"loss": 1.4821,
"step": 2290
},
{
"epoch": 0.03,
"learning_rate": 9.979555167292936e-05,
"loss": 1.4715,
"step": 2295
},
{
"epoch": 0.03,
"learning_rate": 9.979461982352576e-05,
"loss": 1.5072,
"step": 2300
},
{
"epoch": 0.03,
"learning_rate": 9.97936858596931e-05,
"loss": 1.4952,
"step": 2305
},
{
"epoch": 0.03,
"learning_rate": 9.979274978147102e-05,
"loss": 1.4356,
"step": 2310
},
{
"epoch": 0.03,
"learning_rate": 9.979181158889926e-05,
"loss": 1.457,
"step": 2315
},
{
"epoch": 0.03,
"learning_rate": 9.979087128201768e-05,
"loss": 1.4718,
"step": 2320
},
{
"epoch": 0.03,
"learning_rate": 9.978992886086618e-05,
"loss": 1.4826,
"step": 2325
},
{
"epoch": 0.03,
"learning_rate": 9.97889843254848e-05,
"loss": 1.4833,
"step": 2330
},
{
"epoch": 0.03,
"learning_rate": 9.978803767591363e-05,
"loss": 1.4819,
"step": 2335
},
{
"epoch": 0.03,
"learning_rate": 9.978708891219289e-05,
"loss": 1.4769,
"step": 2340
},
{
"epoch": 0.03,
"learning_rate": 9.978613803436285e-05,
"loss": 1.5064,
"step": 2345
},
{
"epoch": 0.03,
"learning_rate": 9.97851850424639e-05,
"loss": 1.5239,
"step": 2350
},
{
"epoch": 0.03,
"learning_rate": 9.97842299365365e-05,
"loss": 1.4534,
"step": 2355
},
{
"epoch": 0.03,
"learning_rate": 9.978327271662121e-05,
"loss": 1.4953,
"step": 2360
},
{
"epoch": 0.03,
"learning_rate": 9.978231338275867e-05,
"loss": 1.497,
"step": 2365
},
{
"epoch": 0.03,
"learning_rate": 9.978135193498961e-05,
"loss": 1.5043,
"step": 2370
},
{
"epoch": 0.03,
"learning_rate": 9.978038837335489e-05,
"loss": 1.5142,
"step": 2375
},
{
"epoch": 0.03,
"learning_rate": 9.977942269789537e-05,
"loss": 1.4934,
"step": 2380
},
{
"epoch": 0.03,
"learning_rate": 9.97784549086521e-05,
"loss": 1.4684,
"step": 2385
},
{
"epoch": 0.03,
"learning_rate": 9.977748500566617e-05,
"loss": 1.5119,
"step": 2390
},
{
"epoch": 0.03,
"learning_rate": 9.977651298897874e-05,
"loss": 1.4843,
"step": 2395
},
{
"epoch": 0.03,
"learning_rate": 9.977553885863112e-05,
"loss": 1.4868,
"step": 2400
},
{
"epoch": 0.03,
"learning_rate": 9.977456261466465e-05,
"loss": 1.5556,
"step": 2405
},
{
"epoch": 0.03,
"learning_rate": 9.977358425712079e-05,
"loss": 1.4809,
"step": 2410
},
{
"epoch": 0.03,
"learning_rate": 9.977260378604107e-05,
"loss": 1.4636,
"step": 2415
},
{
"epoch": 0.03,
"learning_rate": 9.977162120146717e-05,
"loss": 1.505,
"step": 2420
},
{
"epoch": 0.03,
"learning_rate": 9.977063650344076e-05,
"loss": 1.4726,
"step": 2425
},
{
"epoch": 0.03,
"learning_rate": 9.976964969200367e-05,
"loss": 1.4891,
"step": 2430
},
{
"epoch": 0.03,
"learning_rate": 9.976866076719782e-05,
"loss": 1.4504,
"step": 2435
},
{
"epoch": 0.03,
"learning_rate": 9.976766972906518e-05,
"loss": 1.4934,
"step": 2440
},
{
"epoch": 0.03,
"learning_rate": 9.976667657764785e-05,
"loss": 1.4778,
"step": 2445
},
{
"epoch": 0.03,
"learning_rate": 9.9765681312988e-05,
"loss": 1.5007,
"step": 2450
},
{
"epoch": 0.03,
"learning_rate": 9.976468393512788e-05,
"loss": 1.4665,
"step": 2455
},
{
"epoch": 0.03,
"learning_rate": 9.976368444410985e-05,
"loss": 1.4415,
"step": 2460
},
{
"epoch": 0.03,
"learning_rate": 9.976268283997635e-05,
"loss": 1.5089,
"step": 2465
},
{
"epoch": 0.03,
"learning_rate": 9.97616791227699e-05,
"loss": 1.4819,
"step": 2470
},
{
"epoch": 0.03,
"learning_rate": 9.976067329253316e-05,
"loss": 1.4814,
"step": 2475
},
{
"epoch": 0.03,
"learning_rate": 9.975966534930879e-05,
"loss": 1.4835,
"step": 2480
},
{
"epoch": 0.03,
"learning_rate": 9.975865529313962e-05,
"loss": 1.5057,
"step": 2485
},
{
"epoch": 0.03,
"learning_rate": 9.975764312406854e-05,
"loss": 1.4941,
"step": 2490
},
{
"epoch": 0.03,
"learning_rate": 9.97566288421385e-05,
"loss": 1.4985,
"step": 2495
},
{
"epoch": 0.03,
"learning_rate": 9.975561244739261e-05,
"loss": 1.5245,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 9.9754593939874e-05,
"loss": 1.4663,
"step": 2505
},
{
"epoch": 0.03,
"learning_rate": 9.975357331962596e-05,
"loss": 1.44,
"step": 2510
},
{
"epoch": 0.03,
"learning_rate": 9.975255058669177e-05,
"loss": 1.4626,
"step": 2515
},
{
"epoch": 0.03,
"learning_rate": 9.975152574111491e-05,
"loss": 1.4559,
"step": 2520
},
{
"epoch": 0.03,
"learning_rate": 9.975049878293885e-05,
"loss": 1.4695,
"step": 2525
},
{
"epoch": 0.03,
"learning_rate": 9.974946971220722e-05,
"loss": 1.4431,
"step": 2530
},
{
"epoch": 0.03,
"learning_rate": 9.974843852896373e-05,
"loss": 1.5001,
"step": 2535
},
{
"epoch": 0.03,
"learning_rate": 9.974740523325217e-05,
"loss": 1.52,
"step": 2540
},
{
"epoch": 0.03,
"learning_rate": 9.97463698251164e-05,
"loss": 1.4392,
"step": 2545
},
{
"epoch": 0.03,
"learning_rate": 9.97453323046004e-05,
"loss": 1.505,
"step": 2550
},
{
"epoch": 0.03,
"learning_rate": 9.974429267174819e-05,
"loss": 1.4724,
"step": 2555
},
{
"epoch": 0.03,
"learning_rate": 9.974325092660396e-05,
"loss": 1.4707,
"step": 2560
},
{
"epoch": 0.03,
"learning_rate": 9.974220706921192e-05,
"loss": 1.483,
"step": 2565
},
{
"epoch": 0.03,
"learning_rate": 9.974116109961641e-05,
"loss": 1.4492,
"step": 2570
},
{
"epoch": 0.03,
"learning_rate": 9.974011301786184e-05,
"loss": 1.4615,
"step": 2575
},
{
"epoch": 0.03,
"learning_rate": 9.973906282399271e-05,
"loss": 1.4611,
"step": 2580
},
{
"epoch": 0.03,
"learning_rate": 9.973801051805363e-05,
"loss": 1.4824,
"step": 2585
},
{
"epoch": 0.03,
"learning_rate": 9.973695610008925e-05,
"loss": 1.461,
"step": 2590
},
{
"epoch": 0.03,
"learning_rate": 9.973589957014437e-05,
"loss": 1.485,
"step": 2595
},
{
"epoch": 0.03,
"learning_rate": 9.973484092826386e-05,
"loss": 1.5147,
"step": 2600
},
{
"epoch": 0.03,
"learning_rate": 9.973378017449267e-05,
"loss": 1.4532,
"step": 2605
},
{
"epoch": 0.03,
"learning_rate": 9.973271730887582e-05,
"loss": 1.4401,
"step": 2610
},
{
"epoch": 0.03,
"learning_rate": 9.973165233145847e-05,
"loss": 1.4716,
"step": 2615
},
{
"epoch": 0.03,
"learning_rate": 9.973058524228582e-05,
"loss": 1.485,
"step": 2620
},
{
"epoch": 0.03,
"learning_rate": 9.972951604140319e-05,
"loss": 1.467,
"step": 2625
},
{
"epoch": 0.03,
"learning_rate": 9.972844472885599e-05,
"loss": 1.4482,
"step": 2630
},
{
"epoch": 0.03,
"learning_rate": 9.97273713046897e-05,
"loss": 1.4437,
"step": 2635
},
{
"epoch": 0.03,
"learning_rate": 9.972629576894991e-05,
"loss": 1.467,
"step": 2640
},
{
"epoch": 0.03,
"learning_rate": 9.972521812168228e-05,
"loss": 1.4811,
"step": 2645
},
{
"epoch": 0.03,
"learning_rate": 9.972413836293258e-05,
"loss": 1.4496,
"step": 2650
},
{
"epoch": 0.03,
"learning_rate": 9.972305649274666e-05,
"loss": 1.4497,
"step": 2655
},
{
"epoch": 0.03,
"learning_rate": 9.972197251117045e-05,
"loss": 1.4374,
"step": 2660
},
{
"epoch": 0.03,
"learning_rate": 9.972088641824997e-05,
"loss": 1.4732,
"step": 2665
},
{
"epoch": 0.03,
"learning_rate": 9.971979821403137e-05,
"loss": 1.4776,
"step": 2670
},
{
"epoch": 0.04,
"learning_rate": 9.971870789856084e-05,
"loss": 1.4148,
"step": 2675
},
{
"epoch": 0.04,
"learning_rate": 9.971761547188467e-05,
"loss": 1.4646,
"step": 2680
},
{
"epoch": 0.04,
"learning_rate": 9.971652093404927e-05,
"loss": 1.4961,
"step": 2685
},
{
"epoch": 0.04,
"learning_rate": 9.97154242851011e-05,
"loss": 1.4954,
"step": 2690
},
{
"epoch": 0.04,
"learning_rate": 9.971432552508673e-05,
"loss": 1.4554,
"step": 2695
},
{
"epoch": 0.04,
"learning_rate": 9.971322465405282e-05,
"loss": 1.5234,
"step": 2700
},
{
"epoch": 0.04,
"learning_rate": 9.97121216720461e-05,
"loss": 1.4772,
"step": 2705
},
{
"epoch": 0.04,
"learning_rate": 9.971101657911344e-05,
"loss": 1.4536,
"step": 2710
},
{
"epoch": 0.04,
"learning_rate": 9.970990937530174e-05,
"loss": 1.5093,
"step": 2715
},
{
"epoch": 0.04,
"learning_rate": 9.970880006065801e-05,
"loss": 1.4985,
"step": 2720
},
{
"epoch": 0.04,
"learning_rate": 9.970768863522938e-05,
"loss": 1.4533,
"step": 2725
},
{
"epoch": 0.04,
"learning_rate": 9.970657509906303e-05,
"loss": 1.4898,
"step": 2730
},
{
"epoch": 0.04,
"learning_rate": 9.970545945220624e-05,
"loss": 1.5151,
"step": 2735
},
{
"epoch": 0.04,
"learning_rate": 9.970434169470638e-05,
"loss": 1.4635,
"step": 2740
},
{
"epoch": 0.04,
"learning_rate": 9.970322182661094e-05,
"loss": 1.4484,
"step": 2745
},
{
"epoch": 0.04,
"learning_rate": 9.970209984796743e-05,
"loss": 1.4544,
"step": 2750
},
{
"epoch": 0.04,
"learning_rate": 9.970097575882354e-05,
"loss": 1.4444,
"step": 2755
},
{
"epoch": 0.04,
"learning_rate": 9.969984955922697e-05,
"loss": 1.4885,
"step": 2760
},
{
"epoch": 0.04,
"learning_rate": 9.969872124922555e-05,
"loss": 1.501,
"step": 2765
},
{
"epoch": 0.04,
"learning_rate": 9.96975908288672e-05,
"loss": 1.4715,
"step": 2770
},
{
"epoch": 0.04,
"learning_rate": 9.96964582981999e-05,
"loss": 1.4455,
"step": 2775
},
{
"epoch": 0.04,
"learning_rate": 9.969532365727176e-05,
"loss": 1.4733,
"step": 2780
},
{
"epoch": 0.04,
"learning_rate": 9.969418690613096e-05,
"loss": 1.4887,
"step": 2785
},
{
"epoch": 0.04,
"learning_rate": 9.969304804482575e-05,
"loss": 1.4751,
"step": 2790
},
{
"epoch": 0.04,
"learning_rate": 9.969190707340452e-05,
"loss": 1.4963,
"step": 2795
},
{
"epoch": 0.04,
"learning_rate": 9.96907639919157e-05,
"loss": 1.4535,
"step": 2800
},
{
"epoch": 0.04,
"learning_rate": 9.968961880040783e-05,
"loss": 1.4932,
"step": 2805
},
{
"epoch": 0.04,
"learning_rate": 9.968847149892954e-05,
"loss": 1.4727,
"step": 2810
},
{
"epoch": 0.04,
"learning_rate": 9.968732208752954e-05,
"loss": 1.5136,
"step": 2815
},
{
"epoch": 0.04,
"learning_rate": 9.968617056625666e-05,
"loss": 1.4927,
"step": 2820
},
{
"epoch": 0.04,
"learning_rate": 9.968501693515977e-05,
"loss": 1.428,
"step": 2825
},
{
"epoch": 0.04,
"learning_rate": 9.968386119428788e-05,
"loss": 1.4855,
"step": 2830
},
{
"epoch": 0.04,
"learning_rate": 9.968270334369004e-05,
"loss": 1.4481,
"step": 2835
},
{
"epoch": 0.04,
"learning_rate": 9.968154338341544e-05,
"loss": 1.4959,
"step": 2840
},
{
"epoch": 0.04,
"learning_rate": 9.968038131351331e-05,
"loss": 1.4978,
"step": 2845
},
{
"epoch": 0.04,
"learning_rate": 9.967921713403304e-05,
"loss": 1.455,
"step": 2850
},
{
"epoch": 0.04,
"learning_rate": 9.967805084502402e-05,
"loss": 1.4983,
"step": 2855
},
{
"epoch": 0.04,
"learning_rate": 9.967688244653579e-05,
"loss": 1.4598,
"step": 2860
},
{
"epoch": 0.04,
"learning_rate": 9.967571193861796e-05,
"loss": 1.4993,
"step": 2865
},
{
"epoch": 0.04,
"learning_rate": 9.967453932132025e-05,
"loss": 1.4597,
"step": 2870
},
{
"epoch": 0.04,
"learning_rate": 9.967336459469241e-05,
"loss": 1.4722,
"step": 2875
},
{
"epoch": 0.04,
"learning_rate": 9.967218775878438e-05,
"loss": 1.4562,
"step": 2880
},
{
"epoch": 0.04,
"learning_rate": 9.96710088136461e-05,
"loss": 1.4869,
"step": 2885
},
{
"epoch": 0.04,
"learning_rate": 9.966982775932765e-05,
"loss": 1.5237,
"step": 2890
},
{
"epoch": 0.04,
"learning_rate": 9.966864459587915e-05,
"loss": 1.5175,
"step": 2895
},
{
"epoch": 0.04,
"learning_rate": 9.966745932335085e-05,
"loss": 1.4234,
"step": 2900
},
{
"epoch": 0.04,
"learning_rate": 9.96662719417931e-05,
"loss": 1.45,
"step": 2905
},
{
"epoch": 0.04,
"learning_rate": 9.96650824512563e-05,
"loss": 1.5113,
"step": 2910
},
{
"epoch": 0.04,
"learning_rate": 9.966389085179096e-05,
"loss": 1.4743,
"step": 2915
},
{
"epoch": 0.04,
"learning_rate": 9.966269714344769e-05,
"loss": 1.4786,
"step": 2920
},
{
"epoch": 0.04,
"learning_rate": 9.966150132627718e-05,
"loss": 1.4811,
"step": 2925
},
{
"epoch": 0.04,
"learning_rate": 9.966030340033021e-05,
"loss": 1.4953,
"step": 2930
},
{
"epoch": 0.04,
"learning_rate": 9.965910336565763e-05,
"loss": 1.5109,
"step": 2935
},
{
"epoch": 0.04,
"learning_rate": 9.965790122231041e-05,
"loss": 1.4852,
"step": 2940
},
{
"epoch": 0.04,
"learning_rate": 9.965669697033959e-05,
"loss": 1.4681,
"step": 2945
},
{
"epoch": 0.04,
"learning_rate": 9.965549060979631e-05,
"loss": 1.4981,
"step": 2950
},
{
"epoch": 0.04,
"learning_rate": 9.96542821407318e-05,
"loss": 1.4871,
"step": 2955
},
{
"epoch": 0.04,
"learning_rate": 9.965307156319736e-05,
"loss": 1.4809,
"step": 2960
},
{
"epoch": 0.04,
"learning_rate": 9.965185887724442e-05,
"loss": 1.4542,
"step": 2965
},
{
"epoch": 0.04,
"learning_rate": 9.965064408292446e-05,
"loss": 1.4501,
"step": 2970
},
{
"epoch": 0.04,
"learning_rate": 9.964942718028905e-05,
"loss": 1.4623,
"step": 2975
},
{
"epoch": 0.04,
"learning_rate": 9.96482081693899e-05,
"loss": 1.4674,
"step": 2980
},
{
"epoch": 0.04,
"learning_rate": 9.964698705027871e-05,
"loss": 1.4903,
"step": 2985
},
{
"epoch": 0.04,
"learning_rate": 9.964576382300739e-05,
"loss": 1.44,
"step": 2990
},
{
"epoch": 0.04,
"learning_rate": 9.964453848762787e-05,
"loss": 1.4611,
"step": 2995
},
{
"epoch": 0.04,
"learning_rate": 9.964331104419217e-05,
"loss": 1.4697,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 9.964208149275243e-05,
"loss": 1.4877,
"step": 3005
},
{
"epoch": 0.04,
"learning_rate": 9.964084983336084e-05,
"loss": 1.4437,
"step": 3010
},
{
"epoch": 0.04,
"learning_rate": 9.96396160660697e-05,
"loss": 1.4285,
"step": 3015
},
{
"epoch": 0.04,
"learning_rate": 9.96383801909314e-05,
"loss": 1.4469,
"step": 3020
},
{
"epoch": 0.04,
"learning_rate": 9.963714220799844e-05,
"loss": 1.4968,
"step": 3025
},
{
"epoch": 0.04,
"learning_rate": 9.963590211732337e-05,
"loss": 1.4538,
"step": 3030
},
{
"epoch": 0.04,
"learning_rate": 9.963465991895884e-05,
"loss": 1.4847,
"step": 3035
},
{
"epoch": 0.04,
"learning_rate": 9.963341561295762e-05,
"loss": 1.4646,
"step": 3040
},
{
"epoch": 0.04,
"learning_rate": 9.963216919937253e-05,
"loss": 1.4555,
"step": 3045
},
{
"epoch": 0.04,
"learning_rate": 9.96309206782565e-05,
"loss": 1.4775,
"step": 3050
},
{
"epoch": 0.04,
"learning_rate": 9.962967004966256e-05,
"loss": 1.4902,
"step": 3055
},
{
"epoch": 0.04,
"learning_rate": 9.96284173136438e-05,
"loss": 1.462,
"step": 3060
},
{
"epoch": 0.04,
"learning_rate": 9.962716247025341e-05,
"loss": 1.4773,
"step": 3065
},
{
"epoch": 0.04,
"learning_rate": 9.962590551954469e-05,
"loss": 1.4614,
"step": 3070
},
{
"epoch": 0.04,
"learning_rate": 9.962464646157101e-05,
"loss": 1.4442,
"step": 3075
},
{
"epoch": 0.04,
"learning_rate": 9.962338529638583e-05,
"loss": 1.4608,
"step": 3080
},
{
"epoch": 0.04,
"learning_rate": 9.96221220240427e-05,
"loss": 1.4478,
"step": 3085
},
{
"epoch": 0.04,
"learning_rate": 9.962085664459526e-05,
"loss": 1.4506,
"step": 3090
},
{
"epoch": 0.04,
"learning_rate": 9.961958915809726e-05,
"loss": 1.4767,
"step": 3095
},
{
"epoch": 0.04,
"learning_rate": 9.96183195646025e-05,
"loss": 1.4774,
"step": 3100
},
{
"epoch": 0.04,
"learning_rate": 9.961704786416491e-05,
"loss": 1.4447,
"step": 3105
},
{
"epoch": 0.04,
"learning_rate": 9.961577405683846e-05,
"loss": 1.4755,
"step": 3110
},
{
"epoch": 0.04,
"learning_rate": 9.961449814267727e-05,
"loss": 1.445,
"step": 3115
},
{
"epoch": 0.04,
"learning_rate": 9.96132201217355e-05,
"loss": 1.4518,
"step": 3120
},
{
"epoch": 0.04,
"learning_rate": 9.961193999406744e-05,
"loss": 1.4853,
"step": 3125
},
{
"epoch": 0.04,
"learning_rate": 9.961065775972744e-05,
"loss": 1.4897,
"step": 3130
},
{
"epoch": 0.04,
"learning_rate": 9.960937341876992e-05,
"loss": 1.4718,
"step": 3135
},
{
"epoch": 0.04,
"learning_rate": 9.960808697124946e-05,
"loss": 1.4699,
"step": 3140
},
{
"epoch": 0.04,
"learning_rate": 9.960679841722067e-05,
"loss": 1.4616,
"step": 3145
},
{
"epoch": 0.04,
"learning_rate": 9.960550775673825e-05,
"loss": 1.4945,
"step": 3150
},
{
"epoch": 0.04,
"learning_rate": 9.960421498985702e-05,
"loss": 1.4618,
"step": 3155
},
{
"epoch": 0.04,
"learning_rate": 9.960292011663187e-05,
"loss": 1.4431,
"step": 3160
},
{
"epoch": 0.04,
"learning_rate": 9.960162313711779e-05,
"loss": 1.4889,
"step": 3165
},
{
"epoch": 0.04,
"learning_rate": 9.960032405136983e-05,
"loss": 1.5063,
"step": 3170
},
{
"epoch": 0.04,
"learning_rate": 9.959902285944319e-05,
"loss": 1.4761,
"step": 3175
},
{
"epoch": 0.04,
"learning_rate": 9.959771956139311e-05,
"loss": 1.4592,
"step": 3180
},
{
"epoch": 0.04,
"learning_rate": 9.959641415727492e-05,
"loss": 1.4859,
"step": 3185
},
{
"epoch": 0.04,
"learning_rate": 9.959510664714406e-05,
"loss": 1.4829,
"step": 3190
},
{
"epoch": 0.04,
"learning_rate": 9.959379703105604e-05,
"loss": 1.4677,
"step": 3195
},
{
"epoch": 0.04,
"learning_rate": 9.959248530906651e-05,
"loss": 1.4525,
"step": 3200
},
{
"epoch": 0.04,
"learning_rate": 9.959117148123111e-05,
"loss": 1.4461,
"step": 3205
},
{
"epoch": 0.04,
"learning_rate": 9.958985554760567e-05,
"loss": 1.484,
"step": 3210
},
{
"epoch": 0.04,
"learning_rate": 9.958853750824607e-05,
"loss": 1.4997,
"step": 3215
},
{
"epoch": 0.04,
"learning_rate": 9.958721736320824e-05,
"loss": 1.452,
"step": 3220
},
{
"epoch": 0.04,
"learning_rate": 9.958589511254828e-05,
"loss": 1.4491,
"step": 3225
},
{
"epoch": 0.04,
"learning_rate": 9.95845707563223e-05,
"loss": 1.4747,
"step": 3230
},
{
"epoch": 0.04,
"learning_rate": 9.958324429458658e-05,
"loss": 1.4686,
"step": 3235
},
{
"epoch": 0.04,
"learning_rate": 9.958191572739741e-05,
"loss": 1.4659,
"step": 3240
},
{
"epoch": 0.04,
"learning_rate": 9.95805850548112e-05,
"loss": 1.4905,
"step": 3245
},
{
"epoch": 0.04,
"learning_rate": 9.95792522768845e-05,
"loss": 1.47,
"step": 3250
},
{
"epoch": 0.04,
"learning_rate": 9.957791739367386e-05,
"loss": 1.4486,
"step": 3255
},
{
"epoch": 0.04,
"learning_rate": 9.957658040523597e-05,
"loss": 1.4363,
"step": 3260
},
{
"epoch": 0.04,
"learning_rate": 9.95752413116276e-05,
"loss": 1.4701,
"step": 3265
},
{
"epoch": 0.04,
"learning_rate": 9.957390011290566e-05,
"loss": 1.478,
"step": 3270
},
{
"epoch": 0.04,
"learning_rate": 9.957255680912703e-05,
"loss": 1.4253,
"step": 3275
},
{
"epoch": 0.04,
"learning_rate": 9.957121140034879e-05,
"loss": 1.4897,
"step": 3280
},
{
"epoch": 0.04,
"learning_rate": 9.956986388662805e-05,
"loss": 1.4864,
"step": 3285
},
{
"epoch": 0.04,
"learning_rate": 9.956851426802207e-05,
"loss": 1.4749,
"step": 3290
},
{
"epoch": 0.04,
"learning_rate": 9.956716254458812e-05,
"loss": 1.445,
"step": 3295
},
{
"epoch": 0.04,
"learning_rate": 9.956580871638361e-05,
"loss": 1.4671,
"step": 3300
},
{
"epoch": 0.04,
"learning_rate": 9.956445278346603e-05,
"loss": 1.4971,
"step": 3305
},
{
"epoch": 0.04,
"learning_rate": 9.956309474589296e-05,
"loss": 1.4428,
"step": 3310
},
{
"epoch": 0.04,
"learning_rate": 9.956173460372204e-05,
"loss": 1.5019,
"step": 3315
},
{
"epoch": 0.04,
"learning_rate": 9.956037235701107e-05,
"loss": 1.4811,
"step": 3320
},
{
"epoch": 0.04,
"learning_rate": 9.955900800581785e-05,
"loss": 1.4506,
"step": 3325
},
{
"epoch": 0.04,
"learning_rate": 9.955764155020037e-05,
"loss": 1.4585,
"step": 3330
},
{
"epoch": 0.04,
"learning_rate": 9.95562729902166e-05,
"loss": 1.476,
"step": 3335
},
{
"epoch": 0.04,
"learning_rate": 9.955490232592466e-05,
"loss": 1.4578,
"step": 3340
},
{
"epoch": 0.04,
"learning_rate": 9.955352955738278e-05,
"loss": 1.4882,
"step": 3345
},
{
"epoch": 0.04,
"learning_rate": 9.955215468464924e-05,
"loss": 1.4566,
"step": 3350
},
{
"epoch": 0.04,
"learning_rate": 9.955077770778242e-05,
"loss": 1.4437,
"step": 3355
},
{
"epoch": 0.04,
"learning_rate": 9.954939862684078e-05,
"loss": 1.4542,
"step": 3360
},
{
"epoch": 0.04,
"learning_rate": 9.954801744188289e-05,
"loss": 1.4664,
"step": 3365
},
{
"epoch": 0.04,
"learning_rate": 9.954663415296741e-05,
"loss": 1.4987,
"step": 3370
},
{
"epoch": 0.04,
"learning_rate": 9.954524876015306e-05,
"loss": 1.427,
"step": 3375
},
{
"epoch": 0.04,
"learning_rate": 9.954386126349868e-05,
"loss": 1.4348,
"step": 3380
},
{
"epoch": 0.04,
"learning_rate": 9.954247166306317e-05,
"loss": 1.4928,
"step": 3385
},
{
"epoch": 0.04,
"learning_rate": 9.954107995890556e-05,
"loss": 1.4803,
"step": 3390
},
{
"epoch": 0.04,
"learning_rate": 9.953968615108493e-05,
"loss": 1.4571,
"step": 3395
},
{
"epoch": 0.04,
"learning_rate": 9.953829023966047e-05,
"loss": 1.4577,
"step": 3400
},
{
"epoch": 0.04,
"learning_rate": 9.953689222469146e-05,
"loss": 1.4662,
"step": 3405
},
{
"epoch": 0.04,
"learning_rate": 9.953549210623724e-05,
"loss": 1.4057,
"step": 3410
},
{
"epoch": 0.04,
"learning_rate": 9.953408988435731e-05,
"loss": 1.5058,
"step": 3415
},
{
"epoch": 0.04,
"learning_rate": 9.953268555911117e-05,
"loss": 1.4709,
"step": 3420
},
{
"epoch": 0.04,
"learning_rate": 9.953127913055848e-05,
"loss": 1.4827,
"step": 3425
},
{
"epoch": 0.04,
"learning_rate": 9.952987059875894e-05,
"loss": 1.5049,
"step": 3430
},
{
"epoch": 0.05,
"learning_rate": 9.952845996377235e-05,
"loss": 1.4623,
"step": 3435
},
{
"epoch": 0.05,
"learning_rate": 9.952704722565864e-05,
"loss": 1.4572,
"step": 3440
},
{
"epoch": 0.05,
"learning_rate": 9.95256323844778e-05,
"loss": 1.4761,
"step": 3445
},
{
"epoch": 0.05,
"learning_rate": 9.95242154402899e-05,
"loss": 1.4511,
"step": 3450
},
{
"epoch": 0.05,
"learning_rate": 9.952279639315508e-05,
"loss": 1.4263,
"step": 3455
},
{
"epoch": 0.05,
"learning_rate": 9.952137524313362e-05,
"loss": 1.4831,
"step": 3460
},
{
"epoch": 0.05,
"learning_rate": 9.951995199028587e-05,
"loss": 1.4664,
"step": 3465
},
{
"epoch": 0.05,
"learning_rate": 9.951852663467228e-05,
"loss": 1.4741,
"step": 3470
},
{
"epoch": 0.05,
"learning_rate": 9.951709917635333e-05,
"loss": 1.4636,
"step": 3475
},
{
"epoch": 0.05,
"learning_rate": 9.951566961538968e-05,
"loss": 1.4632,
"step": 3480
},
{
"epoch": 0.05,
"learning_rate": 9.951423795184201e-05,
"loss": 1.4948,
"step": 3485
},
{
"epoch": 0.05,
"learning_rate": 9.951280418577111e-05,
"loss": 1.465,
"step": 3490
},
{
"epoch": 0.05,
"learning_rate": 9.951136831723789e-05,
"loss": 1.4898,
"step": 3495
},
{
"epoch": 0.05,
"learning_rate": 9.950993034630328e-05,
"loss": 1.4642,
"step": 3500
},
{
"epoch": 0.05,
"learning_rate": 9.950849027302836e-05,
"loss": 1.452,
"step": 3505
},
{
"epoch": 0.05,
"learning_rate": 9.95070480974743e-05,
"loss": 1.4858,
"step": 3510
},
{
"epoch": 0.05,
"learning_rate": 9.95056038197023e-05,
"loss": 1.4803,
"step": 3515
},
{
"epoch": 0.05,
"learning_rate": 9.950415743977373e-05,
"loss": 1.4427,
"step": 3520
},
{
"epoch": 0.05,
"learning_rate": 9.950270895774997e-05,
"loss": 1.4719,
"step": 3525
},
{
"epoch": 0.05,
"learning_rate": 9.950125837369254e-05,
"loss": 1.5095,
"step": 3530
},
{
"epoch": 0.05,
"learning_rate": 9.949980568766305e-05,
"loss": 1.4551,
"step": 3535
},
{
"epoch": 0.05,
"learning_rate": 9.949835089972317e-05,
"loss": 1.45,
"step": 3540
},
{
"epoch": 0.05,
"learning_rate": 9.949689400993467e-05,
"loss": 1.5131,
"step": 3545
},
{
"epoch": 0.05,
"learning_rate": 9.949543501835944e-05,
"loss": 1.4688,
"step": 3550
},
{
"epoch": 0.05,
"learning_rate": 9.949397392505942e-05,
"loss": 1.4781,
"step": 3555
},
{
"epoch": 0.05,
"learning_rate": 9.949251073009662e-05,
"loss": 1.4491,
"step": 3560
},
{
"epoch": 0.05,
"learning_rate": 9.949104543353322e-05,
"loss": 1.4332,
"step": 3565
},
{
"epoch": 0.05,
"learning_rate": 9.948957803543142e-05,
"loss": 1.4343,
"step": 3570
},
{
"epoch": 0.05,
"learning_rate": 9.948810853585353e-05,
"loss": 1.478,
"step": 3575
},
{
"epoch": 0.05,
"learning_rate": 9.948663693486195e-05,
"loss": 1.4561,
"step": 3580
},
{
"epoch": 0.05,
"learning_rate": 9.948516323251918e-05,
"loss": 1.4212,
"step": 3585
},
{
"epoch": 0.05,
"learning_rate": 9.948368742888776e-05,
"loss": 1.4113,
"step": 3590
},
{
"epoch": 0.05,
"learning_rate": 9.948220952403042e-05,
"loss": 1.4517,
"step": 3595
},
{
"epoch": 0.05,
"learning_rate": 9.948072951800985e-05,
"loss": 1.5028,
"step": 3600
},
{
"epoch": 0.05,
"learning_rate": 9.947924741088894e-05,
"loss": 1.4611,
"step": 3605
},
{
"epoch": 0.05,
"learning_rate": 9.947776320273061e-05,
"loss": 1.4314,
"step": 3610
},
{
"epoch": 0.05,
"learning_rate": 9.947627689359789e-05,
"loss": 1.4122,
"step": 3615
},
{
"epoch": 0.05,
"learning_rate": 9.947478848355387e-05,
"loss": 1.4653,
"step": 3620
},
{
"epoch": 0.05,
"learning_rate": 9.947329797266178e-05,
"loss": 1.4599,
"step": 3625
},
{
"epoch": 0.05,
"learning_rate": 9.94718053609849e-05,
"loss": 1.439,
"step": 3630
},
{
"epoch": 0.05,
"learning_rate": 9.947031064858663e-05,
"loss": 1.48,
"step": 3635
},
{
"epoch": 0.05,
"learning_rate": 9.94688138355304e-05,
"loss": 1.4827,
"step": 3640
},
{
"epoch": 0.05,
"learning_rate": 9.94673149218798e-05,
"loss": 1.4926,
"step": 3645
},
{
"epoch": 0.05,
"learning_rate": 9.946581390769846e-05,
"loss": 1.4708,
"step": 3650
},
{
"epoch": 0.05,
"learning_rate": 9.946431079305014e-05,
"loss": 1.4693,
"step": 3655
},
{
"epoch": 0.05,
"learning_rate": 9.946280557799865e-05,
"loss": 1.4745,
"step": 3660
},
{
"epoch": 0.05,
"learning_rate": 9.946129826260793e-05,
"loss": 1.4809,
"step": 3665
},
{
"epoch": 0.05,
"learning_rate": 9.945978884694194e-05,
"loss": 1.4619,
"step": 3670
},
{
"epoch": 0.05,
"learning_rate": 9.94582773310648e-05,
"loss": 1.4385,
"step": 3675
},
{
"epoch": 0.05,
"learning_rate": 9.94567637150407e-05,
"loss": 1.4959,
"step": 3680
},
{
"epoch": 0.05,
"learning_rate": 9.94552479989339e-05,
"loss": 1.4615,
"step": 3685
},
{
"epoch": 0.05,
"learning_rate": 9.945373018280879e-05,
"loss": 1.4841,
"step": 3690
},
{
"epoch": 0.05,
"learning_rate": 9.945221026672978e-05,
"loss": 1.3997,
"step": 3695
},
{
"epoch": 0.05,
"learning_rate": 9.945068825076143e-05,
"loss": 1.4434,
"step": 3700
},
{
"epoch": 0.05,
"learning_rate": 9.944916413496837e-05,
"loss": 1.4411,
"step": 3705
},
{
"epoch": 0.05,
"learning_rate": 9.944763791941532e-05,
"loss": 1.4333,
"step": 3710
},
{
"epoch": 0.05,
"learning_rate": 9.944610960416707e-05,
"loss": 1.4518,
"step": 3715
},
{
"epoch": 0.05,
"learning_rate": 9.944457918928856e-05,
"loss": 1.4366,
"step": 3720
},
{
"epoch": 0.05,
"learning_rate": 9.944304667484472e-05,
"loss": 1.4974,
"step": 3725
},
{
"epoch": 0.05,
"learning_rate": 9.944151206090067e-05,
"loss": 1.4472,
"step": 3730
},
{
"epoch": 0.05,
"learning_rate": 9.943997534752155e-05,
"loss": 1.4885,
"step": 3735
},
{
"epoch": 0.05,
"learning_rate": 9.943843653477262e-05,
"loss": 1.45,
"step": 3740
},
{
"epoch": 0.05,
"learning_rate": 9.943689562271922e-05,
"loss": 1.4627,
"step": 3745
},
{
"epoch": 0.05,
"learning_rate": 9.943535261142678e-05,
"loss": 1.4445,
"step": 3750
},
{
"epoch": 0.05,
"learning_rate": 9.943380750096084e-05,
"loss": 1.471,
"step": 3755
},
{
"epoch": 0.05,
"learning_rate": 9.943226029138699e-05,
"loss": 1.4499,
"step": 3760
},
{
"epoch": 0.05,
"learning_rate": 9.943071098277094e-05,
"loss": 1.4871,
"step": 3765
},
{
"epoch": 0.05,
"learning_rate": 9.942915957517847e-05,
"loss": 1.4968,
"step": 3770
},
{
"epoch": 0.05,
"learning_rate": 9.942760606867545e-05,
"loss": 1.4206,
"step": 3775
},
{
"epoch": 0.05,
"learning_rate": 9.942605046332787e-05,
"loss": 1.4681,
"step": 3780
},
{
"epoch": 0.05,
"learning_rate": 9.942449275920176e-05,
"loss": 1.4705,
"step": 3785
},
{
"epoch": 0.05,
"learning_rate": 9.942293295636329e-05,
"loss": 1.5181,
"step": 3790
},
{
"epoch": 0.05,
"learning_rate": 9.942137105487867e-05,
"loss": 1.4661,
"step": 3795
},
{
"epoch": 0.05,
"learning_rate": 9.941980705481425e-05,
"loss": 1.4705,
"step": 3800
},
{
"epoch": 0.05,
"learning_rate": 9.941824095623643e-05,
"loss": 1.4473,
"step": 3805
},
{
"epoch": 0.05,
"learning_rate": 9.941667275921169e-05,
"loss": 1.472,
"step": 3810
},
{
"epoch": 0.05,
"learning_rate": 9.941510246380664e-05,
"loss": 1.4648,
"step": 3815
},
{
"epoch": 0.05,
"learning_rate": 9.941353007008797e-05,
"loss": 1.4492,
"step": 3820
},
{
"epoch": 0.05,
"learning_rate": 9.941195557812242e-05,
"loss": 1.458,
"step": 3825
},
{
"epoch": 0.05,
"learning_rate": 9.941037898797688e-05,
"loss": 1.4997,
"step": 3830
},
{
"epoch": 0.05,
"learning_rate": 9.940880029971828e-05,
"loss": 1.4337,
"step": 3835
},
{
"epoch": 0.05,
"learning_rate": 9.940721951341365e-05,
"loss": 1.4396,
"step": 3840
},
{
"epoch": 0.05,
"learning_rate": 9.940563662913011e-05,
"loss": 1.5148,
"step": 3845
},
{
"epoch": 0.05,
"learning_rate": 9.940405164693491e-05,
"loss": 1.4248,
"step": 3850
},
{
"epoch": 0.05,
"learning_rate": 9.940246456689531e-05,
"loss": 1.4646,
"step": 3855
},
{
"epoch": 0.05,
"learning_rate": 9.940087538907873e-05,
"loss": 1.5002,
"step": 3860
},
{
"epoch": 0.05,
"learning_rate": 9.939928411355263e-05,
"loss": 1.4977,
"step": 3865
},
{
"epoch": 0.05,
"learning_rate": 9.93976907403846e-05,
"loss": 1.477,
"step": 3870
},
{
"epoch": 0.05,
"learning_rate": 9.939609526964231e-05,
"loss": 1.4644,
"step": 3875
},
{
"epoch": 0.05,
"learning_rate": 9.939449770139346e-05,
"loss": 1.4451,
"step": 3880
},
{
"epoch": 0.05,
"learning_rate": 9.939289803570592e-05,
"loss": 1.4285,
"step": 3885
},
{
"epoch": 0.05,
"learning_rate": 9.939129627264765e-05,
"loss": 1.4814,
"step": 3890
},
{
"epoch": 0.05,
"learning_rate": 9.93896924122866e-05,
"loss": 1.4449,
"step": 3895
},
{
"epoch": 0.05,
"learning_rate": 9.93880864546909e-05,
"loss": 1.4822,
"step": 3900
},
{
"epoch": 0.05,
"learning_rate": 9.938647839992876e-05,
"loss": 1.4382,
"step": 3905
},
{
"epoch": 0.05,
"learning_rate": 9.938486824806845e-05,
"loss": 1.3952,
"step": 3910
},
{
"epoch": 0.05,
"learning_rate": 9.938325599917835e-05,
"loss": 1.4876,
"step": 3915
},
{
"epoch": 0.05,
"learning_rate": 9.938164165332691e-05,
"loss": 1.4355,
"step": 3920
},
{
"epoch": 0.05,
"learning_rate": 9.938002521058269e-05,
"loss": 1.4392,
"step": 3925
},
{
"epoch": 0.05,
"learning_rate": 9.937840667101431e-05,
"loss": 1.4376,
"step": 3930
},
{
"epoch": 0.05,
"learning_rate": 9.937678603469052e-05,
"loss": 1.4617,
"step": 3935
},
{
"epoch": 0.05,
"learning_rate": 9.937516330168012e-05,
"loss": 1.4633,
"step": 3940
},
{
"epoch": 0.05,
"learning_rate": 9.937353847205203e-05,
"loss": 1.433,
"step": 3945
},
{
"epoch": 0.05,
"learning_rate": 9.937191154587523e-05,
"loss": 1.4725,
"step": 3950
},
{
"epoch": 0.05,
"learning_rate": 9.937028252321884e-05,
"loss": 1.4419,
"step": 3955
},
{
"epoch": 0.05,
"learning_rate": 9.936865140415199e-05,
"loss": 1.4478,
"step": 3960
},
{
"epoch": 0.05,
"learning_rate": 9.936701818874397e-05,
"loss": 1.5013,
"step": 3965
},
{
"epoch": 0.05,
"learning_rate": 9.93653828770641e-05,
"loss": 1.4904,
"step": 3970
},
{
"epoch": 0.05,
"learning_rate": 9.936374546918186e-05,
"loss": 1.4481,
"step": 3975
},
{
"epoch": 0.05,
"learning_rate": 9.936210596516675e-05,
"loss": 1.459,
"step": 3980
},
{
"epoch": 0.05,
"learning_rate": 9.936046436508838e-05,
"loss": 1.482,
"step": 3985
},
{
"epoch": 0.05,
"learning_rate": 9.93588206690165e-05,
"loss": 1.452,
"step": 3990
},
{
"epoch": 0.05,
"learning_rate": 9.935717487702088e-05,
"loss": 1.4596,
"step": 3995
},
{
"epoch": 0.05,
"learning_rate": 9.93555269891714e-05,
"loss": 1.4628,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 9.935387700553805e-05,
"loss": 1.5099,
"step": 4005
},
{
"epoch": 0.05,
"learning_rate": 9.935222492619088e-05,
"loss": 1.4765,
"step": 4010
},
{
"epoch": 0.05,
"learning_rate": 9.935057075120005e-05,
"loss": 1.4906,
"step": 4015
},
{
"epoch": 0.05,
"learning_rate": 9.934891448063579e-05,
"loss": 1.453,
"step": 4020
},
{
"epoch": 0.05,
"learning_rate": 9.934725611456846e-05,
"loss": 1.4799,
"step": 4025
},
{
"epoch": 0.05,
"learning_rate": 9.934559565306842e-05,
"loss": 1.4669,
"step": 4030
},
{
"epoch": 0.05,
"learning_rate": 9.934393309620625e-05,
"loss": 1.4452,
"step": 4035
},
{
"epoch": 0.05,
"learning_rate": 9.93422684440525e-05,
"loss": 1.4824,
"step": 4040
},
{
"epoch": 0.05,
"learning_rate": 9.934060169667786e-05,
"loss": 1.4521,
"step": 4045
},
{
"epoch": 0.05,
"learning_rate": 9.933893285415313e-05,
"loss": 1.4587,
"step": 4050
},
{
"epoch": 0.05,
"learning_rate": 9.933726191654915e-05,
"loss": 1.4763,
"step": 4055
},
{
"epoch": 0.05,
"learning_rate": 9.933558888393688e-05,
"loss": 1.4473,
"step": 4060
},
{
"epoch": 0.05,
"learning_rate": 9.933391375638736e-05,
"loss": 1.4318,
"step": 4065
},
{
"epoch": 0.05,
"learning_rate": 9.933223653397172e-05,
"loss": 1.3952,
"step": 4070
},
{
"epoch": 0.05,
"learning_rate": 9.93305572167612e-05,
"loss": 1.4517,
"step": 4075
},
{
"epoch": 0.05,
"learning_rate": 9.932887580482708e-05,
"loss": 1.5049,
"step": 4080
},
{
"epoch": 0.05,
"learning_rate": 9.932719229824077e-05,
"loss": 1.4394,
"step": 4085
},
{
"epoch": 0.05,
"learning_rate": 9.932550669707377e-05,
"loss": 1.4606,
"step": 4090
},
{
"epoch": 0.05,
"learning_rate": 9.932381900139765e-05,
"loss": 1.447,
"step": 4095
},
{
"epoch": 0.05,
"learning_rate": 9.932212921128404e-05,
"loss": 1.4954,
"step": 4100
},
{
"epoch": 0.05,
"learning_rate": 9.932043732680474e-05,
"loss": 1.4487,
"step": 4105
},
{
"epoch": 0.05,
"learning_rate": 9.931874334803157e-05,
"loss": 1.4441,
"step": 4110
},
{
"epoch": 0.05,
"learning_rate": 9.931704727503646e-05,
"loss": 1.4972,
"step": 4115
},
{
"epoch": 0.05,
"learning_rate": 9.931534910789145e-05,
"loss": 1.4573,
"step": 4120
},
{
"epoch": 0.05,
"learning_rate": 9.93136488466686e-05,
"loss": 1.4498,
"step": 4125
},
{
"epoch": 0.05,
"learning_rate": 9.931194649144018e-05,
"loss": 1.4551,
"step": 4130
},
{
"epoch": 0.05,
"learning_rate": 9.931024204227843e-05,
"loss": 1.4801,
"step": 4135
},
{
"epoch": 0.05,
"learning_rate": 9.930853549925573e-05,
"loss": 1.4837,
"step": 4140
},
{
"epoch": 0.05,
"learning_rate": 9.930682686244455e-05,
"loss": 1.4377,
"step": 4145
},
{
"epoch": 0.05,
"learning_rate": 9.930511613191745e-05,
"loss": 1.5061,
"step": 4150
},
{
"epoch": 0.05,
"learning_rate": 9.930340330774708e-05,
"loss": 1.4496,
"step": 4155
},
{
"epoch": 0.05,
"learning_rate": 9.930168839000613e-05,
"loss": 1.4577,
"step": 4160
},
{
"epoch": 0.05,
"learning_rate": 9.929997137876747e-05,
"loss": 1.4217,
"step": 4165
},
{
"epoch": 0.05,
"learning_rate": 9.929825227410399e-05,
"loss": 1.4801,
"step": 4170
},
{
"epoch": 0.05,
"learning_rate": 9.929653107608868e-05,
"loss": 1.464,
"step": 4175
},
{
"epoch": 0.05,
"learning_rate": 9.929480778479464e-05,
"loss": 1.4479,
"step": 4180
},
{
"epoch": 0.05,
"learning_rate": 9.929308240029504e-05,
"loss": 1.4137,
"step": 4185
},
{
"epoch": 0.05,
"learning_rate": 9.929135492266315e-05,
"loss": 1.4608,
"step": 4190
},
{
"epoch": 0.05,
"learning_rate": 9.92896253519723e-05,
"loss": 1.4468,
"step": 4195
},
{
"epoch": 0.06,
"learning_rate": 9.928789368829599e-05,
"loss": 1.4628,
"step": 4200
},
{
"epoch": 0.06,
"learning_rate": 9.928615993170767e-05,
"loss": 1.4478,
"step": 4205
},
{
"epoch": 0.06,
"learning_rate": 9.928442408228103e-05,
"loss": 1.4556,
"step": 4210
},
{
"epoch": 0.06,
"learning_rate": 9.928268614008975e-05,
"loss": 1.4662,
"step": 4215
},
{
"epoch": 0.06,
"learning_rate": 9.928094610520764e-05,
"loss": 1.4216,
"step": 4220
},
{
"epoch": 0.06,
"learning_rate": 9.927920397770858e-05,
"loss": 1.4833,
"step": 4225
},
{
"epoch": 0.06,
"learning_rate": 9.927745975766654e-05,
"loss": 1.4491,
"step": 4230
},
{
"epoch": 0.06,
"learning_rate": 9.927571344515559e-05,
"loss": 1.4505,
"step": 4235
},
{
"epoch": 0.06,
"learning_rate": 9.927396504024988e-05,
"loss": 1.4012,
"step": 4240
},
{
"epoch": 0.06,
"learning_rate": 9.927221454302365e-05,
"loss": 1.4578,
"step": 4245
},
{
"epoch": 0.06,
"learning_rate": 9.927046195355125e-05,
"loss": 1.4669,
"step": 4250
},
{
"epoch": 0.06,
"learning_rate": 9.92687072719071e-05,
"loss": 1.4579,
"step": 4255
},
{
"epoch": 0.06,
"learning_rate": 9.926695049816568e-05,
"loss": 1.4284,
"step": 4260
},
{
"epoch": 0.06,
"learning_rate": 9.92651916324016e-05,
"loss": 1.452,
"step": 4265
},
{
"epoch": 0.06,
"learning_rate": 9.926343067468957e-05,
"loss": 1.4234,
"step": 4270
},
{
"epoch": 0.06,
"learning_rate": 9.926166762510434e-05,
"loss": 1.3977,
"step": 4275
},
{
"epoch": 0.06,
"learning_rate": 9.925990248372076e-05,
"loss": 1.4611,
"step": 4280
},
{
"epoch": 0.06,
"learning_rate": 9.925813525061384e-05,
"loss": 1.4386,
"step": 4285
},
{
"epoch": 0.06,
"learning_rate": 9.925636592585856e-05,
"loss": 1.4583,
"step": 4290
},
{
"epoch": 0.06,
"learning_rate": 9.92545945095301e-05,
"loss": 1.4623,
"step": 4295
},
{
"epoch": 0.06,
"learning_rate": 9.925282100170364e-05,
"loss": 1.4514,
"step": 4300
},
{
"epoch": 0.06,
"learning_rate": 9.925104540245452e-05,
"loss": 1.4272,
"step": 4305
},
{
"epoch": 0.06,
"learning_rate": 9.924926771185812e-05,
"loss": 1.4225,
"step": 4310
},
{
"epoch": 0.06,
"learning_rate": 9.924748792998992e-05,
"loss": 1.4803,
"step": 4315
},
{
"epoch": 0.06,
"learning_rate": 9.924570605692551e-05,
"loss": 1.4339,
"step": 4320
},
{
"epoch": 0.06,
"learning_rate": 9.924392209274054e-05,
"loss": 1.4454,
"step": 4325
},
{
"epoch": 0.06,
"learning_rate": 9.924213603751077e-05,
"loss": 1.4079,
"step": 4330
},
{
"epoch": 0.06,
"learning_rate": 9.924034789131206e-05,
"loss": 1.4662,
"step": 4335
},
{
"epoch": 0.06,
"learning_rate": 9.923855765422031e-05,
"loss": 1.4707,
"step": 4340
},
{
"epoch": 0.06,
"learning_rate": 9.923676532631154e-05,
"loss": 1.4837,
"step": 4345
},
{
"epoch": 0.06,
"learning_rate": 9.923497090766189e-05,
"loss": 1.4722,
"step": 4350
},
{
"epoch": 0.06,
"learning_rate": 9.923317439834752e-05,
"loss": 1.5022,
"step": 4355
},
{
"epoch": 0.06,
"learning_rate": 9.923137579844473e-05,
"loss": 1.4984,
"step": 4360
},
{
"epoch": 0.06,
"learning_rate": 9.92295751080299e-05,
"loss": 1.4654,
"step": 4365
},
{
"epoch": 0.06,
"learning_rate": 9.922777232717948e-05,
"loss": 1.4543,
"step": 4370
},
{
"epoch": 0.06,
"learning_rate": 9.922596745597003e-05,
"loss": 1.4521,
"step": 4375
},
{
"epoch": 0.06,
"learning_rate": 9.92241604944782e-05,
"loss": 1.4767,
"step": 4380
},
{
"epoch": 0.06,
"learning_rate": 9.922235144278069e-05,
"loss": 1.4528,
"step": 4385
},
{
"epoch": 0.06,
"learning_rate": 9.922054030095434e-05,
"loss": 1.4227,
"step": 4390
},
{
"epoch": 0.06,
"learning_rate": 9.921872706907606e-05,
"loss": 1.4644,
"step": 4395
},
{
"epoch": 0.06,
"learning_rate": 9.921691174722283e-05,
"loss": 1.4024,
"step": 4400
},
{
"epoch": 0.06,
"learning_rate": 9.921509433547173e-05,
"loss": 1.4541,
"step": 4405
},
{
"epoch": 0.06,
"learning_rate": 9.921327483389996e-05,
"loss": 1.4519,
"step": 4410
},
{
"epoch": 0.06,
"learning_rate": 9.921145324258476e-05,
"loss": 1.4108,
"step": 4415
},
{
"epoch": 0.06,
"learning_rate": 9.920962956160348e-05,
"loss": 1.3771,
"step": 4420
},
{
"epoch": 0.06,
"learning_rate": 9.920780379103357e-05,
"loss": 1.448,
"step": 4425
},
{
"epoch": 0.06,
"learning_rate": 9.920597593095256e-05,
"loss": 1.4493,
"step": 4430
},
{
"epoch": 0.06,
"learning_rate": 9.920414598143803e-05,
"loss": 1.3581,
"step": 4435
},
{
"epoch": 0.06,
"learning_rate": 9.920231394256773e-05,
"loss": 1.4563,
"step": 4440
},
{
"epoch": 0.06,
"learning_rate": 9.920047981441945e-05,
"loss": 1.4441,
"step": 4445
},
{
"epoch": 0.06,
"learning_rate": 9.919864359707104e-05,
"loss": 1.463,
"step": 4450
},
{
"epoch": 0.06,
"learning_rate": 9.919680529060051e-05,
"loss": 1.4194,
"step": 4455
},
{
"epoch": 0.06,
"learning_rate": 9.919496489508588e-05,
"loss": 1.4507,
"step": 4460
},
{
"epoch": 0.06,
"learning_rate": 9.919312241060534e-05,
"loss": 1.4774,
"step": 4465
},
{
"epoch": 0.06,
"learning_rate": 9.91912778372371e-05,
"loss": 1.4451,
"step": 4470
},
{
"epoch": 0.06,
"learning_rate": 9.91894311750595e-05,
"loss": 1.4423,
"step": 4475
},
{
"epoch": 0.06,
"learning_rate": 9.918758242415094e-05,
"loss": 1.421,
"step": 4480
},
{
"epoch": 0.06,
"learning_rate": 9.918573158458993e-05,
"loss": 1.4612,
"step": 4485
},
{
"epoch": 0.06,
"learning_rate": 9.918387865645506e-05,
"loss": 1.481,
"step": 4490
},
{
"epoch": 0.06,
"learning_rate": 9.918202363982503e-05,
"loss": 1.4349,
"step": 4495
},
{
"epoch": 0.06,
"learning_rate": 9.918016653477857e-05,
"loss": 1.4083,
"step": 4500
},
{
"epoch": 0.06,
"learning_rate": 9.91783073413946e-05,
"loss": 1.3937,
"step": 4505
},
{
"epoch": 0.06,
"learning_rate": 9.9176446059752e-05,
"loss": 1.4743,
"step": 4510
},
{
"epoch": 0.06,
"learning_rate": 9.917458268992984e-05,
"loss": 1.428,
"step": 4515
},
{
"epoch": 0.06,
"learning_rate": 9.917271723200726e-05,
"loss": 1.491,
"step": 4520
},
{
"epoch": 0.06,
"learning_rate": 9.917084968606343e-05,
"loss": 1.3886,
"step": 4525
},
{
"epoch": 0.06,
"learning_rate": 9.916898005217767e-05,
"loss": 1.5037,
"step": 4530
},
{
"epoch": 0.06,
"learning_rate": 9.916710833042939e-05,
"loss": 1.3943,
"step": 4535
},
{
"epoch": 0.06,
"learning_rate": 9.916523452089804e-05,
"loss": 1.3774,
"step": 4540
},
{
"epoch": 0.06,
"learning_rate": 9.916335862366322e-05,
"loss": 1.4492,
"step": 4545
},
{
"epoch": 0.06,
"learning_rate": 9.916148063880455e-05,
"loss": 1.4683,
"step": 4550
},
{
"epoch": 0.06,
"learning_rate": 9.915960056640179e-05,
"loss": 1.4376,
"step": 4555
},
{
"epoch": 0.06,
"learning_rate": 9.915771840653478e-05,
"loss": 1.4691,
"step": 4560
},
{
"epoch": 0.06,
"learning_rate": 9.915583415928344e-05,
"loss": 1.4359,
"step": 4565
},
{
"epoch": 0.06,
"learning_rate": 9.915394782472779e-05,
"loss": 1.4422,
"step": 4570
},
{
"epoch": 0.06,
"learning_rate": 9.915205940294791e-05,
"loss": 1.4498,
"step": 4575
},
{
"epoch": 0.06,
"learning_rate": 9.915016889402398e-05,
"loss": 1.4366,
"step": 4580
},
{
"epoch": 0.06,
"learning_rate": 9.914827629803631e-05,
"loss": 1.4572,
"step": 4585
},
{
"epoch": 0.06,
"learning_rate": 9.914638161506525e-05,
"loss": 1.4712,
"step": 4590
},
{
"epoch": 0.06,
"learning_rate": 9.914448484519124e-05,
"loss": 1.466,
"step": 4595
},
{
"epoch": 0.06,
"learning_rate": 9.914258598849484e-05,
"loss": 1.463,
"step": 4600
},
{
"epoch": 0.06,
"learning_rate": 9.914068504505668e-05,
"loss": 1.4681,
"step": 4605
},
{
"epoch": 0.06,
"learning_rate": 9.913878201495748e-05,
"loss": 1.4424,
"step": 4610
},
{
"epoch": 0.06,
"learning_rate": 9.913687689827802e-05,
"loss": 1.433,
"step": 4615
},
{
"epoch": 0.06,
"learning_rate": 9.913496969509925e-05,
"loss": 1.4363,
"step": 4620
},
{
"epoch": 0.06,
"learning_rate": 9.91330604055021e-05,
"loss": 1.454,
"step": 4625
},
{
"epoch": 0.06,
"learning_rate": 9.913114902956768e-05,
"loss": 1.4534,
"step": 4630
},
{
"epoch": 0.06,
"learning_rate": 9.912923556737716e-05,
"loss": 1.4323,
"step": 4635
},
{
"epoch": 0.06,
"learning_rate": 9.912732001901175e-05,
"loss": 1.4367,
"step": 4640
},
{
"epoch": 0.06,
"learning_rate": 9.912540238455284e-05,
"loss": 1.4017,
"step": 4645
},
{
"epoch": 0.06,
"learning_rate": 9.91234826640818e-05,
"loss": 1.4265,
"step": 4650
},
{
"epoch": 0.06,
"learning_rate": 9.91215608576802e-05,
"loss": 1.4312,
"step": 4655
},
{
"epoch": 0.06,
"learning_rate": 9.911963696542963e-05,
"loss": 1.4664,
"step": 4660
},
{
"epoch": 0.06,
"learning_rate": 9.911771098741177e-05,
"loss": 1.4627,
"step": 4665
},
{
"epoch": 0.06,
"learning_rate": 9.911578292370842e-05,
"loss": 1.4551,
"step": 4670
},
{
"epoch": 0.06,
"learning_rate": 9.911385277440144e-05,
"loss": 1.4287,
"step": 4675
},
{
"epoch": 0.06,
"learning_rate": 9.911192053957278e-05,
"loss": 1.429,
"step": 4680
},
{
"epoch": 0.06,
"learning_rate": 9.910998621930453e-05,
"loss": 1.455,
"step": 4685
},
{
"epoch": 0.06,
"learning_rate": 9.910804981367878e-05,
"loss": 1.4796,
"step": 4690
},
{
"epoch": 0.06,
"learning_rate": 9.910611132277779e-05,
"loss": 1.4344,
"step": 4695
},
{
"epoch": 0.06,
"learning_rate": 9.910417074668386e-05,
"loss": 1.4254,
"step": 4700
},
{
"epoch": 0.06,
"learning_rate": 9.910222808547938e-05,
"loss": 1.4071,
"step": 4705
},
{
"epoch": 0.06,
"learning_rate": 9.910028333924686e-05,
"loss": 1.4355,
"step": 4710
},
{
"epoch": 0.06,
"learning_rate": 9.909833650806887e-05,
"loss": 1.4119,
"step": 4715
},
{
"epoch": 0.06,
"learning_rate": 9.909638759202808e-05,
"loss": 1.4879,
"step": 4720
},
{
"epoch": 0.06,
"learning_rate": 9.909443659120725e-05,
"loss": 1.4873,
"step": 4725
},
{
"epoch": 0.06,
"learning_rate": 9.909248350568923e-05,
"loss": 1.4898,
"step": 4730
},
{
"epoch": 0.06,
"learning_rate": 9.909052833555693e-05,
"loss": 1.4512,
"step": 4735
},
{
"epoch": 0.06,
"learning_rate": 9.90885710808934e-05,
"loss": 1.478,
"step": 4740
},
{
"epoch": 0.06,
"learning_rate": 9.908661174178175e-05,
"loss": 1.4332,
"step": 4745
},
{
"epoch": 0.06,
"learning_rate": 9.908465031830516e-05,
"loss": 1.4438,
"step": 4750
},
{
"epoch": 0.06,
"learning_rate": 9.908268681054694e-05,
"loss": 1.4823,
"step": 4755
},
{
"epoch": 0.06,
"learning_rate": 9.908072121859043e-05,
"loss": 1.401,
"step": 4760
},
{
"epoch": 0.06,
"learning_rate": 9.907875354251914e-05,
"loss": 1.4604,
"step": 4765
},
{
"epoch": 0.06,
"learning_rate": 9.90767837824166e-05,
"loss": 1.4423,
"step": 4770
},
{
"epoch": 0.06,
"learning_rate": 9.907481193836646e-05,
"loss": 1.4441,
"step": 4775
},
{
"epoch": 0.06,
"learning_rate": 9.907283801045242e-05,
"loss": 1.4699,
"step": 4780
},
{
"epoch": 0.06,
"learning_rate": 9.907086199875834e-05,
"loss": 1.4403,
"step": 4785
},
{
"epoch": 0.06,
"learning_rate": 9.906888390336812e-05,
"loss": 1.457,
"step": 4790
},
{
"epoch": 0.06,
"learning_rate": 9.906690372436573e-05,
"loss": 1.4647,
"step": 4795
},
{
"epoch": 0.06,
"learning_rate": 9.906492146183529e-05,
"loss": 1.4125,
"step": 4800
},
{
"epoch": 0.06,
"learning_rate": 9.906293711586095e-05,
"loss": 1.4684,
"step": 4805
},
{
"epoch": 0.06,
"learning_rate": 9.906095068652698e-05,
"loss": 1.4278,
"step": 4810
},
{
"epoch": 0.06,
"learning_rate": 9.905896217391771e-05,
"loss": 1.4882,
"step": 4815
},
{
"epoch": 0.06,
"learning_rate": 9.905697157811761e-05,
"loss": 1.4476,
"step": 4820
},
{
"epoch": 0.06,
"learning_rate": 9.905497889921118e-05,
"loss": 1.4537,
"step": 4825
},
{
"epoch": 0.06,
"learning_rate": 9.905298413728306e-05,
"loss": 1.4483,
"step": 4830
},
{
"epoch": 0.06,
"learning_rate": 9.905098729241791e-05,
"loss": 1.4819,
"step": 4835
},
{
"epoch": 0.06,
"learning_rate": 9.904898836470058e-05,
"loss": 1.4489,
"step": 4840
},
{
"epoch": 0.06,
"learning_rate": 9.904698735421591e-05,
"loss": 1.4303,
"step": 4845
},
{
"epoch": 0.06,
"learning_rate": 9.904498426104888e-05,
"loss": 1.463,
"step": 4850
},
{
"epoch": 0.06,
"learning_rate": 9.904297908528455e-05,
"loss": 1.4548,
"step": 4855
},
{
"epoch": 0.06,
"learning_rate": 9.904097182700807e-05,
"loss": 1.4995,
"step": 4860
},
{
"epoch": 0.06,
"learning_rate": 9.903896248630466e-05,
"loss": 1.4445,
"step": 4865
},
{
"epoch": 0.06,
"learning_rate": 9.903695106325966e-05,
"loss": 1.4756,
"step": 4870
},
{
"epoch": 0.06,
"learning_rate": 9.903493755795845e-05,
"loss": 1.4408,
"step": 4875
},
{
"epoch": 0.06,
"learning_rate": 9.903292197048657e-05,
"loss": 1.4491,
"step": 4880
},
{
"epoch": 0.06,
"learning_rate": 9.903090430092958e-05,
"loss": 1.4799,
"step": 4885
},
{
"epoch": 0.06,
"learning_rate": 9.902888454937318e-05,
"loss": 1.4279,
"step": 4890
},
{
"epoch": 0.06,
"learning_rate": 9.90268627159031e-05,
"loss": 1.4457,
"step": 4895
},
{
"epoch": 0.06,
"learning_rate": 9.902483880060522e-05,
"loss": 1.4625,
"step": 4900
},
{
"epoch": 0.06,
"learning_rate": 9.902281280356548e-05,
"loss": 1.4388,
"step": 4905
},
{
"epoch": 0.06,
"learning_rate": 9.902078472486991e-05,
"loss": 1.4579,
"step": 4910
},
{
"epoch": 0.06,
"learning_rate": 9.901875456460461e-05,
"loss": 1.4468,
"step": 4915
},
{
"epoch": 0.06,
"learning_rate": 9.90167223228558e-05,
"loss": 1.4418,
"step": 4920
},
{
"epoch": 0.06,
"learning_rate": 9.901468799970979e-05,
"loss": 1.4449,
"step": 4925
},
{
"epoch": 0.06,
"learning_rate": 9.901265159525293e-05,
"loss": 1.4613,
"step": 4930
},
{
"epoch": 0.06,
"learning_rate": 9.901061310957171e-05,
"loss": 1.4206,
"step": 4935
},
{
"epoch": 0.06,
"learning_rate": 9.900857254275271e-05,
"loss": 1.4405,
"step": 4940
},
{
"epoch": 0.06,
"learning_rate": 9.900652989488255e-05,
"loss": 1.4228,
"step": 4945
},
{
"epoch": 0.06,
"learning_rate": 9.900448516604796e-05,
"loss": 1.4206,
"step": 4950
},
{
"epoch": 0.06,
"learning_rate": 9.900243835633581e-05,
"loss": 1.4725,
"step": 4955
},
{
"epoch": 0.06,
"learning_rate": 9.900038946583296e-05,
"loss": 1.472,
"step": 4960
},
{
"epoch": 0.07,
"learning_rate": 9.899833849462646e-05,
"loss": 1.4159,
"step": 4965
},
{
"epoch": 0.07,
"learning_rate": 9.899628544280334e-05,
"loss": 1.4931,
"step": 4970
},
{
"epoch": 0.07,
"learning_rate": 9.899423031045085e-05,
"loss": 1.4507,
"step": 4975
},
{
"epoch": 0.07,
"learning_rate": 9.899217309765622e-05,
"loss": 1.4272,
"step": 4980
},
{
"epoch": 0.07,
"learning_rate": 9.89901138045068e-05,
"loss": 1.4769,
"step": 4985
},
{
"epoch": 0.07,
"learning_rate": 9.898805243109004e-05,
"loss": 1.4097,
"step": 4990
},
{
"epoch": 0.07,
"learning_rate": 9.898598897749349e-05,
"loss": 1.462,
"step": 4995
},
{
"epoch": 0.07,
"learning_rate": 9.898392344380474e-05,
"loss": 1.4384,
"step": 5000
},
{
"epoch": 0.07,
"learning_rate": 9.898185583011151e-05,
"loss": 1.4595,
"step": 5005
},
{
"epoch": 0.07,
"learning_rate": 9.89797861365016e-05,
"loss": 1.4524,
"step": 5010
},
{
"epoch": 0.07,
"learning_rate": 9.897771436306291e-05,
"loss": 1.482,
"step": 5015
},
{
"epoch": 0.07,
"learning_rate": 9.897564050988339e-05,
"loss": 1.4416,
"step": 5020
},
{
"epoch": 0.07,
"learning_rate": 9.897356457705113e-05,
"loss": 1.4524,
"step": 5025
},
{
"epoch": 0.07,
"learning_rate": 9.897148656465425e-05,
"loss": 1.4698,
"step": 5030
},
{
"epoch": 0.07,
"learning_rate": 9.896940647278099e-05,
"loss": 1.4152,
"step": 5035
},
{
"epoch": 0.07,
"learning_rate": 9.89673243015197e-05,
"loss": 1.3912,
"step": 5040
},
{
"epoch": 0.07,
"learning_rate": 9.896524005095878e-05,
"loss": 1.424,
"step": 5045
},
{
"epoch": 0.07,
"learning_rate": 9.896315372118673e-05,
"loss": 1.4444,
"step": 5050
},
{
"epoch": 0.07,
"learning_rate": 9.896106531229217e-05,
"loss": 1.4579,
"step": 5055
},
{
"epoch": 0.07,
"learning_rate": 9.895897482436374e-05,
"loss": 1.4456,
"step": 5060
},
{
"epoch": 0.07,
"learning_rate": 9.895688225749023e-05,
"loss": 1.4455,
"step": 5065
},
{
"epoch": 0.07,
"learning_rate": 9.89547876117605e-05,
"loss": 1.4433,
"step": 5070
},
{
"epoch": 0.07,
"learning_rate": 9.895269088726347e-05,
"loss": 1.4151,
"step": 5075
},
{
"epoch": 0.07,
"learning_rate": 9.895059208408821e-05,
"loss": 1.4242,
"step": 5080
},
{
"epoch": 0.07,
"learning_rate": 9.894849120232381e-05,
"loss": 1.4411,
"step": 5085
},
{
"epoch": 0.07,
"learning_rate": 9.894638824205951e-05,
"loss": 1.4385,
"step": 5090
},
{
"epoch": 0.07,
"learning_rate": 9.894428320338458e-05,
"loss": 1.4364,
"step": 5095
},
{
"epoch": 0.07,
"learning_rate": 9.894217608638843e-05,
"loss": 1.4412,
"step": 5100
},
{
"epoch": 0.07,
"learning_rate": 9.89400668911605e-05,
"loss": 1.476,
"step": 5105
},
{
"epoch": 0.07,
"learning_rate": 9.893795561779039e-05,
"loss": 1.4598,
"step": 5110
},
{
"epoch": 0.07,
"learning_rate": 9.893584226636772e-05,
"loss": 1.4872,
"step": 5115
},
{
"epoch": 0.07,
"learning_rate": 9.893372683698227e-05,
"loss": 1.4143,
"step": 5120
},
{
"epoch": 0.07,
"learning_rate": 9.893160932972384e-05,
"loss": 1.4945,
"step": 5125
},
{
"epoch": 0.07,
"learning_rate": 9.892948974468232e-05,
"loss": 1.4487,
"step": 5130
},
{
"epoch": 0.07,
"learning_rate": 9.892736808194776e-05,
"loss": 1.4429,
"step": 5135
},
{
"epoch": 0.07,
"learning_rate": 9.892524434161024e-05,
"loss": 1.4673,
"step": 5140
},
{
"epoch": 0.07,
"learning_rate": 9.892311852375993e-05,
"loss": 1.4249,
"step": 5145
},
{
"epoch": 0.07,
"learning_rate": 9.89209906284871e-05,
"loss": 1.3978,
"step": 5150
},
{
"epoch": 0.07,
"learning_rate": 9.89188606558821e-05,
"loss": 1.4586,
"step": 5155
},
{
"epoch": 0.07,
"learning_rate": 9.891672860603541e-05,
"loss": 1.4332,
"step": 5160
},
{
"epoch": 0.07,
"learning_rate": 9.89145944790375e-05,
"loss": 1.4114,
"step": 5165
},
{
"epoch": 0.07,
"learning_rate": 9.891245827497906e-05,
"loss": 1.4097,
"step": 5170
},
{
"epoch": 0.07,
"learning_rate": 9.891031999395077e-05,
"loss": 1.4278,
"step": 5175
},
{
"epoch": 0.07,
"learning_rate": 9.890817963604342e-05,
"loss": 1.4269,
"step": 5180
},
{
"epoch": 0.07,
"learning_rate": 9.89060372013479e-05,
"loss": 1.4194,
"step": 5185
},
{
"epoch": 0.07,
"learning_rate": 9.890389268995518e-05,
"loss": 1.4705,
"step": 5190
},
{
"epoch": 0.07,
"learning_rate": 9.890174610195635e-05,
"loss": 1.4548,
"step": 5195
},
{
"epoch": 0.07,
"learning_rate": 9.889959743744252e-05,
"loss": 1.4359,
"step": 5200
},
{
"epoch": 0.07,
"learning_rate": 9.889744669650498e-05,
"loss": 1.4385,
"step": 5205
},
{
"epoch": 0.07,
"learning_rate": 9.8895293879235e-05,
"loss": 1.5018,
"step": 5210
},
{
"epoch": 0.07,
"learning_rate": 9.889313898572403e-05,
"loss": 1.419,
"step": 5215
},
{
"epoch": 0.07,
"learning_rate": 9.889098201606356e-05,
"loss": 1.5235,
"step": 5220
},
{
"epoch": 0.07,
"learning_rate": 9.888882297034518e-05,
"loss": 1.4242,
"step": 5225
},
{
"epoch": 0.07,
"learning_rate": 9.88866618486606e-05,
"loss": 1.4484,
"step": 5230
},
{
"epoch": 0.07,
"learning_rate": 9.888449865110153e-05,
"loss": 1.4115,
"step": 5235
},
{
"epoch": 0.07,
"learning_rate": 9.888233337775987e-05,
"loss": 1.4238,
"step": 5240
},
{
"epoch": 0.07,
"learning_rate": 9.888016602872757e-05,
"loss": 1.4216,
"step": 5245
},
{
"epoch": 0.07,
"learning_rate": 9.887799660409662e-05,
"loss": 1.4498,
"step": 5250
},
{
"epoch": 0.07,
"learning_rate": 9.887582510395919e-05,
"loss": 1.4161,
"step": 5255
},
{
"epoch": 0.07,
"learning_rate": 9.887365152840745e-05,
"loss": 1.4407,
"step": 5260
},
{
"epoch": 0.07,
"learning_rate": 9.887147587753372e-05,
"loss": 1.4304,
"step": 5265
},
{
"epoch": 0.07,
"learning_rate": 9.886929815143036e-05,
"loss": 1.426,
"step": 5270
},
{
"epoch": 0.07,
"learning_rate": 9.886711835018986e-05,
"loss": 1.4597,
"step": 5275
},
{
"epoch": 0.07,
"learning_rate": 9.886493647390478e-05,
"loss": 1.4202,
"step": 5280
},
{
"epoch": 0.07,
"learning_rate": 9.886275252266777e-05,
"loss": 1.4153,
"step": 5285
},
{
"epoch": 0.07,
"learning_rate": 9.886056649657159e-05,
"loss": 1.4417,
"step": 5290
},
{
"epoch": 0.07,
"learning_rate": 9.8858378395709e-05,
"loss": 1.4376,
"step": 5295
},
{
"epoch": 0.07,
"learning_rate": 9.885618822017297e-05,
"loss": 1.4071,
"step": 5300
},
{
"epoch": 0.07,
"learning_rate": 9.885399597005649e-05,
"loss": 1.4098,
"step": 5305
},
{
"epoch": 0.07,
"learning_rate": 9.885180164545264e-05,
"loss": 1.467,
"step": 5310
},
{
"epoch": 0.07,
"learning_rate": 9.884960524645459e-05,
"loss": 1.4272,
"step": 5315
},
{
"epoch": 0.07,
"learning_rate": 9.884740677315563e-05,
"loss": 1.4073,
"step": 5320
},
{
"epoch": 0.07,
"learning_rate": 9.88452062256491e-05,
"loss": 1.4524,
"step": 5325
},
{
"epoch": 0.07,
"learning_rate": 9.884300360402845e-05,
"loss": 1.4836,
"step": 5330
},
{
"epoch": 0.07,
"learning_rate": 9.88407989083872e-05,
"loss": 1.4171,
"step": 5335
},
{
"epoch": 0.07,
"learning_rate": 9.883859213881897e-05,
"loss": 1.4508,
"step": 5340
},
{
"epoch": 0.07,
"learning_rate": 9.883638329541746e-05,
"loss": 1.4102,
"step": 5345
},
{
"epoch": 0.07,
"learning_rate": 9.883417237827648e-05,
"loss": 1.433,
"step": 5350
},
{
"epoch": 0.07,
"learning_rate": 9.883195938748989e-05,
"loss": 1.4498,
"step": 5355
},
{
"epoch": 0.07,
"learning_rate": 9.882974432315168e-05,
"loss": 1.4686,
"step": 5360
},
{
"epoch": 0.07,
"learning_rate": 9.882752718535591e-05,
"loss": 1.4164,
"step": 5365
},
{
"epoch": 0.07,
"learning_rate": 9.882530797419671e-05,
"loss": 1.4255,
"step": 5370
},
{
"epoch": 0.07,
"learning_rate": 9.882308668976835e-05,
"loss": 1.4327,
"step": 5375
},
{
"epoch": 0.07,
"learning_rate": 9.88208633321651e-05,
"loss": 1.4665,
"step": 5380
},
{
"epoch": 0.07,
"learning_rate": 9.881863790148138e-05,
"loss": 1.4413,
"step": 5385
},
{
"epoch": 0.07,
"learning_rate": 9.881641039781173e-05,
"loss": 1.457,
"step": 5390
},
{
"epoch": 0.07,
"learning_rate": 9.881418082125069e-05,
"loss": 1.3922,
"step": 5395
},
{
"epoch": 0.07,
"learning_rate": 9.881194917189298e-05,
"loss": 1.4152,
"step": 5400
},
{
"epoch": 0.07,
"learning_rate": 9.880971544983332e-05,
"loss": 1.3909,
"step": 5405
},
{
"epoch": 0.07,
"learning_rate": 9.88074796551666e-05,
"loss": 1.4421,
"step": 5410
},
{
"epoch": 0.07,
"learning_rate": 9.880524178798772e-05,
"loss": 1.4506,
"step": 5415
},
{
"epoch": 0.07,
"learning_rate": 9.88030018483917e-05,
"loss": 1.4401,
"step": 5420
},
{
"epoch": 0.07,
"learning_rate": 9.88007598364737e-05,
"loss": 1.4411,
"step": 5425
},
{
"epoch": 0.07,
"learning_rate": 9.879851575232892e-05,
"loss": 1.4236,
"step": 5430
},
{
"epoch": 0.07,
"learning_rate": 9.87962695960526e-05,
"loss": 1.3662,
"step": 5435
},
{
"epoch": 0.07,
"learning_rate": 9.879402136774017e-05,
"loss": 1.4435,
"step": 5440
},
{
"epoch": 0.07,
"learning_rate": 9.879177106748706e-05,
"loss": 1.4832,
"step": 5445
},
{
"epoch": 0.07,
"learning_rate": 9.878951869538886e-05,
"loss": 1.4342,
"step": 5450
},
{
"epoch": 0.07,
"learning_rate": 9.878726425154118e-05,
"loss": 1.4295,
"step": 5455
},
{
"epoch": 0.07,
"learning_rate": 9.878500773603977e-05,
"loss": 1.4543,
"step": 5460
},
{
"epoch": 0.07,
"learning_rate": 9.878274914898043e-05,
"loss": 1.4325,
"step": 5465
},
{
"epoch": 0.07,
"learning_rate": 9.878048849045909e-05,
"loss": 1.452,
"step": 5470
},
{
"epoch": 0.07,
"learning_rate": 9.877822576057172e-05,
"loss": 1.409,
"step": 5475
},
{
"epoch": 0.07,
"learning_rate": 9.877596095941443e-05,
"loss": 1.3971,
"step": 5480
},
{
"epoch": 0.07,
"learning_rate": 9.877369408708337e-05,
"loss": 1.4932,
"step": 5485
},
{
"epoch": 0.07,
"learning_rate": 9.87714251436748e-05,
"loss": 1.43,
"step": 5490
},
{
"epoch": 0.07,
"learning_rate": 9.876915412928508e-05,
"loss": 1.4377,
"step": 5495
},
{
"epoch": 0.07,
"learning_rate": 9.876688104401061e-05,
"loss": 1.4307,
"step": 5500
},
{
"epoch": 0.07,
"learning_rate": 9.876460588794796e-05,
"loss": 1.4394,
"step": 5505
},
{
"epoch": 0.07,
"learning_rate": 9.87623286611937e-05,
"loss": 1.4425,
"step": 5510
},
{
"epoch": 0.07,
"learning_rate": 9.876004936384455e-05,
"loss": 1.455,
"step": 5515
},
{
"epoch": 0.07,
"learning_rate": 9.87577679959973e-05,
"loss": 1.4759,
"step": 5520
},
{
"epoch": 0.07,
"learning_rate": 9.87554845577488e-05,
"loss": 1.4277,
"step": 5525
},
{
"epoch": 0.07,
"learning_rate": 9.875319904919602e-05,
"loss": 1.4145,
"step": 5530
},
{
"epoch": 0.07,
"learning_rate": 9.875091147043602e-05,
"loss": 1.4,
"step": 5535
},
{
"epoch": 0.07,
"learning_rate": 9.874862182156594e-05,
"loss": 1.413,
"step": 5540
},
{
"epoch": 0.07,
"learning_rate": 9.874633010268299e-05,
"loss": 1.3855,
"step": 5545
},
{
"epoch": 0.07,
"learning_rate": 9.874403631388449e-05,
"loss": 1.4665,
"step": 5550
},
{
"epoch": 0.07,
"learning_rate": 9.874174045526786e-05,
"loss": 1.4137,
"step": 5555
},
{
"epoch": 0.07,
"learning_rate": 9.873944252693055e-05,
"loss": 1.4412,
"step": 5560
},
{
"epoch": 0.07,
"learning_rate": 9.873714252897015e-05,
"loss": 1.4332,
"step": 5565
},
{
"epoch": 0.07,
"learning_rate": 9.873484046148435e-05,
"loss": 1.4179,
"step": 5570
},
{
"epoch": 0.07,
"learning_rate": 9.873253632457087e-05,
"loss": 1.4807,
"step": 5575
},
{
"epoch": 0.07,
"learning_rate": 9.873023011832758e-05,
"loss": 1.4237,
"step": 5580
},
{
"epoch": 0.07,
"learning_rate": 9.872792184285238e-05,
"loss": 1.4525,
"step": 5585
},
{
"epoch": 0.07,
"learning_rate": 9.87256114982433e-05,
"loss": 1.4146,
"step": 5590
},
{
"epoch": 0.07,
"learning_rate": 9.872329908459846e-05,
"loss": 1.4366,
"step": 5595
},
{
"epoch": 0.07,
"learning_rate": 9.872098460201604e-05,
"loss": 1.4667,
"step": 5600
},
{
"epoch": 0.07,
"learning_rate": 9.87186680505943e-05,
"loss": 1.4047,
"step": 5605
},
{
"epoch": 0.07,
"learning_rate": 9.871634943043163e-05,
"loss": 1.4019,
"step": 5610
},
{
"epoch": 0.07,
"learning_rate": 9.871402874162647e-05,
"loss": 1.4213,
"step": 5615
},
{
"epoch": 0.07,
"learning_rate": 9.87117059842774e-05,
"loss": 1.4572,
"step": 5620
},
{
"epoch": 0.07,
"learning_rate": 9.870938115848301e-05,
"loss": 1.4626,
"step": 5625
},
{
"epoch": 0.07,
"learning_rate": 9.870705426434202e-05,
"loss": 1.4561,
"step": 5630
},
{
"epoch": 0.07,
"learning_rate": 9.870472530195326e-05,
"loss": 1.4016,
"step": 5635
},
{
"epoch": 0.07,
"learning_rate": 9.870239427141561e-05,
"loss": 1.4378,
"step": 5640
},
{
"epoch": 0.07,
"learning_rate": 9.870006117282807e-05,
"loss": 1.4359,
"step": 5645
},
{
"epoch": 0.07,
"learning_rate": 9.86977260062897e-05,
"loss": 1.4282,
"step": 5650
},
{
"epoch": 0.07,
"learning_rate": 9.869538877189965e-05,
"loss": 1.4376,
"step": 5655
},
{
"epoch": 0.07,
"learning_rate": 9.869304946975717e-05,
"loss": 1.4222,
"step": 5660
},
{
"epoch": 0.07,
"learning_rate": 9.869070809996159e-05,
"loss": 1.476,
"step": 5665
},
{
"epoch": 0.07,
"learning_rate": 9.868836466261234e-05,
"loss": 1.4603,
"step": 5670
},
{
"epoch": 0.07,
"learning_rate": 9.868601915780894e-05,
"loss": 1.4608,
"step": 5675
},
{
"epoch": 0.07,
"learning_rate": 9.868367158565096e-05,
"loss": 1.382,
"step": 5680
},
{
"epoch": 0.07,
"learning_rate": 9.86813219462381e-05,
"loss": 1.424,
"step": 5685
},
{
"epoch": 0.07,
"learning_rate": 9.867897023967015e-05,
"loss": 1.4233,
"step": 5690
},
{
"epoch": 0.07,
"learning_rate": 9.867661646604694e-05,
"loss": 1.4424,
"step": 5695
},
{
"epoch": 0.07,
"learning_rate": 9.867426062546842e-05,
"loss": 1.4345,
"step": 5700
},
{
"epoch": 0.07,
"learning_rate": 9.867190271803465e-05,
"loss": 1.4549,
"step": 5705
},
{
"epoch": 0.07,
"learning_rate": 9.866954274384573e-05,
"loss": 1.4285,
"step": 5710
},
{
"epoch": 0.07,
"learning_rate": 9.866718070300189e-05,
"loss": 1.4531,
"step": 5715
},
{
"epoch": 0.07,
"learning_rate": 9.866481659560342e-05,
"loss": 1.4107,
"step": 5720
},
{
"epoch": 0.08,
"learning_rate": 9.86624504217507e-05,
"loss": 1.3965,
"step": 5725
},
{
"epoch": 0.08,
"learning_rate": 9.866008218154424e-05,
"loss": 1.4588,
"step": 5730
},
{
"epoch": 0.08,
"learning_rate": 9.865771187508454e-05,
"loss": 1.4339,
"step": 5735
},
{
"epoch": 0.08,
"learning_rate": 9.865533950247231e-05,
"loss": 1.4565,
"step": 5740
},
{
"epoch": 0.08,
"learning_rate": 9.865296506380828e-05,
"loss": 1.4209,
"step": 5745
},
{
"epoch": 0.08,
"learning_rate": 9.865058855919323e-05,
"loss": 1.4563,
"step": 5750
},
{
"epoch": 0.08,
"learning_rate": 9.864820998872812e-05,
"loss": 1.4482,
"step": 5755
},
{
"epoch": 0.08,
"learning_rate": 9.864582935251391e-05,
"loss": 1.4393,
"step": 5760
},
{
"epoch": 0.08,
"learning_rate": 9.864344665065175e-05,
"loss": 1.4465,
"step": 5765
},
{
"epoch": 0.08,
"learning_rate": 9.864106188324276e-05,
"loss": 1.4097,
"step": 5770
},
{
"epoch": 0.08,
"learning_rate": 9.863867505038823e-05,
"loss": 1.4607,
"step": 5775
},
{
"epoch": 0.08,
"learning_rate": 9.86362861521895e-05,
"loss": 1.4346,
"step": 5780
},
{
"epoch": 0.08,
"learning_rate": 9.863389518874803e-05,
"loss": 1.425,
"step": 5785
},
{
"epoch": 0.08,
"learning_rate": 9.863150216016532e-05,
"loss": 1.4839,
"step": 5790
},
{
"epoch": 0.08,
"learning_rate": 9.862910706654302e-05,
"loss": 1.4313,
"step": 5795
},
{
"epoch": 0.08,
"learning_rate": 9.86267099079828e-05,
"loss": 1.4444,
"step": 5800
},
{
"epoch": 0.08,
"learning_rate": 9.862431068458647e-05,
"loss": 1.4169,
"step": 5805
},
{
"epoch": 0.08,
"learning_rate": 9.862190939645587e-05,
"loss": 1.4161,
"step": 5810
},
{
"epoch": 0.08,
"learning_rate": 9.861950604369303e-05,
"loss": 1.4225,
"step": 5815
},
{
"epoch": 0.08,
"learning_rate": 9.861710062639996e-05,
"loss": 1.4741,
"step": 5820
},
{
"epoch": 0.08,
"learning_rate": 9.861469314467882e-05,
"loss": 1.4372,
"step": 5825
},
{
"epoch": 0.08,
"learning_rate": 9.861228359863181e-05,
"loss": 1.4736,
"step": 5830
},
{
"epoch": 0.08,
"learning_rate": 9.86098719883613e-05,
"loss": 1.3839,
"step": 5835
},
{
"epoch": 0.08,
"learning_rate": 9.860745831396963e-05,
"loss": 1.4295,
"step": 5840
},
{
"epoch": 0.08,
"learning_rate": 9.860504257555934e-05,
"loss": 1.4515,
"step": 5845
},
{
"epoch": 0.08,
"learning_rate": 9.860262477323299e-05,
"loss": 1.4258,
"step": 5850
},
{
"epoch": 0.08,
"learning_rate": 9.860020490709326e-05,
"loss": 1.4174,
"step": 5855
},
{
"epoch": 0.08,
"learning_rate": 9.859778297724287e-05,
"loss": 1.3954,
"step": 5860
},
{
"epoch": 0.08,
"learning_rate": 9.859535898378472e-05,
"loss": 1.3934,
"step": 5865
},
{
"epoch": 0.08,
"learning_rate": 9.859293292682169e-05,
"loss": 1.432,
"step": 5870
},
{
"epoch": 0.08,
"learning_rate": 9.859050480645681e-05,
"loss": 1.4463,
"step": 5875
},
{
"epoch": 0.08,
"learning_rate": 9.858807462279319e-05,
"loss": 1.4157,
"step": 5880
},
{
"epoch": 0.08,
"learning_rate": 9.858564237593404e-05,
"loss": 1.4338,
"step": 5885
},
{
"epoch": 0.08,
"learning_rate": 9.85832080659826e-05,
"loss": 1.4579,
"step": 5890
},
{
"epoch": 0.08,
"learning_rate": 9.858077169304227e-05,
"loss": 1.4035,
"step": 5895
},
{
"epoch": 0.08,
"learning_rate": 9.85783332572165e-05,
"loss": 1.4833,
"step": 5900
},
{
"epoch": 0.08,
"learning_rate": 9.857589275860884e-05,
"loss": 1.4525,
"step": 5905
},
{
"epoch": 0.08,
"learning_rate": 9.857345019732291e-05,
"loss": 1.4575,
"step": 5910
},
{
"epoch": 0.08,
"learning_rate": 9.857100557346242e-05,
"loss": 1.4738,
"step": 5915
},
{
"epoch": 0.08,
"learning_rate": 9.856855888713119e-05,
"loss": 1.3944,
"step": 5920
},
{
"epoch": 0.08,
"learning_rate": 9.856611013843311e-05,
"loss": 1.4301,
"step": 5925
},
{
"epoch": 0.08,
"learning_rate": 9.856365932747217e-05,
"loss": 1.393,
"step": 5930
},
{
"epoch": 0.08,
"learning_rate": 9.856120645435243e-05,
"loss": 1.4268,
"step": 5935
},
{
"epoch": 0.08,
"learning_rate": 9.855875151917804e-05,
"loss": 1.4958,
"step": 5940
},
{
"epoch": 0.08,
"learning_rate": 9.855629452205324e-05,
"loss": 1.441,
"step": 5945
},
{
"epoch": 0.08,
"learning_rate": 9.855383546308239e-05,
"loss": 1.4147,
"step": 5950
},
{
"epoch": 0.08,
"learning_rate": 9.855137434236987e-05,
"loss": 1.427,
"step": 5955
},
{
"epoch": 0.08,
"learning_rate": 9.854891116002023e-05,
"loss": 1.4723,
"step": 5960
},
{
"epoch": 0.08,
"learning_rate": 9.854644591613801e-05,
"loss": 1.4572,
"step": 5965
},
{
"epoch": 0.08,
"learning_rate": 9.854397861082796e-05,
"loss": 1.4476,
"step": 5970
},
{
"epoch": 0.08,
"learning_rate": 9.854150924419479e-05,
"loss": 1.3959,
"step": 5975
},
{
"epoch": 0.08,
"learning_rate": 9.85390378163434e-05,
"loss": 1.4274,
"step": 5980
},
{
"epoch": 0.08,
"learning_rate": 9.853656432737868e-05,
"loss": 1.4082,
"step": 5985
},
{
"epoch": 0.08,
"learning_rate": 9.853408877740572e-05,
"loss": 1.4306,
"step": 5990
},
{
"epoch": 0.08,
"learning_rate": 9.85316111665296e-05,
"loss": 1.3983,
"step": 5995
},
{
"epoch": 0.08,
"learning_rate": 9.852913149485556e-05,
"loss": 1.4172,
"step": 6000
}
],
"logging_steps": 5,
"max_steps": 76328,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.6825629945102336e+19,
"trial_name": null,
"trial_params": null
}