{ "best_metric": 0.09024298936128616, "best_model_checkpoint": "./modeversion1_m7_e4/checkpoint-6800", "epoch": 4.0, "global_step": 6824, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00019970691676436108, "loss": 4.9518, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019941383352872216, "loss": 4.7468, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.00019912075029308323, "loss": 4.6634, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.00019882766705744433, "loss": 4.5601, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.0001985345838218054, "loss": 4.4809, "step": 50 }, { "epoch": 0.04, "learning_rate": 0.00019824150058616647, "loss": 4.3918, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.00019794841735052755, "loss": 4.1907, "step": 70 }, { "epoch": 0.05, "learning_rate": 0.00019765533411488865, "loss": 4.1004, "step": 80 }, { "epoch": 0.05, "learning_rate": 0.00019736225087924972, "loss": 4.07, "step": 90 }, { "epoch": 0.06, "learning_rate": 0.0001970691676436108, "loss": 4.073, "step": 100 }, { "epoch": 0.06, "eval_accuracy": 0.1768493346687652, "eval_loss": 3.93697452545166, "eval_runtime": 144.6591, "eval_samples_per_second": 48.314, "eval_steps_per_second": 6.042, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.0001967760844079719, "loss": 3.8669, "step": 110 }, { "epoch": 0.07, "learning_rate": 0.00019648300117233296, "loss": 3.7828, "step": 120 }, { "epoch": 0.08, "learning_rate": 0.00019618991793669404, "loss": 3.7687, "step": 130 }, { "epoch": 0.08, "learning_rate": 0.0001958968347010551, "loss": 3.693, "step": 140 }, { "epoch": 0.09, "learning_rate": 0.00019560375146541618, "loss": 3.5779, "step": 150 }, { "epoch": 0.09, "learning_rate": 0.00019531066822977726, "loss": 3.5634, "step": 160 }, { "epoch": 0.1, "learning_rate": 0.00019501758499413833, "loss": 3.4888, "step": 170 }, { "epoch": 0.11, "learning_rate": 0.00019472450175849943, "loss": 3.3963, "step": 180 }, { "epoch": 0.11, "learning_rate": 0.0001944314185228605, "loss": 3.2674, "step": 190 }, { "epoch": 0.12, "learning_rate": 0.00019413833528722157, "loss": 3.4186, "step": 200 }, { "epoch": 0.12, "eval_accuracy": 0.25897839462011735, "eval_loss": 3.272094964981079, "eval_runtime": 144.7772, "eval_samples_per_second": 48.274, "eval_steps_per_second": 6.037, "step": 200 }, { "epoch": 0.12, "learning_rate": 0.00019384525205158265, "loss": 3.2797, "step": 210 }, { "epoch": 0.13, "learning_rate": 0.00019355216881594375, "loss": 3.2347, "step": 220 }, { "epoch": 0.13, "learning_rate": 0.00019325908558030482, "loss": 3.1258, "step": 230 }, { "epoch": 0.14, "learning_rate": 0.0001929660023446659, "loss": 2.9637, "step": 240 }, { "epoch": 0.15, "learning_rate": 0.000192672919109027, "loss": 2.9957, "step": 250 }, { "epoch": 0.15, "learning_rate": 0.00019237983587338807, "loss": 3.0592, "step": 260 }, { "epoch": 0.16, "learning_rate": 0.00019208675263774914, "loss": 2.778, "step": 270 }, { "epoch": 0.16, "learning_rate": 0.0001917936694021102, "loss": 2.7552, "step": 280 }, { "epoch": 0.17, "learning_rate": 0.00019150058616647128, "loss": 2.684, "step": 290 }, { "epoch": 0.18, "learning_rate": 0.00019120750293083236, "loss": 2.6745, "step": 300 }, { "epoch": 0.18, "eval_accuracy": 0.38560595221061666, "eval_loss": 2.6465089321136475, "eval_runtime": 146.4706, "eval_samples_per_second": 47.716, "eval_steps_per_second": 5.967, "step": 300 }, { "epoch": 0.18, "learning_rate": 0.00019091441969519343, "loss": 2.764, "step": 310 }, { "epoch": 0.19, "learning_rate": 0.00019062133645955453, "loss": 2.6034, "step": 320 }, { "epoch": 0.19, "learning_rate": 0.0001903282532239156, "loss": 2.539, "step": 330 }, { "epoch": 0.2, "learning_rate": 0.00019003516998827668, "loss": 2.5864, "step": 340 }, { "epoch": 0.21, "learning_rate": 0.00018974208675263775, "loss": 2.3876, "step": 350 }, { "epoch": 0.21, "learning_rate": 0.00018944900351699885, "loss": 2.3781, "step": 360 }, { "epoch": 0.22, "learning_rate": 0.00018915592028135992, "loss": 2.2723, "step": 370 }, { "epoch": 0.22, "learning_rate": 0.000188862837045721, "loss": 2.4228, "step": 380 }, { "epoch": 0.23, "learning_rate": 0.00018856975381008207, "loss": 2.1609, "step": 390 }, { "epoch": 0.23, "learning_rate": 0.00018827667057444317, "loss": 2.2806, "step": 400 }, { "epoch": 0.23, "eval_accuracy": 0.45228215767634855, "eval_loss": 2.260012626647949, "eval_runtime": 144.5966, "eval_samples_per_second": 48.334, "eval_steps_per_second": 6.044, "step": 400 }, { "epoch": 0.24, "learning_rate": 0.00018798358733880424, "loss": 2.2351, "step": 410 }, { "epoch": 0.25, "learning_rate": 0.0001876905041031653, "loss": 2.1572, "step": 420 }, { "epoch": 0.25, "learning_rate": 0.00018739742086752638, "loss": 2.3053, "step": 430 }, { "epoch": 0.26, "learning_rate": 0.00018710433763188746, "loss": 2.0842, "step": 440 }, { "epoch": 0.26, "learning_rate": 0.00018681125439624853, "loss": 2.1774, "step": 450 }, { "epoch": 0.27, "learning_rate": 0.0001865181711606096, "loss": 2.0003, "step": 460 }, { "epoch": 0.28, "learning_rate": 0.0001862250879249707, "loss": 2.1276, "step": 470 }, { "epoch": 0.28, "learning_rate": 0.00018593200468933178, "loss": 2.0091, "step": 480 }, { "epoch": 0.29, "learning_rate": 0.00018563892145369285, "loss": 1.9637, "step": 490 }, { "epoch": 0.29, "learning_rate": 0.00018534583821805395, "loss": 1.9275, "step": 500 }, { "epoch": 0.29, "eval_accuracy": 0.5109457719273144, "eval_loss": 1.965333104133606, "eval_runtime": 142.8058, "eval_samples_per_second": 48.941, "eval_steps_per_second": 6.12, "step": 500 }, { "epoch": 0.3, "learning_rate": 0.00018505275498241502, "loss": 2.0284, "step": 510 }, { "epoch": 0.3, "learning_rate": 0.0001847596717467761, "loss": 2.0057, "step": 520 }, { "epoch": 0.31, "learning_rate": 0.00018446658851113717, "loss": 1.8957, "step": 530 }, { "epoch": 0.32, "learning_rate": 0.00018417350527549827, "loss": 1.8548, "step": 540 }, { "epoch": 0.32, "learning_rate": 0.00018388042203985934, "loss": 1.7549, "step": 550 }, { "epoch": 0.33, "learning_rate": 0.0001835873388042204, "loss": 1.6703, "step": 560 }, { "epoch": 0.33, "learning_rate": 0.00018329425556858149, "loss": 1.7037, "step": 570 }, { "epoch": 0.34, "learning_rate": 0.00018300117233294256, "loss": 1.5982, "step": 580 }, { "epoch": 0.35, "learning_rate": 0.00018270808909730363, "loss": 1.8877, "step": 590 }, { "epoch": 0.35, "learning_rate": 0.0001824150058616647, "loss": 1.6958, "step": 600 }, { "epoch": 0.35, "eval_accuracy": 0.607812276434397, "eval_loss": 1.6815019845962524, "eval_runtime": 141.9874, "eval_samples_per_second": 49.223, "eval_steps_per_second": 6.155, "step": 600 }, { "epoch": 0.36, "learning_rate": 0.0001821219226260258, "loss": 1.6292, "step": 610 }, { "epoch": 0.36, "learning_rate": 0.00018182883939038688, "loss": 1.6704, "step": 620 }, { "epoch": 0.37, "learning_rate": 0.00018153575615474795, "loss": 1.4707, "step": 630 }, { "epoch": 0.38, "learning_rate": 0.00018124267291910902, "loss": 1.5722, "step": 640 }, { "epoch": 0.38, "learning_rate": 0.00018094958968347012, "loss": 1.477, "step": 650 }, { "epoch": 0.39, "learning_rate": 0.0001806565064478312, "loss": 1.5452, "step": 660 }, { "epoch": 0.39, "learning_rate": 0.00018036342321219227, "loss": 1.6506, "step": 670 }, { "epoch": 0.4, "learning_rate": 0.00018007033997655337, "loss": 1.4681, "step": 680 }, { "epoch": 0.4, "learning_rate": 0.00017977725674091444, "loss": 1.4487, "step": 690 }, { "epoch": 0.41, "learning_rate": 0.00017948417350527551, "loss": 1.2797, "step": 700 }, { "epoch": 0.41, "eval_accuracy": 0.6418657890971526, "eval_loss": 1.4514340162277222, "eval_runtime": 142.9219, "eval_samples_per_second": 48.901, "eval_steps_per_second": 6.115, "step": 700 }, { "epoch": 0.42, "learning_rate": 0.00017919109026963659, "loss": 1.4092, "step": 710 }, { "epoch": 0.42, "learning_rate": 0.00017889800703399766, "loss": 1.6711, "step": 720 }, { "epoch": 0.43, "learning_rate": 0.00017860492379835873, "loss": 1.5477, "step": 730 }, { "epoch": 0.43, "learning_rate": 0.0001783118405627198, "loss": 1.4402, "step": 740 }, { "epoch": 0.44, "learning_rate": 0.0001780187573270809, "loss": 1.5563, "step": 750 }, { "epoch": 0.45, "learning_rate": 0.00017772567409144198, "loss": 1.4736, "step": 760 }, { "epoch": 0.45, "learning_rate": 0.00017743259085580305, "loss": 1.5159, "step": 770 }, { "epoch": 0.46, "learning_rate": 0.00017713950762016412, "loss": 1.329, "step": 780 }, { "epoch": 0.46, "learning_rate": 0.00017684642438452522, "loss": 1.2187, "step": 790 }, { "epoch": 0.47, "learning_rate": 0.0001765533411488863, "loss": 1.3772, "step": 800 }, { "epoch": 0.47, "eval_accuracy": 0.6762054657318644, "eval_loss": 1.3212426900863647, "eval_runtime": 143.9906, "eval_samples_per_second": 48.538, "eval_steps_per_second": 6.07, "step": 800 }, { "epoch": 0.47, "learning_rate": 0.00017626025791324737, "loss": 1.1954, "step": 810 }, { "epoch": 0.48, "learning_rate": 0.00017596717467760847, "loss": 1.2532, "step": 820 }, { "epoch": 0.49, "learning_rate": 0.00017567409144196954, "loss": 1.2134, "step": 830 }, { "epoch": 0.49, "learning_rate": 0.00017538100820633061, "loss": 1.1357, "step": 840 }, { "epoch": 0.5, "learning_rate": 0.0001750879249706917, "loss": 1.2893, "step": 850 }, { "epoch": 0.5, "learning_rate": 0.00017479484173505276, "loss": 1.2528, "step": 860 }, { "epoch": 0.51, "learning_rate": 0.00017450175849941383, "loss": 1.3934, "step": 870 }, { "epoch": 0.52, "learning_rate": 0.0001742086752637749, "loss": 1.1394, "step": 880 }, { "epoch": 0.52, "learning_rate": 0.000173915592028136, "loss": 1.224, "step": 890 }, { "epoch": 0.53, "learning_rate": 0.00017362250879249708, "loss": 1.1765, "step": 900 }, { "epoch": 0.53, "eval_accuracy": 0.7028187151237659, "eval_loss": 1.1476110219955444, "eval_runtime": 142.6845, "eval_samples_per_second": 48.982, "eval_steps_per_second": 6.125, "step": 900 }, { "epoch": 0.53, "learning_rate": 0.00017332942555685815, "loss": 1.1505, "step": 910 }, { "epoch": 0.54, "learning_rate": 0.00017303634232121922, "loss": 1.0058, "step": 920 }, { "epoch": 0.55, "learning_rate": 0.00017274325908558032, "loss": 1.1991, "step": 930 }, { "epoch": 0.55, "learning_rate": 0.0001724501758499414, "loss": 1.1221, "step": 940 }, { "epoch": 0.56, "learning_rate": 0.00017215709261430247, "loss": 1.0279, "step": 950 }, { "epoch": 0.56, "learning_rate": 0.00017186400937866357, "loss": 1.0806, "step": 960 }, { "epoch": 0.57, "learning_rate": 0.00017157092614302464, "loss": 1.0342, "step": 970 }, { "epoch": 0.57, "learning_rate": 0.00017127784290738572, "loss": 1.095, "step": 980 }, { "epoch": 0.58, "learning_rate": 0.0001709847596717468, "loss": 1.0627, "step": 990 }, { "epoch": 0.59, "learning_rate": 0.00017069167643610786, "loss": 1.0152, "step": 1000 }, { "epoch": 0.59, "eval_accuracy": 0.7312920303333811, "eval_loss": 1.0357469320297241, "eval_runtime": 141.502, "eval_samples_per_second": 49.392, "eval_steps_per_second": 6.177, "step": 1000 }, { "epoch": 0.59, "learning_rate": 0.00017039859320046893, "loss": 1.1845, "step": 1010 }, { "epoch": 0.6, "learning_rate": 0.00017010550996483, "loss": 1.0731, "step": 1020 }, { "epoch": 0.6, "learning_rate": 0.00016981242672919108, "loss": 1.0936, "step": 1030 }, { "epoch": 0.61, "learning_rate": 0.00016951934349355218, "loss": 1.1276, "step": 1040 }, { "epoch": 0.62, "learning_rate": 0.00016922626025791325, "loss": 1.0028, "step": 1050 }, { "epoch": 0.62, "learning_rate": 0.00016893317702227432, "loss": 1.0649, "step": 1060 }, { "epoch": 0.63, "learning_rate": 0.00016864009378663542, "loss": 1.0313, "step": 1070 }, { "epoch": 0.63, "learning_rate": 0.0001683470105509965, "loss": 0.9503, "step": 1080 }, { "epoch": 0.64, "learning_rate": 0.00016805392731535757, "loss": 1.0665, "step": 1090 }, { "epoch": 0.64, "learning_rate": 0.00016776084407971864, "loss": 0.7861, "step": 1100 }, { "epoch": 0.64, "eval_accuracy": 0.7184146515953641, "eval_loss": 1.023007869720459, "eval_runtime": 141.0512, "eval_samples_per_second": 49.549, "eval_steps_per_second": 6.196, "step": 1100 }, { "epoch": 0.65, "learning_rate": 0.00016746776084407974, "loss": 1.0444, "step": 1110 }, { "epoch": 0.66, "learning_rate": 0.00016717467760844082, "loss": 0.9562, "step": 1120 }, { "epoch": 0.66, "learning_rate": 0.0001668815943728019, "loss": 1.057, "step": 1130 }, { "epoch": 0.67, "learning_rate": 0.00016658851113716296, "loss": 0.7354, "step": 1140 }, { "epoch": 0.67, "learning_rate": 0.00016629542790152403, "loss": 0.9462, "step": 1150 }, { "epoch": 0.68, "learning_rate": 0.0001660023446658851, "loss": 0.838, "step": 1160 }, { "epoch": 0.69, "learning_rate": 0.00016570926143024618, "loss": 1.0383, "step": 1170 }, { "epoch": 0.69, "learning_rate": 0.00016541617819460728, "loss": 0.9091, "step": 1180 }, { "epoch": 0.7, "learning_rate": 0.00016512309495896835, "loss": 0.9672, "step": 1190 }, { "epoch": 0.7, "learning_rate": 0.00016483001172332943, "loss": 1.0262, "step": 1200 }, { "epoch": 0.7, "eval_accuracy": 0.7385892116182573, "eval_loss": 0.9469121694564819, "eval_runtime": 143.0373, "eval_samples_per_second": 48.861, "eval_steps_per_second": 6.11, "step": 1200 }, { "epoch": 0.71, "learning_rate": 0.0001645369284876905, "loss": 0.8081, "step": 1210 }, { "epoch": 0.72, "learning_rate": 0.0001642438452520516, "loss": 0.8826, "step": 1220 }, { "epoch": 0.72, "learning_rate": 0.00016395076201641267, "loss": 0.8504, "step": 1230 }, { "epoch": 0.73, "learning_rate": 0.00016365767878077374, "loss": 0.9641, "step": 1240 }, { "epoch": 0.73, "learning_rate": 0.00016336459554513484, "loss": 0.9718, "step": 1250 }, { "epoch": 0.74, "learning_rate": 0.00016307151230949592, "loss": 0.9118, "step": 1260 }, { "epoch": 0.74, "learning_rate": 0.000162778429073857, "loss": 0.8592, "step": 1270 }, { "epoch": 0.75, "learning_rate": 0.00016248534583821806, "loss": 0.9236, "step": 1280 }, { "epoch": 0.76, "learning_rate": 0.00016219226260257914, "loss": 0.9054, "step": 1290 }, { "epoch": 0.76, "learning_rate": 0.0001618991793669402, "loss": 0.8905, "step": 1300 }, { "epoch": 0.76, "eval_accuracy": 0.7756474459865503, "eval_loss": 0.818402886390686, "eval_runtime": 142.6625, "eval_samples_per_second": 48.99, "eval_steps_per_second": 6.126, "step": 1300 }, { "epoch": 0.77, "learning_rate": 0.00016160609613130128, "loss": 0.9189, "step": 1310 }, { "epoch": 0.77, "learning_rate": 0.00016131301289566238, "loss": 0.7185, "step": 1320 }, { "epoch": 0.78, "learning_rate": 0.00016101992966002345, "loss": 0.7161, "step": 1330 }, { "epoch": 0.79, "learning_rate": 0.00016072684642438453, "loss": 0.78, "step": 1340 }, { "epoch": 0.79, "learning_rate": 0.0001604337631887456, "loss": 0.8638, "step": 1350 }, { "epoch": 0.8, "learning_rate": 0.0001601406799531067, "loss": 0.5591, "step": 1360 }, { "epoch": 0.8, "learning_rate": 0.00015984759671746777, "loss": 0.7001, "step": 1370 }, { "epoch": 0.81, "learning_rate": 0.00015955451348182884, "loss": 0.765, "step": 1380 }, { "epoch": 0.81, "learning_rate": 0.00015926143024618994, "loss": 0.7748, "step": 1390 }, { "epoch": 0.82, "learning_rate": 0.00015896834701055102, "loss": 0.6919, "step": 1400 }, { "epoch": 0.82, "eval_accuracy": 0.7710688224352554, "eval_loss": 0.8083020448684692, "eval_runtime": 141.661, "eval_samples_per_second": 49.336, "eval_steps_per_second": 6.17, "step": 1400 }, { "epoch": 0.83, "learning_rate": 0.0001586752637749121, "loss": 0.736, "step": 1410 }, { "epoch": 0.83, "learning_rate": 0.00015838218053927316, "loss": 0.7675, "step": 1420 }, { "epoch": 0.84, "learning_rate": 0.00015808909730363424, "loss": 0.7939, "step": 1430 }, { "epoch": 0.84, "learning_rate": 0.0001577960140679953, "loss": 0.8095, "step": 1440 }, { "epoch": 0.85, "learning_rate": 0.00015750293083235638, "loss": 0.7648, "step": 1450 }, { "epoch": 0.86, "learning_rate": 0.00015720984759671748, "loss": 0.743, "step": 1460 }, { "epoch": 0.86, "learning_rate": 0.00015691676436107855, "loss": 0.7438, "step": 1470 }, { "epoch": 0.87, "learning_rate": 0.00015662368112543963, "loss": 0.7346, "step": 1480 }, { "epoch": 0.87, "learning_rate": 0.0001563305978898007, "loss": 0.8393, "step": 1490 }, { "epoch": 0.88, "learning_rate": 0.0001560375146541618, "loss": 0.7494, "step": 1500 }, { "epoch": 0.88, "eval_accuracy": 0.7825153813134926, "eval_loss": 0.7600758075714111, "eval_runtime": 143.5803, "eval_samples_per_second": 48.677, "eval_steps_per_second": 6.087, "step": 1500 }, { "epoch": 0.89, "learning_rate": 0.00015574443141852287, "loss": 0.6779, "step": 1510 }, { "epoch": 0.89, "learning_rate": 0.00015545134818288395, "loss": 0.6841, "step": 1520 }, { "epoch": 0.9, "learning_rate": 0.00015515826494724505, "loss": 0.7694, "step": 1530 }, { "epoch": 0.9, "learning_rate": 0.00015486518171160612, "loss": 0.6697, "step": 1540 }, { "epoch": 0.91, "learning_rate": 0.0001545720984759672, "loss": 0.71, "step": 1550 }, { "epoch": 0.91, "learning_rate": 0.00015427901524032826, "loss": 0.6402, "step": 1560 }, { "epoch": 0.92, "learning_rate": 0.00015398593200468934, "loss": 0.6851, "step": 1570 }, { "epoch": 0.93, "learning_rate": 0.0001536928487690504, "loss": 0.6974, "step": 1580 }, { "epoch": 0.93, "learning_rate": 0.00015339976553341148, "loss": 0.7773, "step": 1590 }, { "epoch": 0.94, "learning_rate": 0.00015310668229777258, "loss": 0.5078, "step": 1600 }, { "epoch": 0.94, "eval_accuracy": 0.805551581055945, "eval_loss": 0.6883856058120728, "eval_runtime": 143.3782, "eval_samples_per_second": 48.745, "eval_steps_per_second": 6.096, "step": 1600 }, { "epoch": 0.94, "learning_rate": 0.00015281359906213366, "loss": 0.8773, "step": 1610 }, { "epoch": 0.95, "learning_rate": 0.00015252051582649473, "loss": 0.728, "step": 1620 }, { "epoch": 0.96, "learning_rate": 0.0001522274325908558, "loss": 0.7335, "step": 1630 }, { "epoch": 0.96, "learning_rate": 0.0001519343493552169, "loss": 0.6446, "step": 1640 }, { "epoch": 0.97, "learning_rate": 0.00015164126611957797, "loss": 0.6338, "step": 1650 }, { "epoch": 0.97, "learning_rate": 0.00015134818288393905, "loss": 0.6337, "step": 1660 }, { "epoch": 0.98, "learning_rate": 0.00015105509964830012, "loss": 0.6159, "step": 1670 }, { "epoch": 0.98, "learning_rate": 0.00015076201641266122, "loss": 0.6605, "step": 1680 }, { "epoch": 0.99, "learning_rate": 0.0001504689331770223, "loss": 0.7442, "step": 1690 }, { "epoch": 1.0, "learning_rate": 0.00015017584994138336, "loss": 0.7134, "step": 1700 }, { "epoch": 1.0, "eval_accuracy": 0.8159965660323365, "eval_loss": 0.6311172842979431, "eval_runtime": 142.9359, "eval_samples_per_second": 48.896, "eval_steps_per_second": 6.115, "step": 1700 }, { "epoch": 1.0, "learning_rate": 0.00014988276670574444, "loss": 0.4797, "step": 1710 }, { "epoch": 1.01, "learning_rate": 0.0001495896834701055, "loss": 0.3648, "step": 1720 }, { "epoch": 1.01, "learning_rate": 0.00014929660023446658, "loss": 0.5075, "step": 1730 }, { "epoch": 1.02, "learning_rate": 0.00014900351699882766, "loss": 0.4528, "step": 1740 }, { "epoch": 1.03, "learning_rate": 0.00014871043376318876, "loss": 0.3223, "step": 1750 }, { "epoch": 1.03, "learning_rate": 0.00014841735052754983, "loss": 0.5243, "step": 1760 }, { "epoch": 1.04, "learning_rate": 0.0001481242672919109, "loss": 0.5827, "step": 1770 }, { "epoch": 1.04, "learning_rate": 0.00014783118405627197, "loss": 0.5099, "step": 1780 }, { "epoch": 1.05, "learning_rate": 0.00014753810082063307, "loss": 0.2818, "step": 1790 }, { "epoch": 1.06, "learning_rate": 0.00014724501758499415, "loss": 0.4328, "step": 1800 }, { "epoch": 1.06, "eval_accuracy": 0.8251538131349263, "eval_loss": 0.5739801526069641, "eval_runtime": 142.1993, "eval_samples_per_second": 49.149, "eval_steps_per_second": 6.146, "step": 1800 }, { "epoch": 1.06, "learning_rate": 0.00014695193434935522, "loss": 0.4137, "step": 1810 }, { "epoch": 1.07, "learning_rate": 0.00014665885111371632, "loss": 0.3978, "step": 1820 }, { "epoch": 1.07, "learning_rate": 0.0001463657678780774, "loss": 0.4585, "step": 1830 }, { "epoch": 1.08, "learning_rate": 0.00014607268464243847, "loss": 0.3887, "step": 1840 }, { "epoch": 1.08, "learning_rate": 0.00014577960140679954, "loss": 0.4743, "step": 1850 }, { "epoch": 1.09, "learning_rate": 0.0001454865181711606, "loss": 0.4315, "step": 1860 }, { "epoch": 1.1, "learning_rate": 0.00014519343493552168, "loss": 0.4949, "step": 1870 }, { "epoch": 1.1, "learning_rate": 0.00014490035169988276, "loss": 0.5806, "step": 1880 }, { "epoch": 1.11, "learning_rate": 0.00014460726846424386, "loss": 0.4127, "step": 1890 }, { "epoch": 1.11, "learning_rate": 0.00014431418522860493, "loss": 0.4971, "step": 1900 }, { "epoch": 1.11, "eval_accuracy": 0.8290170267563314, "eval_loss": 0.5856359601020813, "eval_runtime": 141.3649, "eval_samples_per_second": 49.439, "eval_steps_per_second": 6.183, "step": 1900 }, { "epoch": 1.12, "learning_rate": 0.000144021101992966, "loss": 0.4027, "step": 1910 }, { "epoch": 1.13, "learning_rate": 0.00014372801875732708, "loss": 0.3814, "step": 1920 }, { "epoch": 1.13, "learning_rate": 0.00014343493552168818, "loss": 0.4191, "step": 1930 }, { "epoch": 1.14, "learning_rate": 0.00014314185228604925, "loss": 0.4999, "step": 1940 }, { "epoch": 1.14, "learning_rate": 0.00014284876905041032, "loss": 0.3142, "step": 1950 }, { "epoch": 1.15, "learning_rate": 0.00014255568581477142, "loss": 0.383, "step": 1960 }, { "epoch": 1.15, "learning_rate": 0.0001422626025791325, "loss": 0.4117, "step": 1970 }, { "epoch": 1.16, "learning_rate": 0.00014196951934349357, "loss": 0.3326, "step": 1980 }, { "epoch": 1.17, "learning_rate": 0.00014167643610785464, "loss": 0.332, "step": 1990 }, { "epoch": 1.17, "learning_rate": 0.0001413833528722157, "loss": 0.5207, "step": 2000 }, { "epoch": 1.17, "eval_accuracy": 0.8167119759622263, "eval_loss": 0.6218913197517395, "eval_runtime": 142.1541, "eval_samples_per_second": 49.165, "eval_steps_per_second": 6.148, "step": 2000 }, { "epoch": 1.18, "learning_rate": 0.00014109026963657678, "loss": 0.3766, "step": 2010 }, { "epoch": 1.18, "learning_rate": 0.00014079718640093786, "loss": 0.3261, "step": 2020 }, { "epoch": 1.19, "learning_rate": 0.00014050410316529896, "loss": 0.5744, "step": 2030 }, { "epoch": 1.2, "learning_rate": 0.00014024032825322392, "loss": 0.4988, "step": 2040 }, { "epoch": 1.2, "learning_rate": 0.00013994724501758502, "loss": 0.3837, "step": 2050 }, { "epoch": 1.21, "learning_rate": 0.0001396541617819461, "loss": 0.4694, "step": 2060 }, { "epoch": 1.21, "learning_rate": 0.00013936107854630716, "loss": 0.4364, "step": 2070 }, { "epoch": 1.22, "learning_rate": 0.00013906799531066824, "loss": 0.5789, "step": 2080 }, { "epoch": 1.23, "learning_rate": 0.0001387749120750293, "loss": 0.3874, "step": 2090 }, { "epoch": 1.23, "learning_rate": 0.0001384818288393904, "loss": 0.4027, "step": 2100 }, { "epoch": 1.23, "eval_accuracy": 0.826584632994706, "eval_loss": 0.5703310966491699, "eval_runtime": 144.3732, "eval_samples_per_second": 48.409, "eval_steps_per_second": 6.054, "step": 2100 }, { "epoch": 1.24, "learning_rate": 0.00013818874560375148, "loss": 0.4893, "step": 2110 }, { "epoch": 1.24, "learning_rate": 0.00013789566236811255, "loss": 0.5338, "step": 2120 }, { "epoch": 1.25, "learning_rate": 0.00013760257913247363, "loss": 0.4958, "step": 2130 }, { "epoch": 1.25, "learning_rate": 0.0001373094958968347, "loss": 0.3926, "step": 2140 }, { "epoch": 1.26, "learning_rate": 0.00013701641266119577, "loss": 0.3796, "step": 2150 }, { "epoch": 1.27, "learning_rate": 0.00013672332942555685, "loss": 0.4433, "step": 2160 }, { "epoch": 1.27, "learning_rate": 0.00013643024618991795, "loss": 0.3509, "step": 2170 }, { "epoch": 1.28, "learning_rate": 0.00013613716295427902, "loss": 0.2775, "step": 2180 }, { "epoch": 1.28, "learning_rate": 0.0001358440797186401, "loss": 0.3138, "step": 2190 }, { "epoch": 1.29, "learning_rate": 0.0001355509964830012, "loss": 0.5605, "step": 2200 }, { "epoch": 1.29, "eval_accuracy": 0.8371726999570754, "eval_loss": 0.5217174291610718, "eval_runtime": 142.7973, "eval_samples_per_second": 48.944, "eval_steps_per_second": 6.121, "step": 2200 }, { "epoch": 1.3, "learning_rate": 0.00013525791324736226, "loss": 0.3436, "step": 2210 }, { "epoch": 1.3, "learning_rate": 0.00013496483001172334, "loss": 0.3696, "step": 2220 }, { "epoch": 1.31, "learning_rate": 0.0001346717467760844, "loss": 0.4021, "step": 2230 }, { "epoch": 1.31, "learning_rate": 0.00013437866354044548, "loss": 0.4185, "step": 2240 }, { "epoch": 1.32, "learning_rate": 0.00013408558030480658, "loss": 0.2992, "step": 2250 }, { "epoch": 1.32, "learning_rate": 0.00013379249706916766, "loss": 0.4494, "step": 2260 }, { "epoch": 1.33, "learning_rate": 0.00013349941383352873, "loss": 0.4397, "step": 2270 }, { "epoch": 1.34, "learning_rate": 0.0001332063305978898, "loss": 0.4446, "step": 2280 }, { "epoch": 1.34, "learning_rate": 0.00013291324736225087, "loss": 0.4627, "step": 2290 }, { "epoch": 1.35, "learning_rate": 0.00013262016412661195, "loss": 0.2723, "step": 2300 }, { "epoch": 1.35, "eval_accuracy": 0.8564887680641007, "eval_loss": 0.48052945733070374, "eval_runtime": 141.8523, "eval_samples_per_second": 49.27, "eval_steps_per_second": 6.161, "step": 2300 }, { "epoch": 1.35, "learning_rate": 0.00013232708089097302, "loss": 0.3139, "step": 2310 }, { "epoch": 1.36, "learning_rate": 0.00013203399765533412, "loss": 0.3448, "step": 2320 }, { "epoch": 1.37, "learning_rate": 0.0001317409144196952, "loss": 0.4392, "step": 2330 }, { "epoch": 1.37, "learning_rate": 0.0001314478311840563, "loss": 0.417, "step": 2340 }, { "epoch": 1.38, "learning_rate": 0.00013115474794841736, "loss": 0.4455, "step": 2350 }, { "epoch": 1.38, "learning_rate": 0.00013086166471277844, "loss": 0.304, "step": 2360 }, { "epoch": 1.39, "learning_rate": 0.0001305685814771395, "loss": 0.2896, "step": 2370 }, { "epoch": 1.4, "learning_rate": 0.00013027549824150058, "loss": 0.3634, "step": 2380 }, { "epoch": 1.4, "learning_rate": 0.00012998241500586168, "loss": 0.3162, "step": 2390 }, { "epoch": 1.41, "learning_rate": 0.00012968933177022276, "loss": 0.401, "step": 2400 }, { "epoch": 1.41, "eval_accuracy": 0.8490485047932466, "eval_loss": 0.48114609718322754, "eval_runtime": 141.1595, "eval_samples_per_second": 49.511, "eval_steps_per_second": 6.192, "step": 2400 }, { "epoch": 1.41, "learning_rate": 0.00012939624853458383, "loss": 0.4162, "step": 2410 }, { "epoch": 1.42, "learning_rate": 0.0001291031652989449, "loss": 0.3563, "step": 2420 }, { "epoch": 1.42, "learning_rate": 0.00012881008206330597, "loss": 0.2935, "step": 2430 }, { "epoch": 1.43, "learning_rate": 0.00012851699882766705, "loss": 0.2772, "step": 2440 }, { "epoch": 1.44, "learning_rate": 0.00012822391559202812, "loss": 0.1815, "step": 2450 }, { "epoch": 1.44, "learning_rate": 0.00012793083235638922, "loss": 0.2421, "step": 2460 }, { "epoch": 1.45, "learning_rate": 0.0001276377491207503, "loss": 0.3135, "step": 2470 }, { "epoch": 1.45, "learning_rate": 0.0001273446658851114, "loss": 0.2401, "step": 2480 }, { "epoch": 1.46, "learning_rate": 0.00012705158264947247, "loss": 0.2437, "step": 2490 }, { "epoch": 1.47, "learning_rate": 0.00012675849941383354, "loss": 0.3419, "step": 2500 }, { "epoch": 1.47, "eval_accuracy": 0.8607812276434397, "eval_loss": 0.46192315220832825, "eval_runtime": 141.0585, "eval_samples_per_second": 49.547, "eval_steps_per_second": 6.196, "step": 2500 }, { "epoch": 1.47, "learning_rate": 0.0001264654161781946, "loss": 0.3481, "step": 2510 }, { "epoch": 1.48, "learning_rate": 0.00012617233294255568, "loss": 0.3368, "step": 2520 }, { "epoch": 1.48, "learning_rate": 0.00012587924970691678, "loss": 0.4863, "step": 2530 }, { "epoch": 1.49, "learning_rate": 0.00012558616647127786, "loss": 0.3587, "step": 2540 }, { "epoch": 1.49, "learning_rate": 0.00012529308323563893, "loss": 0.3619, "step": 2550 }, { "epoch": 1.5, "learning_rate": 0.000125, "loss": 0.32, "step": 2560 }, { "epoch": 1.51, "learning_rate": 0.00012470691676436108, "loss": 0.4175, "step": 2570 }, { "epoch": 1.51, "learning_rate": 0.00012441383352872215, "loss": 0.2654, "step": 2580 }, { "epoch": 1.52, "learning_rate": 0.00012412075029308322, "loss": 0.3706, "step": 2590 }, { "epoch": 1.52, "learning_rate": 0.00012382766705744432, "loss": 0.301, "step": 2600 }, { "epoch": 1.52, "eval_accuracy": 0.8712262126198311, "eval_loss": 0.43177559971809387, "eval_runtime": 142.4669, "eval_samples_per_second": 49.057, "eval_steps_per_second": 6.135, "step": 2600 }, { "epoch": 1.53, "learning_rate": 0.0001235345838218054, "loss": 0.3649, "step": 2610 }, { "epoch": 1.54, "learning_rate": 0.0001232415005861665, "loss": 0.2837, "step": 2620 }, { "epoch": 1.54, "learning_rate": 0.00012294841735052757, "loss": 0.2603, "step": 2630 }, { "epoch": 1.55, "learning_rate": 0.00012265533411488864, "loss": 0.3673, "step": 2640 }, { "epoch": 1.55, "learning_rate": 0.0001223622508792497, "loss": 0.2663, "step": 2650 }, { "epoch": 1.56, "learning_rate": 0.00012206916764361079, "loss": 0.2428, "step": 2660 }, { "epoch": 1.57, "learning_rate": 0.00012177608440797188, "loss": 0.3494, "step": 2670 }, { "epoch": 1.57, "learning_rate": 0.00012148300117233296, "loss": 0.4883, "step": 2680 }, { "epoch": 1.58, "learning_rate": 0.00012118991793669403, "loss": 0.4268, "step": 2690 }, { "epoch": 1.58, "learning_rate": 0.0001208968347010551, "loss": 0.2872, "step": 2700 }, { "epoch": 1.58, "eval_accuracy": 0.8573472599799685, "eval_loss": 0.46980831027030945, "eval_runtime": 142.0482, "eval_samples_per_second": 49.202, "eval_steps_per_second": 6.153, "step": 2700 }, { "epoch": 1.59, "learning_rate": 0.00012060375146541618, "loss": 0.356, "step": 2710 }, { "epoch": 1.59, "learning_rate": 0.00012031066822977726, "loss": 0.4152, "step": 2720 }, { "epoch": 1.6, "learning_rate": 0.00012001758499413834, "loss": 0.3628, "step": 2730 }, { "epoch": 1.61, "learning_rate": 0.00011972450175849944, "loss": 0.3722, "step": 2740 }, { "epoch": 1.61, "learning_rate": 0.00011943141852286051, "loss": 0.3239, "step": 2750 }, { "epoch": 1.62, "learning_rate": 0.00011913833528722158, "loss": 0.2286, "step": 2760 }, { "epoch": 1.62, "learning_rate": 0.00011884525205158265, "loss": 0.2773, "step": 2770 }, { "epoch": 1.63, "learning_rate": 0.00011855216881594373, "loss": 0.3792, "step": 2780 }, { "epoch": 1.64, "learning_rate": 0.00011825908558030481, "loss": 0.3792, "step": 2790 }, { "epoch": 1.64, "learning_rate": 0.00011796600234466589, "loss": 0.2451, "step": 2800 }, { "epoch": 1.64, "eval_accuracy": 0.8729431964515667, "eval_loss": 0.42098021507263184, "eval_runtime": 141.8474, "eval_samples_per_second": 49.271, "eval_steps_per_second": 6.162, "step": 2800 }, { "epoch": 1.65, "learning_rate": 0.00011767291910902699, "loss": 0.3349, "step": 2810 }, { "epoch": 1.65, "learning_rate": 0.00011737983587338806, "loss": 0.2566, "step": 2820 }, { "epoch": 1.66, "learning_rate": 0.00011708675263774913, "loss": 0.2207, "step": 2830 }, { "epoch": 1.66, "learning_rate": 0.0001167936694021102, "loss": 0.2553, "step": 2840 }, { "epoch": 1.67, "learning_rate": 0.00011650058616647128, "loss": 0.3184, "step": 2850 }, { "epoch": 1.68, "learning_rate": 0.00011620750293083236, "loss": 0.2927, "step": 2860 }, { "epoch": 1.68, "learning_rate": 0.00011591441969519344, "loss": 0.3839, "step": 2870 }, { "epoch": 1.69, "learning_rate": 0.00011562133645955454, "loss": 0.3236, "step": 2880 }, { "epoch": 1.69, "learning_rate": 0.00011532825322391561, "loss": 0.3058, "step": 2890 }, { "epoch": 1.7, "learning_rate": 0.00011503516998827668, "loss": 0.2211, "step": 2900 }, { "epoch": 1.7, "eval_accuracy": 0.8851051652596938, "eval_loss": 0.36445751786231995, "eval_runtime": 142.4316, "eval_samples_per_second": 49.069, "eval_steps_per_second": 6.136, "step": 2900 }, { "epoch": 1.71, "learning_rate": 0.00011474208675263775, "loss": 0.4079, "step": 2910 }, { "epoch": 1.71, "learning_rate": 0.00011444900351699883, "loss": 0.285, "step": 2920 }, { "epoch": 1.72, "learning_rate": 0.00011415592028135991, "loss": 0.268, "step": 2930 }, { "epoch": 1.72, "learning_rate": 0.00011386283704572099, "loss": 0.3587, "step": 2940 }, { "epoch": 1.73, "learning_rate": 0.00011356975381008206, "loss": 0.2891, "step": 2950 }, { "epoch": 1.74, "learning_rate": 0.00011327667057444316, "loss": 0.4267, "step": 2960 }, { "epoch": 1.74, "learning_rate": 0.00011298358733880423, "loss": 0.3827, "step": 2970 }, { "epoch": 1.75, "learning_rate": 0.0001126905041031653, "loss": 0.2797, "step": 2980 }, { "epoch": 1.75, "learning_rate": 0.00011239742086752638, "loss": 0.2858, "step": 2990 }, { "epoch": 1.76, "learning_rate": 0.00011210433763188745, "loss": 0.3145, "step": 3000 }, { "epoch": 1.76, "eval_accuracy": 0.871512376591787, "eval_loss": 0.41394171118736267, "eval_runtime": 143.8279, "eval_samples_per_second": 48.593, "eval_steps_per_second": 6.077, "step": 3000 }, { "epoch": 1.76, "learning_rate": 0.00011181125439624854, "loss": 0.2412, "step": 3010 }, { "epoch": 1.77, "learning_rate": 0.00011151817116060961, "loss": 0.4263, "step": 3020 }, { "epoch": 1.78, "learning_rate": 0.00011122508792497071, "loss": 0.2569, "step": 3030 }, { "epoch": 1.78, "learning_rate": 0.00011093200468933178, "loss": 0.3211, "step": 3040 }, { "epoch": 1.79, "learning_rate": 0.00011063892145369286, "loss": 0.2235, "step": 3050 }, { "epoch": 1.79, "learning_rate": 0.00011034583821805393, "loss": 0.2982, "step": 3060 }, { "epoch": 1.8, "learning_rate": 0.000110052754982415, "loss": 0.2236, "step": 3070 }, { "epoch": 1.81, "learning_rate": 0.00010975967174677609, "loss": 0.2339, "step": 3080 }, { "epoch": 1.81, "learning_rate": 0.00010946658851113716, "loss": 0.2568, "step": 3090 }, { "epoch": 1.82, "learning_rate": 0.00010917350527549826, "loss": 0.2001, "step": 3100 }, { "epoch": 1.82, "eval_accuracy": 0.8863929031334955, "eval_loss": 0.3604518175125122, "eval_runtime": 143.3768, "eval_samples_per_second": 48.746, "eval_steps_per_second": 6.096, "step": 3100 }, { "epoch": 1.82, "learning_rate": 0.00010888042203985933, "loss": 0.2798, "step": 3110 }, { "epoch": 1.83, "learning_rate": 0.0001085873388042204, "loss": 0.2949, "step": 3120 }, { "epoch": 1.83, "learning_rate": 0.00010829425556858148, "loss": 0.2569, "step": 3130 }, { "epoch": 1.84, "learning_rate": 0.00010800117233294255, "loss": 0.2033, "step": 3140 }, { "epoch": 1.85, "learning_rate": 0.00010770808909730364, "loss": 0.2469, "step": 3150 }, { "epoch": 1.85, "learning_rate": 0.00010741500586166471, "loss": 0.3182, "step": 3160 }, { "epoch": 1.86, "learning_rate": 0.00010712192262602581, "loss": 0.2325, "step": 3170 }, { "epoch": 1.86, "learning_rate": 0.00010682883939038688, "loss": 0.3356, "step": 3180 }, { "epoch": 1.87, "learning_rate": 0.00010653575615474796, "loss": 0.2707, "step": 3190 }, { "epoch": 1.88, "learning_rate": 0.00010624267291910903, "loss": 0.3095, "step": 3200 }, { "epoch": 1.88, "eval_accuracy": 0.8675060809844041, "eval_loss": 0.4273549020290375, "eval_runtime": 143.5124, "eval_samples_per_second": 48.7, "eval_steps_per_second": 6.09, "step": 3200 }, { "epoch": 1.88, "learning_rate": 0.0001059495896834701, "loss": 0.3672, "step": 3210 }, { "epoch": 1.89, "learning_rate": 0.00010565650644783119, "loss": 0.3408, "step": 3220 }, { "epoch": 1.89, "learning_rate": 0.00010536342321219226, "loss": 0.277, "step": 3230 }, { "epoch": 1.9, "learning_rate": 0.00010507033997655336, "loss": 0.2123, "step": 3240 }, { "epoch": 1.91, "learning_rate": 0.00010477725674091443, "loss": 0.1926, "step": 3250 }, { "epoch": 1.91, "learning_rate": 0.0001044841735052755, "loss": 0.2049, "step": 3260 }, { "epoch": 1.92, "learning_rate": 0.00010419109026963658, "loss": 0.3891, "step": 3270 }, { "epoch": 1.92, "learning_rate": 0.00010389800703399765, "loss": 0.3387, "step": 3280 }, { "epoch": 1.93, "learning_rate": 0.00010360492379835874, "loss": 0.2356, "step": 3290 }, { "epoch": 1.93, "learning_rate": 0.00010331184056271981, "loss": 0.1915, "step": 3300 }, { "epoch": 1.93, "eval_accuracy": 0.9101445128058377, "eval_loss": 0.2909921109676361, "eval_runtime": 142.0389, "eval_samples_per_second": 49.205, "eval_steps_per_second": 6.153, "step": 3300 }, { "epoch": 1.94, "learning_rate": 0.00010301875732708091, "loss": 0.2416, "step": 3310 }, { "epoch": 1.95, "learning_rate": 0.00010272567409144198, "loss": 0.2543, "step": 3320 }, { "epoch": 1.95, "learning_rate": 0.00010243259085580306, "loss": 0.2175, "step": 3330 }, { "epoch": 1.96, "learning_rate": 0.00010213950762016413, "loss": 0.335, "step": 3340 }, { "epoch": 1.96, "learning_rate": 0.0001018464243845252, "loss": 0.3157, "step": 3350 }, { "epoch": 1.97, "learning_rate": 0.00010155334114888629, "loss": 0.2386, "step": 3360 }, { "epoch": 1.98, "learning_rate": 0.00010126025791324736, "loss": 0.289, "step": 3370 }, { "epoch": 1.98, "learning_rate": 0.00010096717467760846, "loss": 0.2696, "step": 3380 }, { "epoch": 1.99, "learning_rate": 0.00010067409144196953, "loss": 0.504, "step": 3390 }, { "epoch": 1.99, "learning_rate": 0.00010038100820633061, "loss": 0.2465, "step": 3400 }, { "epoch": 1.99, "eval_accuracy": 0.9102875947918158, "eval_loss": 0.27264514565467834, "eval_runtime": 141.7632, "eval_samples_per_second": 49.301, "eval_steps_per_second": 6.165, "step": 3400 }, { "epoch": 2.0, "learning_rate": 0.00010008792497069168, "loss": 0.1975, "step": 3410 }, { "epoch": 2.0, "learning_rate": 9.979484173505275e-05, "loss": 0.1501, "step": 3420 }, { "epoch": 2.01, "learning_rate": 9.950175849941384e-05, "loss": 0.1063, "step": 3430 }, { "epoch": 2.02, "learning_rate": 9.920867526377493e-05, "loss": 0.125, "step": 3440 }, { "epoch": 2.02, "learning_rate": 9.8915592028136e-05, "loss": 0.0868, "step": 3450 }, { "epoch": 2.03, "learning_rate": 9.862250879249707e-05, "loss": 0.1477, "step": 3460 }, { "epoch": 2.03, "learning_rate": 9.832942555685816e-05, "loss": 0.1333, "step": 3470 }, { "epoch": 2.04, "learning_rate": 9.803634232121923e-05, "loss": 0.0903, "step": 3480 }, { "epoch": 2.05, "learning_rate": 9.77432590855803e-05, "loss": 0.0753, "step": 3490 }, { "epoch": 2.05, "learning_rate": 9.745017584994138e-05, "loss": 0.1218, "step": 3500 }, { "epoch": 2.05, "eval_accuracy": 0.9128630705394191, "eval_loss": 0.2742130756378174, "eval_runtime": 141.9071, "eval_samples_per_second": 49.251, "eval_steps_per_second": 6.159, "step": 3500 }, { "epoch": 2.06, "learning_rate": 9.715709261430248e-05, "loss": 0.1116, "step": 3510 }, { "epoch": 2.06, "learning_rate": 9.686400937866355e-05, "loss": 0.1161, "step": 3520 }, { "epoch": 2.07, "learning_rate": 9.657092614302462e-05, "loss": 0.1803, "step": 3530 }, { "epoch": 2.08, "learning_rate": 9.627784290738571e-05, "loss": 0.0855, "step": 3540 }, { "epoch": 2.08, "learning_rate": 9.598475967174678e-05, "loss": 0.1219, "step": 3550 }, { "epoch": 2.09, "learning_rate": 9.569167643610785e-05, "loss": 0.128, "step": 3560 }, { "epoch": 2.09, "learning_rate": 9.539859320046893e-05, "loss": 0.1373, "step": 3570 }, { "epoch": 2.1, "learning_rate": 9.510550996483001e-05, "loss": 0.1646, "step": 3580 }, { "epoch": 2.1, "learning_rate": 9.48124267291911e-05, "loss": 0.1103, "step": 3590 }, { "epoch": 2.11, "learning_rate": 9.451934349355217e-05, "loss": 0.0752, "step": 3600 }, { "epoch": 2.11, "eval_accuracy": 0.9183001860065818, "eval_loss": 0.25723978877067566, "eval_runtime": 141.3288, "eval_samples_per_second": 49.452, "eval_steps_per_second": 6.184, "step": 3600 }, { "epoch": 2.12, "learning_rate": 9.422626025791326e-05, "loss": 0.1596, "step": 3610 }, { "epoch": 2.12, "learning_rate": 9.393317702227433e-05, "loss": 0.1059, "step": 3620 }, { "epoch": 2.13, "learning_rate": 9.36400937866354e-05, "loss": 0.186, "step": 3630 }, { "epoch": 2.13, "learning_rate": 9.334701055099648e-05, "loss": 0.106, "step": 3640 }, { "epoch": 2.14, "learning_rate": 9.305392731535756e-05, "loss": 0.1323, "step": 3650 }, { "epoch": 2.15, "learning_rate": 9.276084407971865e-05, "loss": 0.1517, "step": 3660 }, { "epoch": 2.15, "learning_rate": 9.246776084407972e-05, "loss": 0.1218, "step": 3670 }, { "epoch": 2.16, "learning_rate": 9.217467760844081e-05, "loss": 0.1591, "step": 3680 }, { "epoch": 2.16, "learning_rate": 9.188159437280188e-05, "loss": 0.129, "step": 3690 }, { "epoch": 2.17, "learning_rate": 9.158851113716295e-05, "loss": 0.1067, "step": 3700 }, { "epoch": 2.17, "eval_accuracy": 0.9203033338102733, "eval_loss": 0.25835496187210083, "eval_runtime": 155.4296, "eval_samples_per_second": 44.966, "eval_steps_per_second": 5.623, "step": 3700 }, { "epoch": 2.17, "learning_rate": 9.129542790152403e-05, "loss": 0.0996, "step": 3710 }, { "epoch": 2.18, "learning_rate": 9.100234466588511e-05, "loss": 0.1542, "step": 3720 }, { "epoch": 2.19, "learning_rate": 9.07092614302462e-05, "loss": 0.1354, "step": 3730 }, { "epoch": 2.19, "learning_rate": 9.041617819460727e-05, "loss": 0.1129, "step": 3740 }, { "epoch": 2.2, "learning_rate": 9.012309495896835e-05, "loss": 0.188, "step": 3750 }, { "epoch": 2.2, "learning_rate": 8.983001172332943e-05, "loss": 0.1329, "step": 3760 }, { "epoch": 2.21, "learning_rate": 8.95369284876905e-05, "loss": 0.1951, "step": 3770 }, { "epoch": 2.22, "learning_rate": 8.924384525205158e-05, "loss": 0.0462, "step": 3780 }, { "epoch": 2.22, "learning_rate": 8.895076201641266e-05, "loss": 0.0564, "step": 3790 }, { "epoch": 2.23, "learning_rate": 8.865767878077375e-05, "loss": 0.0838, "step": 3800 }, { "epoch": 2.23, "eval_accuracy": 0.921161825726141, "eval_loss": 0.2457675188779831, "eval_runtime": 162.6766, "eval_samples_per_second": 42.963, "eval_steps_per_second": 5.373, "step": 3800 }, { "epoch": 2.23, "learning_rate": 8.836459554513482e-05, "loss": 0.151, "step": 3810 }, { "epoch": 2.24, "learning_rate": 8.80715123094959e-05, "loss": 0.1161, "step": 3820 }, { "epoch": 2.25, "learning_rate": 8.777842907385698e-05, "loss": 0.1928, "step": 3830 }, { "epoch": 2.25, "learning_rate": 8.748534583821806e-05, "loss": 0.0891, "step": 3840 }, { "epoch": 2.26, "learning_rate": 8.719226260257913e-05, "loss": 0.1545, "step": 3850 }, { "epoch": 2.26, "learning_rate": 8.689917936694021e-05, "loss": 0.1813, "step": 3860 }, { "epoch": 2.27, "learning_rate": 8.66060961313013e-05, "loss": 0.1153, "step": 3870 }, { "epoch": 2.27, "learning_rate": 8.631301289566237e-05, "loss": 0.0978, "step": 3880 }, { "epoch": 2.28, "learning_rate": 8.601992966002345e-05, "loss": 0.1, "step": 3890 }, { "epoch": 2.29, "learning_rate": 8.572684642438453e-05, "loss": 0.1106, "step": 3900 }, { "epoch": 2.29, "eval_accuracy": 0.9237373014737444, "eval_loss": 0.24122054874897003, "eval_runtime": 164.1489, "eval_samples_per_second": 42.577, "eval_steps_per_second": 5.324, "step": 3900 }, { "epoch": 2.29, "learning_rate": 8.54337631887456e-05, "loss": 0.1534, "step": 3910 }, { "epoch": 2.3, "learning_rate": 8.514067995310668e-05, "loss": 0.1362, "step": 3920 }, { "epoch": 2.3, "learning_rate": 8.484759671746777e-05, "loss": 0.1671, "step": 3930 }, { "epoch": 2.31, "learning_rate": 8.455451348182885e-05, "loss": 0.1264, "step": 3940 }, { "epoch": 2.32, "learning_rate": 8.426143024618992e-05, "loss": 0.0528, "step": 3950 }, { "epoch": 2.32, "learning_rate": 8.3968347010551e-05, "loss": 0.1812, "step": 3960 }, { "epoch": 2.33, "learning_rate": 8.367526377491208e-05, "loss": 0.0458, "step": 3970 }, { "epoch": 2.33, "learning_rate": 8.338218053927316e-05, "loss": 0.1345, "step": 3980 }, { "epoch": 2.34, "learning_rate": 8.308909730363423e-05, "loss": 0.0905, "step": 3990 }, { "epoch": 2.34, "learning_rate": 8.279601406799532e-05, "loss": 0.092, "step": 4000 }, { "epoch": 2.34, "eval_accuracy": 0.9277435970811275, "eval_loss": 0.22324928641319275, "eval_runtime": 164.4007, "eval_samples_per_second": 42.512, "eval_steps_per_second": 5.316, "step": 4000 }, { "epoch": 2.35, "learning_rate": 8.25029308323564e-05, "loss": 0.1248, "step": 4010 }, { "epoch": 2.36, "learning_rate": 8.220984759671747e-05, "loss": 0.1342, "step": 4020 }, { "epoch": 2.36, "learning_rate": 8.191676436107855e-05, "loss": 0.1925, "step": 4030 }, { "epoch": 2.37, "learning_rate": 8.162368112543963e-05, "loss": 0.1011, "step": 4040 }, { "epoch": 2.37, "learning_rate": 8.13305978898007e-05, "loss": 0.0761, "step": 4050 }, { "epoch": 2.38, "learning_rate": 8.106682297772568e-05, "loss": 0.1509, "step": 4060 }, { "epoch": 2.39, "learning_rate": 8.077373974208675e-05, "loss": 0.0951, "step": 4070 }, { "epoch": 2.39, "learning_rate": 8.048065650644784e-05, "loss": 0.2193, "step": 4080 }, { "epoch": 2.4, "learning_rate": 8.018757327080891e-05, "loss": 0.0828, "step": 4090 }, { "epoch": 2.4, "learning_rate": 7.989449003516999e-05, "loss": 0.1056, "step": 4100 }, { "epoch": 2.4, "eval_accuracy": 0.9077121190442123, "eval_loss": 0.2816599905490875, "eval_runtime": 161.9883, "eval_samples_per_second": 43.145, "eval_steps_per_second": 5.395, "step": 4100 }, { "epoch": 2.41, "learning_rate": 7.963071512309497e-05, "loss": 0.1311, "step": 4110 }, { "epoch": 2.42, "learning_rate": 7.933763188745605e-05, "loss": 0.042, "step": 4120 }, { "epoch": 2.42, "learning_rate": 7.904454865181712e-05, "loss": 0.0865, "step": 4130 }, { "epoch": 2.43, "learning_rate": 7.875146541617819e-05, "loss": 0.0694, "step": 4140 }, { "epoch": 2.43, "learning_rate": 7.845838218053928e-05, "loss": 0.085, "step": 4150 }, { "epoch": 2.44, "learning_rate": 7.816529894490035e-05, "loss": 0.0738, "step": 4160 }, { "epoch": 2.44, "learning_rate": 7.787221570926144e-05, "loss": 0.0573, "step": 4170 }, { "epoch": 2.45, "learning_rate": 7.757913247362252e-05, "loss": 0.0758, "step": 4180 }, { "epoch": 2.46, "learning_rate": 7.72860492379836e-05, "loss": 0.144, "step": 4190 }, { "epoch": 2.46, "learning_rate": 7.699296600234467e-05, "loss": 0.0696, "step": 4200 }, { "epoch": 2.46, "eval_accuracy": 0.9284590070110174, "eval_loss": 0.23343555629253387, "eval_runtime": 162.6927, "eval_samples_per_second": 42.958, "eval_steps_per_second": 5.372, "step": 4200 }, { "epoch": 2.47, "learning_rate": 7.669988276670574e-05, "loss": 0.0767, "step": 4210 }, { "epoch": 2.47, "learning_rate": 7.640679953106683e-05, "loss": 0.1279, "step": 4220 }, { "epoch": 2.48, "learning_rate": 7.61137162954279e-05, "loss": 0.0458, "step": 4230 }, { "epoch": 2.49, "learning_rate": 7.582063305978899e-05, "loss": 0.0958, "step": 4240 }, { "epoch": 2.49, "learning_rate": 7.552754982415006e-05, "loss": 0.1067, "step": 4250 }, { "epoch": 2.5, "learning_rate": 7.523446658851115e-05, "loss": 0.0582, "step": 4260 }, { "epoch": 2.5, "learning_rate": 7.494138335287222e-05, "loss": 0.0322, "step": 4270 }, { "epoch": 2.51, "learning_rate": 7.464830011723329e-05, "loss": 0.069, "step": 4280 }, { "epoch": 2.51, "learning_rate": 7.435521688159438e-05, "loss": 0.0548, "step": 4290 }, { "epoch": 2.52, "learning_rate": 7.406213364595545e-05, "loss": 0.0444, "step": 4300 }, { "epoch": 2.52, "eval_accuracy": 0.9363285162398054, "eval_loss": 0.21417850255966187, "eval_runtime": 164.9686, "eval_samples_per_second": 42.366, "eval_steps_per_second": 5.298, "step": 4300 }, { "epoch": 2.53, "learning_rate": 7.376905041031654e-05, "loss": 0.086, "step": 4310 }, { "epoch": 2.53, "learning_rate": 7.347596717467761e-05, "loss": 0.0917, "step": 4320 }, { "epoch": 2.54, "learning_rate": 7.31828839390387e-05, "loss": 0.0744, "step": 4330 }, { "epoch": 2.54, "learning_rate": 7.288980070339977e-05, "loss": 0.0346, "step": 4340 }, { "epoch": 2.55, "learning_rate": 7.259671746776084e-05, "loss": 0.0869, "step": 4350 }, { "epoch": 2.56, "learning_rate": 7.230363423212193e-05, "loss": 0.0513, "step": 4360 }, { "epoch": 2.56, "learning_rate": 7.2010550996483e-05, "loss": 0.1099, "step": 4370 }, { "epoch": 2.57, "learning_rate": 7.171746776084409e-05, "loss": 0.102, "step": 4380 }, { "epoch": 2.57, "learning_rate": 7.142438452520516e-05, "loss": 0.0844, "step": 4390 }, { "epoch": 2.58, "learning_rate": 7.113130128956625e-05, "loss": 0.1046, "step": 4400 }, { "epoch": 2.58, "eval_accuracy": 0.9351838603519816, "eval_loss": 0.2036106288433075, "eval_runtime": 164.8201, "eval_samples_per_second": 42.404, "eval_steps_per_second": 5.303, "step": 4400 }, { "epoch": 2.58, "learning_rate": 7.083821805392732e-05, "loss": 0.0337, "step": 4410 }, { "epoch": 2.59, "learning_rate": 7.054513481828839e-05, "loss": 0.0956, "step": 4420 }, { "epoch": 2.6, "learning_rate": 7.025205158264948e-05, "loss": 0.0669, "step": 4430 }, { "epoch": 2.6, "learning_rate": 6.995896834701055e-05, "loss": 0.0814, "step": 4440 }, { "epoch": 2.61, "learning_rate": 6.966588511137164e-05, "loss": 0.0544, "step": 4450 }, { "epoch": 2.61, "learning_rate": 6.937280187573271e-05, "loss": 0.087, "step": 4460 }, { "epoch": 2.62, "learning_rate": 6.90797186400938e-05, "loss": 0.111, "step": 4470 }, { "epoch": 2.63, "learning_rate": 6.878663540445487e-05, "loss": 0.0844, "step": 4480 }, { "epoch": 2.63, "learning_rate": 6.849355216881594e-05, "loss": 0.0468, "step": 4490 }, { "epoch": 2.64, "learning_rate": 6.820046893317703e-05, "loss": 0.066, "step": 4500 }, { "epoch": 2.64, "eval_accuracy": 0.9364715982257834, "eval_loss": 0.2114696204662323, "eval_runtime": 164.4144, "eval_samples_per_second": 42.508, "eval_steps_per_second": 5.316, "step": 4500 }, { "epoch": 2.64, "learning_rate": 6.79073856975381e-05, "loss": 0.0762, "step": 4510 }, { "epoch": 2.65, "learning_rate": 6.761430246189917e-05, "loss": 0.077, "step": 4520 }, { "epoch": 2.66, "learning_rate": 6.732121922626026e-05, "loss": 0.0801, "step": 4530 }, { "epoch": 2.66, "learning_rate": 6.702813599062135e-05, "loss": 0.1392, "step": 4540 }, { "epoch": 2.67, "learning_rate": 6.673505275498242e-05, "loss": 0.04, "step": 4550 }, { "epoch": 2.67, "learning_rate": 6.64419695193435e-05, "loss": 0.0774, "step": 4560 }, { "epoch": 2.68, "learning_rate": 6.614888628370457e-05, "loss": 0.098, "step": 4570 }, { "epoch": 2.68, "learning_rate": 6.585580304806565e-05, "loss": 0.0882, "step": 4580 }, { "epoch": 2.69, "learning_rate": 6.556271981242673e-05, "loss": 0.0846, "step": 4590 }, { "epoch": 2.7, "learning_rate": 6.526963657678781e-05, "loss": 0.0649, "step": 4600 }, { "epoch": 2.7, "eval_accuracy": 0.9447703534125054, "eval_loss": 0.1729690283536911, "eval_runtime": 164.155, "eval_samples_per_second": 42.576, "eval_steps_per_second": 5.324, "step": 4600 }, { "epoch": 2.7, "learning_rate": 6.49765533411489e-05, "loss": 0.0805, "step": 4610 }, { "epoch": 2.71, "learning_rate": 6.468347010550997e-05, "loss": 0.0386, "step": 4620 }, { "epoch": 2.71, "learning_rate": 6.439038686987104e-05, "loss": 0.0576, "step": 4630 }, { "epoch": 2.72, "learning_rate": 6.409730363423212e-05, "loss": 0.0721, "step": 4640 }, { "epoch": 2.73, "learning_rate": 6.38042203985932e-05, "loss": 0.1281, "step": 4650 }, { "epoch": 2.73, "learning_rate": 6.351113716295428e-05, "loss": 0.0783, "step": 4660 }, { "epoch": 2.74, "learning_rate": 6.321805392731536e-05, "loss": 0.0638, "step": 4670 }, { "epoch": 2.74, "learning_rate": 6.292497069167645e-05, "loss": 0.1168, "step": 4680 }, { "epoch": 2.75, "learning_rate": 6.263188745603752e-05, "loss": 0.0586, "step": 4690 }, { "epoch": 2.75, "learning_rate": 6.23388042203986e-05, "loss": 0.0513, "step": 4700 }, { "epoch": 2.75, "eval_accuracy": 0.93389612247818, "eval_loss": 0.21480464935302734, "eval_runtime": 164.883, "eval_samples_per_second": 42.388, "eval_steps_per_second": 5.301, "step": 4700 }, { "epoch": 2.76, "learning_rate": 6.204572098475967e-05, "loss": 0.0793, "step": 4710 }, { "epoch": 2.77, "learning_rate": 6.175263774912075e-05, "loss": 0.0444, "step": 4720 }, { "epoch": 2.77, "learning_rate": 6.145955451348183e-05, "loss": 0.0528, "step": 4730 }, { "epoch": 2.78, "learning_rate": 6.116647127784291e-05, "loss": 0.05, "step": 4740 }, { "epoch": 2.78, "learning_rate": 6.087338804220399e-05, "loss": 0.086, "step": 4750 }, { "epoch": 2.79, "learning_rate": 6.058030480656507e-05, "loss": 0.0332, "step": 4760 }, { "epoch": 2.8, "learning_rate": 6.0287221570926144e-05, "loss": 0.0645, "step": 4770 }, { "epoch": 2.8, "learning_rate": 5.999413833528722e-05, "loss": 0.084, "step": 4780 }, { "epoch": 2.81, "learning_rate": 5.9701055099648304e-05, "loss": 0.0662, "step": 4790 }, { "epoch": 2.81, "learning_rate": 5.940797186400938e-05, "loss": 0.0917, "step": 4800 }, { "epoch": 2.81, "eval_accuracy": 0.9437687795106596, "eval_loss": 0.18099913001060486, "eval_runtime": 163.6306, "eval_samples_per_second": 42.712, "eval_steps_per_second": 5.341, "step": 4800 }, { "epoch": 2.82, "learning_rate": 5.9114888628370456e-05, "loss": 0.0719, "step": 4810 }, { "epoch": 2.83, "learning_rate": 5.882180539273154e-05, "loss": 0.0301, "step": 4820 }, { "epoch": 2.83, "learning_rate": 5.852872215709262e-05, "loss": 0.0396, "step": 4830 }, { "epoch": 2.84, "learning_rate": 5.8235638921453695e-05, "loss": 0.0671, "step": 4840 }, { "epoch": 2.84, "learning_rate": 5.794255568581477e-05, "loss": 0.0185, "step": 4850 }, { "epoch": 2.85, "learning_rate": 5.7649472450175854e-05, "loss": 0.0473, "step": 4860 }, { "epoch": 2.85, "learning_rate": 5.7356389214536934e-05, "loss": 0.079, "step": 4870 }, { "epoch": 2.86, "learning_rate": 5.7063305978898006e-05, "loss": 0.0578, "step": 4880 }, { "epoch": 2.87, "learning_rate": 5.677022274325909e-05, "loss": 0.0324, "step": 4890 }, { "epoch": 2.87, "learning_rate": 5.647713950762017e-05, "loss": 0.0879, "step": 4900 }, { "epoch": 2.87, "eval_accuracy": 0.9387609100014308, "eval_loss": 0.19710442423820496, "eval_runtime": 164.6231, "eval_samples_per_second": 42.455, "eval_steps_per_second": 5.309, "step": 4900 }, { "epoch": 2.88, "learning_rate": 5.6184056271981245e-05, "loss": 0.0679, "step": 4910 }, { "epoch": 2.88, "learning_rate": 5.589097303634232e-05, "loss": 0.0441, "step": 4920 }, { "epoch": 2.89, "learning_rate": 5.5597889800703404e-05, "loss": 0.0352, "step": 4930 }, { "epoch": 2.9, "learning_rate": 5.5304806565064484e-05, "loss": 0.0405, "step": 4940 }, { "epoch": 2.9, "learning_rate": 5.501172332942556e-05, "loss": 0.0397, "step": 4950 }, { "epoch": 2.91, "learning_rate": 5.471864009378663e-05, "loss": 0.0531, "step": 4960 }, { "epoch": 2.91, "learning_rate": 5.4425556858147716e-05, "loss": 0.0564, "step": 4970 }, { "epoch": 2.92, "learning_rate": 5.4132473622508796e-05, "loss": 0.048, "step": 4980 }, { "epoch": 2.92, "learning_rate": 5.383939038686987e-05, "loss": 0.0597, "step": 4990 }, { "epoch": 2.93, "learning_rate": 5.3546307151230955e-05, "loss": 0.1052, "step": 5000 }, { "epoch": 2.93, "eval_accuracy": 0.9507797968235799, "eval_loss": 0.16023729741573334, "eval_runtime": 164.4511, "eval_samples_per_second": 42.499, "eval_steps_per_second": 5.315, "step": 5000 }, { "epoch": 2.94, "learning_rate": 5.3253223915592034e-05, "loss": 0.0319, "step": 5010 }, { "epoch": 2.94, "learning_rate": 5.296014067995311e-05, "loss": 0.0565, "step": 5020 }, { "epoch": 2.95, "learning_rate": 5.266705744431418e-05, "loss": 0.0545, "step": 5030 }, { "epoch": 2.95, "learning_rate": 5.2373974208675266e-05, "loss": 0.0658, "step": 5040 }, { "epoch": 2.96, "learning_rate": 5.2080890973036346e-05, "loss": 0.0377, "step": 5050 }, { "epoch": 2.97, "learning_rate": 5.178780773739742e-05, "loss": 0.0384, "step": 5060 }, { "epoch": 2.97, "learning_rate": 5.1494724501758505e-05, "loss": 0.0243, "step": 5070 }, { "epoch": 2.98, "learning_rate": 5.1201641266119585e-05, "loss": 0.0173, "step": 5080 }, { "epoch": 2.98, "learning_rate": 5.090855803048066e-05, "loss": 0.0739, "step": 5090 }, { "epoch": 2.99, "learning_rate": 5.061547479484173e-05, "loss": 0.0362, "step": 5100 }, { "epoch": 2.99, "eval_accuracy": 0.9556445843468307, "eval_loss": 0.1475149393081665, "eval_runtime": 163.8639, "eval_samples_per_second": 42.651, "eval_steps_per_second": 5.334, "step": 5100 }, { "epoch": 3.0, "learning_rate": 5.032239155920282e-05, "loss": 0.0649, "step": 5110 }, { "epoch": 3.0, "learning_rate": 5.0029308323563896e-05, "loss": 0.0223, "step": 5120 }, { "epoch": 3.01, "learning_rate": 4.9736225087924976e-05, "loss": 0.0067, "step": 5130 }, { "epoch": 3.01, "learning_rate": 4.944314185228605e-05, "loss": 0.0183, "step": 5140 }, { "epoch": 3.02, "learning_rate": 4.9150058616647135e-05, "loss": 0.0171, "step": 5150 }, { "epoch": 3.02, "learning_rate": 4.885697538100821e-05, "loss": 0.0287, "step": 5160 }, { "epoch": 3.03, "learning_rate": 4.856389214536929e-05, "loss": 0.0121, "step": 5170 }, { "epoch": 3.04, "learning_rate": 4.827080890973037e-05, "loss": 0.0064, "step": 5180 }, { "epoch": 3.04, "learning_rate": 4.797772567409145e-05, "loss": 0.0179, "step": 5190 }, { "epoch": 3.05, "learning_rate": 4.768464243845252e-05, "loss": 0.041, "step": 5200 }, { "epoch": 3.05, "eval_accuracy": 0.9585062240663901, "eval_loss": 0.13277386128902435, "eval_runtime": 162.7191, "eval_samples_per_second": 42.951, "eval_steps_per_second": 5.371, "step": 5200 }, { "epoch": 3.05, "learning_rate": 4.73915592028136e-05, "loss": 0.0101, "step": 5210 }, { "epoch": 3.06, "learning_rate": 4.709847596717468e-05, "loss": 0.0035, "step": 5220 }, { "epoch": 3.07, "learning_rate": 4.680539273153576e-05, "loss": 0.0073, "step": 5230 }, { "epoch": 3.07, "learning_rate": 4.651230949589684e-05, "loss": 0.0123, "step": 5240 }, { "epoch": 3.08, "learning_rate": 4.621922626025791e-05, "loss": 0.0272, "step": 5250 }, { "epoch": 3.08, "learning_rate": 4.5926143024619e-05, "loss": 0.0129, "step": 5260 }, { "epoch": 3.09, "learning_rate": 4.563305978898007e-05, "loss": 0.0255, "step": 5270 }, { "epoch": 3.09, "learning_rate": 4.533997655334115e-05, "loss": 0.0218, "step": 5280 }, { "epoch": 3.1, "learning_rate": 4.504689331770223e-05, "loss": 0.0068, "step": 5290 }, { "epoch": 3.11, "learning_rate": 4.475381008206331e-05, "loss": 0.0156, "step": 5300 }, { "epoch": 3.11, "eval_accuracy": 0.9570754042066104, "eval_loss": 0.13893470168113708, "eval_runtime": 163.8232, "eval_samples_per_second": 42.662, "eval_steps_per_second": 5.335, "step": 5300 }, { "epoch": 3.11, "learning_rate": 4.446072684642439e-05, "loss": 0.0103, "step": 5310 }, { "epoch": 3.12, "learning_rate": 4.416764361078546e-05, "loss": 0.04, "step": 5320 }, { "epoch": 3.12, "learning_rate": 4.387456037514655e-05, "loss": 0.0212, "step": 5330 }, { "epoch": 3.13, "learning_rate": 4.358147713950762e-05, "loss": 0.0047, "step": 5340 }, { "epoch": 3.14, "learning_rate": 4.32883939038687e-05, "loss": 0.014, "step": 5350 }, { "epoch": 3.14, "learning_rate": 4.299531066822978e-05, "loss": 0.005, "step": 5360 }, { "epoch": 3.15, "learning_rate": 4.270222743259086e-05, "loss": 0.0296, "step": 5370 }, { "epoch": 3.15, "learning_rate": 4.240914419695194e-05, "loss": 0.0037, "step": 5380 }, { "epoch": 3.16, "learning_rate": 4.211606096131301e-05, "loss": 0.0559, "step": 5390 }, { "epoch": 3.17, "learning_rate": 4.18229777256741e-05, "loss": 0.0047, "step": 5400 }, { "epoch": 3.17, "eval_accuracy": 0.9638002575475748, "eval_loss": 0.12242697924375534, "eval_runtime": 164.1713, "eval_samples_per_second": 42.571, "eval_steps_per_second": 5.324, "step": 5400 }, { "epoch": 3.17, "learning_rate": 4.152989449003517e-05, "loss": 0.0042, "step": 5410 }, { "epoch": 3.18, "learning_rate": 4.123681125439625e-05, "loss": 0.0082, "step": 5420 }, { "epoch": 3.18, "learning_rate": 4.094372801875733e-05, "loss": 0.0034, "step": 5430 }, { "epoch": 3.19, "learning_rate": 4.065064478311841e-05, "loss": 0.0092, "step": 5440 }, { "epoch": 3.19, "learning_rate": 4.035756154747949e-05, "loss": 0.0039, "step": 5450 }, { "epoch": 3.2, "learning_rate": 4.006447831184056e-05, "loss": 0.0375, "step": 5460 }, { "epoch": 3.21, "learning_rate": 3.977139507620165e-05, "loss": 0.0033, "step": 5470 }, { "epoch": 3.21, "learning_rate": 3.947831184056272e-05, "loss": 0.0089, "step": 5480 }, { "epoch": 3.22, "learning_rate": 3.91852286049238e-05, "loss": 0.0028, "step": 5490 }, { "epoch": 3.22, "learning_rate": 3.8892145369284874e-05, "loss": 0.0174, "step": 5500 }, { "epoch": 3.22, "eval_accuracy": 0.9650879954213765, "eval_loss": 0.11929039657115936, "eval_runtime": 163.6627, "eval_samples_per_second": 42.704, "eval_steps_per_second": 5.34, "step": 5500 }, { "epoch": 3.23, "learning_rate": 3.859906213364596e-05, "loss": 0.004, "step": 5510 }, { "epoch": 3.24, "learning_rate": 3.830597889800703e-05, "loss": 0.0091, "step": 5520 }, { "epoch": 3.24, "learning_rate": 3.801289566236811e-05, "loss": 0.0044, "step": 5530 }, { "epoch": 3.25, "learning_rate": 3.771981242672919e-05, "loss": 0.0061, "step": 5540 }, { "epoch": 3.25, "learning_rate": 3.742672919109027e-05, "loss": 0.0065, "step": 5550 }, { "epoch": 3.26, "learning_rate": 3.713364595545135e-05, "loss": 0.0106, "step": 5560 }, { "epoch": 3.26, "learning_rate": 3.6840562719812424e-05, "loss": 0.008, "step": 5570 }, { "epoch": 3.27, "learning_rate": 3.654747948417351e-05, "loss": 0.0162, "step": 5580 }, { "epoch": 3.28, "learning_rate": 3.625439624853458e-05, "loss": 0.009, "step": 5590 }, { "epoch": 3.28, "learning_rate": 3.596131301289566e-05, "loss": 0.0087, "step": 5600 }, { "epoch": 3.28, "eval_accuracy": 0.9622263557018171, "eval_loss": 0.12759189307689667, "eval_runtime": 163.9026, "eval_samples_per_second": 42.641, "eval_steps_per_second": 5.332, "step": 5600 }, { "epoch": 3.29, "learning_rate": 3.566822977725674e-05, "loss": 0.0094, "step": 5610 }, { "epoch": 3.29, "learning_rate": 3.537514654161782e-05, "loss": 0.0125, "step": 5620 }, { "epoch": 3.3, "learning_rate": 3.50820633059789e-05, "loss": 0.0086, "step": 5630 }, { "epoch": 3.31, "learning_rate": 3.4788980070339975e-05, "loss": 0.027, "step": 5640 }, { "epoch": 3.31, "learning_rate": 3.449589683470106e-05, "loss": 0.0098, "step": 5650 }, { "epoch": 3.32, "learning_rate": 3.4202813599062134e-05, "loss": 0.0269, "step": 5660 }, { "epoch": 3.32, "learning_rate": 3.390973036342321e-05, "loss": 0.0052, "step": 5670 }, { "epoch": 3.33, "learning_rate": 3.361664712778429e-05, "loss": 0.007, "step": 5680 }, { "epoch": 3.34, "learning_rate": 3.332356389214537e-05, "loss": 0.0048, "step": 5690 }, { "epoch": 3.34, "learning_rate": 3.303048065650645e-05, "loss": 0.0084, "step": 5700 }, { "epoch": 3.34, "eval_accuracy": 0.9662326513092002, "eval_loss": 0.11341094970703125, "eval_runtime": 163.1714, "eval_samples_per_second": 42.832, "eval_steps_per_second": 5.356, "step": 5700 }, { "epoch": 3.35, "learning_rate": 3.2737397420867525e-05, "loss": 0.0069, "step": 5710 }, { "epoch": 3.35, "learning_rate": 3.244431418522861e-05, "loss": 0.0221, "step": 5720 }, { "epoch": 3.36, "learning_rate": 3.2151230949589684e-05, "loss": 0.0104, "step": 5730 }, { "epoch": 3.36, "learning_rate": 3.1858147713950764e-05, "loss": 0.0036, "step": 5740 }, { "epoch": 3.37, "learning_rate": 3.1565064478311837e-05, "loss": 0.0026, "step": 5750 }, { "epoch": 3.38, "learning_rate": 3.127198124267292e-05, "loss": 0.0053, "step": 5760 }, { "epoch": 3.38, "learning_rate": 3.0978898007034e-05, "loss": 0.0101, "step": 5770 }, { "epoch": 3.39, "learning_rate": 3.0685814771395075e-05, "loss": 0.0083, "step": 5780 }, { "epoch": 3.39, "learning_rate": 3.0392731535756158e-05, "loss": 0.0082, "step": 5790 }, { "epoch": 3.4, "learning_rate": 3.0099648300117235e-05, "loss": 0.0141, "step": 5800 }, { "epoch": 3.4, "eval_accuracy": 0.963084847617685, "eval_loss": 0.12386169284582138, "eval_runtime": 164.253, "eval_samples_per_second": 42.55, "eval_steps_per_second": 5.321, "step": 5800 }, { "epoch": 3.41, "learning_rate": 2.9806565064478314e-05, "loss": 0.0041, "step": 5810 }, { "epoch": 3.41, "learning_rate": 2.951348182883939e-05, "loss": 0.0075, "step": 5820 }, { "epoch": 3.42, "learning_rate": 2.922039859320047e-05, "loss": 0.0064, "step": 5830 }, { "epoch": 3.42, "learning_rate": 2.8927315357561546e-05, "loss": 0.018, "step": 5840 }, { "epoch": 3.43, "learning_rate": 2.8634232121922626e-05, "loss": 0.0052, "step": 5850 }, { "epoch": 3.43, "learning_rate": 2.834114888628371e-05, "loss": 0.0049, "step": 5860 }, { "epoch": 3.44, "learning_rate": 2.8048065650644785e-05, "loss": 0.004, "step": 5870 }, { "epoch": 3.45, "learning_rate": 2.7754982415005865e-05, "loss": 0.0325, "step": 5880 }, { "epoch": 3.45, "learning_rate": 2.746189917936694e-05, "loss": 0.0048, "step": 5890 }, { "epoch": 3.46, "learning_rate": 2.716881594372802e-05, "loss": 0.0291, "step": 5900 }, { "epoch": 3.46, "eval_accuracy": 0.9645156674774645, "eval_loss": 0.1198735386133194, "eval_runtime": 162.4699, "eval_samples_per_second": 43.017, "eval_steps_per_second": 5.379, "step": 5900 }, { "epoch": 3.46, "learning_rate": 2.6875732708089097e-05, "loss": 0.0153, "step": 5910 }, { "epoch": 3.47, "learning_rate": 2.6582649472450176e-05, "loss": 0.0131, "step": 5920 }, { "epoch": 3.48, "learning_rate": 2.628956623681126e-05, "loss": 0.0074, "step": 5930 }, { "epoch": 3.48, "learning_rate": 2.5996483001172335e-05, "loss": 0.0028, "step": 5940 }, { "epoch": 3.49, "learning_rate": 2.5703399765533415e-05, "loss": 0.0065, "step": 5950 }, { "epoch": 3.49, "learning_rate": 2.541031652989449e-05, "loss": 0.0081, "step": 5960 }, { "epoch": 3.5, "learning_rate": 2.511723329425557e-05, "loss": 0.0438, "step": 5970 }, { "epoch": 3.51, "learning_rate": 2.482415005861665e-05, "loss": 0.0041, "step": 5980 }, { "epoch": 3.51, "learning_rate": 2.4531066822977727e-05, "loss": 0.003, "step": 5990 }, { "epoch": 3.52, "learning_rate": 2.4237983587338806e-05, "loss": 0.0049, "step": 6000 }, { "epoch": 3.52, "eval_accuracy": 0.9679496351409358, "eval_loss": 0.11025016009807587, "eval_runtime": 163.4902, "eval_samples_per_second": 42.749, "eval_steps_per_second": 5.346, "step": 6000 }, { "epoch": 3.52, "learning_rate": 2.3944900351699882e-05, "loss": 0.0028, "step": 6010 }, { "epoch": 3.53, "learning_rate": 2.3651817116060962e-05, "loss": 0.0028, "step": 6020 }, { "epoch": 3.53, "learning_rate": 2.335873388042204e-05, "loss": 0.0022, "step": 6030 }, { "epoch": 3.54, "learning_rate": 2.3065650644783118e-05, "loss": 0.0028, "step": 6040 }, { "epoch": 3.55, "learning_rate": 2.27725674091442e-05, "loss": 0.0139, "step": 6050 }, { "epoch": 3.55, "learning_rate": 2.2479484173505277e-05, "loss": 0.0051, "step": 6060 }, { "epoch": 3.56, "learning_rate": 2.2186400937866357e-05, "loss": 0.0038, "step": 6070 }, { "epoch": 3.56, "learning_rate": 2.1893317702227433e-05, "loss": 0.0237, "step": 6080 }, { "epoch": 3.57, "learning_rate": 2.1600234466588512e-05, "loss": 0.0108, "step": 6090 }, { "epoch": 3.58, "learning_rate": 2.130715123094959e-05, "loss": 0.0055, "step": 6100 }, { "epoch": 3.58, "eval_accuracy": 0.9662326513092002, "eval_loss": 0.11199595779180527, "eval_runtime": 163.683, "eval_samples_per_second": 42.698, "eval_steps_per_second": 5.34, "step": 6100 }, { "epoch": 3.58, "learning_rate": 2.1014067995310668e-05, "loss": 0.0029, "step": 6110 }, { "epoch": 3.59, "learning_rate": 2.0720984759671748e-05, "loss": 0.0096, "step": 6120 }, { "epoch": 3.59, "learning_rate": 2.0427901524032827e-05, "loss": 0.0067, "step": 6130 }, { "epoch": 3.6, "learning_rate": 2.0134818288393907e-05, "loss": 0.0029, "step": 6140 }, { "epoch": 3.6, "learning_rate": 1.9841735052754983e-05, "loss": 0.0075, "step": 6150 }, { "epoch": 3.61, "learning_rate": 1.9548651817116063e-05, "loss": 0.031, "step": 6160 }, { "epoch": 3.62, "learning_rate": 1.925556858147714e-05, "loss": 0.0029, "step": 6170 }, { "epoch": 3.62, "learning_rate": 1.896248534583822e-05, "loss": 0.0025, "step": 6180 }, { "epoch": 3.63, "learning_rate": 1.8669402110199298e-05, "loss": 0.0026, "step": 6190 }, { "epoch": 3.63, "learning_rate": 1.8376318874560374e-05, "loss": 0.0061, "step": 6200 }, { "epoch": 3.63, "eval_accuracy": 0.966804979253112, "eval_loss": 0.10705357789993286, "eval_runtime": 164.8249, "eval_samples_per_second": 42.403, "eval_steps_per_second": 5.303, "step": 6200 }, { "epoch": 3.64, "learning_rate": 1.8083235638921457e-05, "loss": 0.0059, "step": 6210 }, { "epoch": 3.65, "learning_rate": 1.7790152403282534e-05, "loss": 0.011, "step": 6220 }, { "epoch": 3.65, "learning_rate": 1.7497069167643613e-05, "loss": 0.0026, "step": 6230 }, { "epoch": 3.66, "learning_rate": 1.720398593200469e-05, "loss": 0.009, "step": 6240 }, { "epoch": 3.66, "learning_rate": 1.691090269636577e-05, "loss": 0.0024, "step": 6250 }, { "epoch": 3.67, "learning_rate": 1.6617819460726845e-05, "loss": 0.0024, "step": 6260 }, { "epoch": 3.68, "learning_rate": 1.6324736225087925e-05, "loss": 0.0027, "step": 6270 }, { "epoch": 3.68, "learning_rate": 1.6031652989449004e-05, "loss": 0.0069, "step": 6280 }, { "epoch": 3.69, "learning_rate": 1.5738569753810084e-05, "loss": 0.004, "step": 6290 }, { "epoch": 3.69, "learning_rate": 1.5445486518171164e-05, "loss": 0.0054, "step": 6300 }, { "epoch": 3.69, "eval_accuracy": 0.9696666189726714, "eval_loss": 0.10320978611707687, "eval_runtime": 163.4309, "eval_samples_per_second": 42.764, "eval_steps_per_second": 5.348, "step": 6300 }, { "epoch": 3.7, "learning_rate": 1.515240328253224e-05, "loss": 0.0181, "step": 6310 }, { "epoch": 3.7, "learning_rate": 1.485932004689332e-05, "loss": 0.0026, "step": 6320 }, { "epoch": 3.71, "learning_rate": 1.4566236811254397e-05, "loss": 0.0032, "step": 6330 }, { "epoch": 3.72, "learning_rate": 1.4273153575615475e-05, "loss": 0.0136, "step": 6340 }, { "epoch": 3.72, "learning_rate": 1.3980070339976553e-05, "loss": 0.0058, "step": 6350 }, { "epoch": 3.73, "learning_rate": 1.3686987104337631e-05, "loss": 0.0371, "step": 6360 }, { "epoch": 3.73, "learning_rate": 1.3393903868698712e-05, "loss": 0.0061, "step": 6370 }, { "epoch": 3.74, "learning_rate": 1.310082063305979e-05, "loss": 0.0019, "step": 6380 }, { "epoch": 3.75, "learning_rate": 1.2807737397420868e-05, "loss": 0.0103, "step": 6390 }, { "epoch": 3.75, "learning_rate": 1.2514654161781948e-05, "loss": 0.0041, "step": 6400 }, { "epoch": 3.75, "eval_accuracy": 0.971097438832451, "eval_loss": 0.09614234417676926, "eval_runtime": 163.0518, "eval_samples_per_second": 42.864, "eval_steps_per_second": 5.36, "step": 6400 }, { "epoch": 3.76, "learning_rate": 1.2221570926143026e-05, "loss": 0.0044, "step": 6410 }, { "epoch": 3.76, "learning_rate": 1.1928487690504103e-05, "loss": 0.0048, "step": 6420 }, { "epoch": 3.77, "learning_rate": 1.1635404454865183e-05, "loss": 0.0047, "step": 6430 }, { "epoch": 3.77, "learning_rate": 1.1342321219226261e-05, "loss": 0.0027, "step": 6440 }, { "epoch": 3.78, "learning_rate": 1.1049237983587339e-05, "loss": 0.0023, "step": 6450 }, { "epoch": 3.79, "learning_rate": 1.0756154747948417e-05, "loss": 0.0025, "step": 6460 }, { "epoch": 3.79, "learning_rate": 1.0463071512309496e-05, "loss": 0.0087, "step": 6470 }, { "epoch": 3.8, "learning_rate": 1.0169988276670574e-05, "loss": 0.0271, "step": 6480 }, { "epoch": 3.8, "learning_rate": 9.876905041031654e-06, "loss": 0.0036, "step": 6490 }, { "epoch": 3.81, "learning_rate": 9.583821805392732e-06, "loss": 0.0018, "step": 6500 }, { "epoch": 3.81, "eval_accuracy": 0.9718128487623409, "eval_loss": 0.0929703488945961, "eval_runtime": 164.4896, "eval_samples_per_second": 42.489, "eval_steps_per_second": 5.313, "step": 6500 }, { "epoch": 3.82, "learning_rate": 9.290738569753811e-06, "loss": 0.0029, "step": 6510 }, { "epoch": 3.82, "learning_rate": 8.99765533411489e-06, "loss": 0.0026, "step": 6520 }, { "epoch": 3.83, "learning_rate": 8.704572098475967e-06, "loss": 0.0023, "step": 6530 }, { "epoch": 3.83, "learning_rate": 8.411488862837045e-06, "loss": 0.002, "step": 6540 }, { "epoch": 3.84, "learning_rate": 8.118405627198125e-06, "loss": 0.0039, "step": 6550 }, { "epoch": 3.85, "learning_rate": 7.825322391559203e-06, "loss": 0.0107, "step": 6560 }, { "epoch": 3.85, "learning_rate": 7.532239155920281e-06, "loss": 0.0023, "step": 6570 }, { "epoch": 3.86, "learning_rate": 7.23915592028136e-06, "loss": 0.0057, "step": 6580 }, { "epoch": 3.86, "learning_rate": 6.94607268464244e-06, "loss": 0.0031, "step": 6590 }, { "epoch": 3.87, "learning_rate": 6.6529894490035175e-06, "loss": 0.0032, "step": 6600 }, { "epoch": 3.87, "eval_accuracy": 0.9729575046501645, "eval_loss": 0.09183436632156372, "eval_runtime": 165.4991, "eval_samples_per_second": 42.23, "eval_steps_per_second": 5.281, "step": 6600 }, { "epoch": 3.87, "learning_rate": 6.3599062133645955e-06, "loss": 0.0042, "step": 6610 }, { "epoch": 3.88, "learning_rate": 6.066822977725674e-06, "loss": 0.0053, "step": 6620 }, { "epoch": 3.89, "learning_rate": 5.773739742086753e-06, "loss": 0.0077, "step": 6630 }, { "epoch": 3.89, "learning_rate": 5.480656506447832e-06, "loss": 0.0212, "step": 6640 }, { "epoch": 3.9, "learning_rate": 5.18757327080891e-06, "loss": 0.0022, "step": 6650 }, { "epoch": 3.9, "learning_rate": 4.894490035169988e-06, "loss": 0.0027, "step": 6660 }, { "epoch": 3.91, "learning_rate": 4.601406799531067e-06, "loss": 0.0093, "step": 6670 }, { "epoch": 3.92, "learning_rate": 4.308323563892146e-06, "loss": 0.0036, "step": 6680 }, { "epoch": 3.92, "learning_rate": 4.015240328253224e-06, "loss": 0.0034, "step": 6690 }, { "epoch": 3.93, "learning_rate": 3.722157092614303e-06, "loss": 0.0048, "step": 6700 }, { "epoch": 3.93, "eval_accuracy": 0.9732436686221205, "eval_loss": 0.09061435610055923, "eval_runtime": 162.7794, "eval_samples_per_second": 42.935, "eval_steps_per_second": 5.369, "step": 6700 }, { "epoch": 3.93, "learning_rate": 3.429073856975381e-06, "loss": 0.0028, "step": 6710 }, { "epoch": 3.94, "learning_rate": 3.13599062133646e-06, "loss": 0.0023, "step": 6720 }, { "epoch": 3.94, "learning_rate": 2.8429073856975383e-06, "loss": 0.0029, "step": 6730 }, { "epoch": 3.95, "learning_rate": 2.549824150058617e-06, "loss": 0.0042, "step": 6740 }, { "epoch": 3.96, "learning_rate": 2.2567409144196954e-06, "loss": 0.002, "step": 6750 }, { "epoch": 3.96, "learning_rate": 1.963657678780774e-06, "loss": 0.0099, "step": 6760 }, { "epoch": 3.97, "learning_rate": 1.6705744431418525e-06, "loss": 0.0017, "step": 6770 }, { "epoch": 3.97, "learning_rate": 1.3774912075029308e-06, "loss": 0.0031, "step": 6780 }, { "epoch": 3.98, "learning_rate": 1.0844079718640093e-06, "loss": 0.0018, "step": 6790 }, { "epoch": 3.99, "learning_rate": 7.913247362250879e-07, "loss": 0.002, "step": 6800 }, { "epoch": 3.99, "eval_accuracy": 0.9731005866361425, "eval_loss": 0.09024298936128616, "eval_runtime": 163.9652, "eval_samples_per_second": 42.625, "eval_steps_per_second": 5.33, "step": 6800 }, { "epoch": 3.99, "learning_rate": 4.982415005861665e-07, "loss": 0.0037, "step": 6810 }, { "epoch": 4.0, "learning_rate": 2.0515826494724504e-07, "loss": 0.0266, "step": 6820 }, { "epoch": 4.0, "step": 6824, "total_flos": 8.470480851570254e+18, "train_loss": 0.5286654638341308, "train_runtime": 15256.5796, "train_samples_per_second": 7.155, "train_steps_per_second": 0.447 } ], "max_steps": 6824, "num_train_epochs": 4, "total_flos": 8.470480851570254e+18, "trial_name": null, "trial_params": null }