diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" deleted file mode 100644--- "a/last-checkpoint/trainer_state.json" +++ /dev/null @@ -1,11812 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 14.0, - "global_step": 972622, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 5.140331037318804e-06, - "loss": 9.6819, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.0280662074637608e-05, - "loss": 8.0932, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.542099311195641e-05, - "loss": 6.9551, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 2.0561324149275216e-05, - "loss": 6.6147, - "step": 2000 - }, - { - "epoch": 0.04, - "learning_rate": 2.570165518659402e-05, - "loss": 6.3513, - "step": 2500 - }, - { - "epoch": 0.04, - "learning_rate": 3.084198622391282e-05, - "loss": 6.0637, - "step": 3000 - }, - { - "epoch": 0.05, - "learning_rate": 3.5982317261231625e-05, - "loss": 5.6379, - "step": 3500 - }, - { - "epoch": 0.06, - "learning_rate": 4.112264829855043e-05, - "loss": 5.1989, - "step": 4000 - }, - { - "epoch": 0.06, - "learning_rate": 4.6262979335869235e-05, - "loss": 4.867, - "step": 4500 - }, - { - "epoch": 0.07, - "learning_rate": 5.140331037318804e-05, - "loss": 4.6153, - "step": 5000 - }, - { - "epoch": 0.08, - "learning_rate": 5.6543641410506844e-05, - "loss": 4.4195, - "step": 5500 - }, - { - "epoch": 0.09, - "learning_rate": 6.168397244782565e-05, - "loss": 4.2546, - "step": 6000 - }, - { - "epoch": 0.09, - "learning_rate": 6.682430348514444e-05, - "loss": 4.1156, - "step": 6500 - }, - { - "epoch": 0.1, - "learning_rate": 7.196463452246325e-05, - "loss": 3.9908, - "step": 7000 - }, - { - "epoch": 0.11, - "learning_rate": 7.710496555978204e-05, - "loss": 3.8759, - "step": 7500 - }, - { - "epoch": 0.12, - "learning_rate": 8.224529659710087e-05, - "loss": 3.7715, - "step": 8000 - }, - { - "epoch": 0.12, - "learning_rate": 8.738562763441966e-05, - "loss": 3.6865, - "step": 8500 - }, - { - "epoch": 0.13, - "learning_rate": 9.252595867173847e-05, - "loss": 3.6037, - "step": 9000 - }, - { - "epoch": 0.14, - "learning_rate": 9.766628970905726e-05, - "loss": 3.5251, - "step": 9500 - }, - { - "epoch": 0.14, - "learning_rate": 9.997164799900302e-05, - "loss": 3.4598, - "step": 10000 - }, - { - "epoch": 0.15, - "learning_rate": 9.991982511073378e-05, - "loss": 3.394, - "step": 10500 - }, - { - "epoch": 0.16, - "learning_rate": 9.986789836898105e-05, - "loss": 3.3329, - "step": 11000 - }, - { - "epoch": 0.17, - "learning_rate": 9.98159716272283e-05, - "loss": 3.2819, - "step": 11500 - }, - { - "epoch": 0.17, - "learning_rate": 9.976404488547557e-05, - "loss": 3.2306, - "step": 12000 - }, - { - "epoch": 0.18, - "learning_rate": 9.971222199720634e-05, - "loss": 3.1876, - "step": 12500 - }, - { - "epoch": 0.19, - "learning_rate": 9.966029525545361e-05, - "loss": 3.1435, - "step": 13000 - }, - { - "epoch": 0.19, - "learning_rate": 9.960836851370088e-05, - "loss": 3.1032, - "step": 13500 - }, - { - "epoch": 0.2, - "learning_rate": 9.955644177194815e-05, - "loss": 3.07, - "step": 14000 - }, - { - "epoch": 0.21, - "learning_rate": 9.950461888367891e-05, - "loss": 3.0312, - "step": 14500 - }, - { - "epoch": 0.22, - "learning_rate": 9.945269214192618e-05, - "loss": 2.9979, - "step": 15000 - }, - { - "epoch": 0.22, - "learning_rate": 9.940076540017345e-05, - "loss": 2.9722, - "step": 15500 - }, - { - "epoch": 0.23, - "learning_rate": 9.93488386584207e-05, - "loss": 2.945, - "step": 16000 - }, - { - "epoch": 0.24, - "learning_rate": 9.929701577015148e-05, - "loss": 2.9172, - "step": 16500 - }, - { - "epoch": 0.24, - "learning_rate": 9.924519288188225e-05, - "loss": 2.8928, - "step": 17000 - }, - { - "epoch": 0.25, - "learning_rate": 9.919326614012952e-05, - "loss": 2.8699, - "step": 17500 - }, - { - "epoch": 0.26, - "learning_rate": 9.914133939837677e-05, - "loss": 2.8491, - "step": 18000 - }, - { - "epoch": 0.27, - "learning_rate": 9.908941265662404e-05, - "loss": 2.8275, - "step": 18500 - }, - { - "epoch": 0.27, - "learning_rate": 9.903748591487131e-05, - "loss": 2.8038, - "step": 19000 - }, - { - "epoch": 0.28, - "learning_rate": 9.898555917311856e-05, - "loss": 2.787, - "step": 19500 - }, - { - "epoch": 0.29, - "learning_rate": 9.893363243136583e-05, - "loss": 2.768, - "step": 20000 - }, - { - "epoch": 0.3, - "learning_rate": 9.88818095430966e-05, - "loss": 2.7538, - "step": 20500 - }, - { - "epoch": 0.3, - "learning_rate": 9.882988280134387e-05, - "loss": 2.7372, - "step": 21000 - }, - { - "epoch": 0.31, - "learning_rate": 9.877795605959113e-05, - "loss": 2.7222, - "step": 21500 - }, - { - "epoch": 0.32, - "learning_rate": 9.87260293178384e-05, - "loss": 2.7134, - "step": 22000 - }, - { - "epoch": 0.32, - "learning_rate": 9.867410257608566e-05, - "loss": 2.6956, - "step": 22500 - }, - { - "epoch": 0.33, - "learning_rate": 9.862227968781644e-05, - "loss": 2.6807, - "step": 23000 - }, - { - "epoch": 0.34, - "learning_rate": 9.857035294606369e-05, - "loss": 2.671, - "step": 23500 - }, - { - "epoch": 0.35, - "learning_rate": 9.851842620431096e-05, - "loss": 2.6575, - "step": 24000 - }, - { - "epoch": 0.35, - "learning_rate": 9.846649946255823e-05, - "loss": 2.6476, - "step": 24500 - }, - { - "epoch": 0.36, - "learning_rate": 9.8414676574289e-05, - "loss": 2.6363, - "step": 25000 - }, - { - "epoch": 0.37, - "learning_rate": 9.836274983253626e-05, - "loss": 2.6247, - "step": 25500 - }, - { - "epoch": 0.37, - "learning_rate": 9.831082309078353e-05, - "loss": 2.6118, - "step": 26000 - }, - { - "epoch": 0.38, - "learning_rate": 9.82588963490308e-05, - "loss": 2.6042, - "step": 26500 - }, - { - "epoch": 0.39, - "learning_rate": 9.820707346076157e-05, - "loss": 2.5936, - "step": 27000 - }, - { - "epoch": 0.4, - "learning_rate": 9.815514671900883e-05, - "loss": 2.5828, - "step": 27500 - }, - { - "epoch": 0.4, - "learning_rate": 9.810321997725609e-05, - "loss": 2.5737, - "step": 28000 - }, - { - "epoch": 0.41, - "learning_rate": 9.805129323550336e-05, - "loss": 2.5671, - "step": 28500 - }, - { - "epoch": 0.42, - "learning_rate": 9.799947034723412e-05, - "loss": 2.5575, - "step": 29000 - }, - { - "epoch": 0.42, - "learning_rate": 9.794754360548139e-05, - "loss": 2.5498, - "step": 29500 - }, - { - "epoch": 0.43, - "learning_rate": 9.789561686372866e-05, - "loss": 2.5449, - "step": 30000 - }, - { - "epoch": 0.44, - "learning_rate": 9.784369012197591e-05, - "loss": 2.5361, - "step": 30500 - }, - { - "epoch": 0.45, - "learning_rate": 9.77918672337067e-05, - "loss": 2.5307, - "step": 31000 - }, - { - "epoch": 0.45, - "learning_rate": 9.774004434543746e-05, - "loss": 2.523, - "step": 31500 - }, - { - "epoch": 0.46, - "learning_rate": 9.768811760368473e-05, - "loss": 2.5129, - "step": 32000 - }, - { - "epoch": 0.47, - "learning_rate": 9.763619086193198e-05, - "loss": 2.509, - "step": 32500 - }, - { - "epoch": 0.48, - "learning_rate": 9.758426412017925e-05, - "loss": 2.4989, - "step": 33000 - }, - { - "epoch": 0.48, - "learning_rate": 9.753233737842652e-05, - "loss": 2.4968, - "step": 33500 - }, - { - "epoch": 0.49, - "learning_rate": 9.748051449015729e-05, - "loss": 2.4901, - "step": 34000 - }, - { - "epoch": 0.5, - "learning_rate": 9.742858774840455e-05, - "loss": 2.4799, - "step": 34500 - }, - { - "epoch": 0.5, - "learning_rate": 9.737666100665181e-05, - "loss": 2.4764, - "step": 35000 - }, - { - "epoch": 0.51, - "learning_rate": 9.732473426489909e-05, - "loss": 2.4707, - "step": 35500 - }, - { - "epoch": 0.52, - "learning_rate": 9.727280752314635e-05, - "loss": 2.4663, - "step": 36000 - }, - { - "epoch": 0.53, - "learning_rate": 9.722098463487713e-05, - "loss": 2.4592, - "step": 36500 - }, - { - "epoch": 0.53, - "learning_rate": 9.716905789312438e-05, - "loss": 2.4528, - "step": 37000 - }, - { - "epoch": 0.54, - "learning_rate": 9.711713115137165e-05, - "loss": 2.4517, - "step": 37500 - }, - { - "epoch": 0.55, - "learning_rate": 9.706520440961892e-05, - "loss": 2.447, - "step": 38000 - }, - { - "epoch": 0.55, - "learning_rate": 9.701338152134968e-05, - "loss": 2.4342, - "step": 38500 - }, - { - "epoch": 0.56, - "learning_rate": 9.696145477959695e-05, - "loss": 2.4355, - "step": 39000 - }, - { - "epoch": 0.57, - "learning_rate": 9.69095280378442e-05, - "loss": 2.4292, - "step": 39500 - }, - { - "epoch": 0.58, - "learning_rate": 9.685760129609149e-05, - "loss": 2.426, - "step": 40000 - }, - { - "epoch": 0.58, - "learning_rate": 9.680567455433874e-05, - "loss": 2.4177, - "step": 40500 - }, - { - "epoch": 0.59, - "learning_rate": 9.6753747812586e-05, - "loss": 2.4136, - "step": 41000 - }, - { - "epoch": 0.6, - "learning_rate": 9.670192492431678e-05, - "loss": 2.4099, - "step": 41500 - }, - { - "epoch": 0.6, - "learning_rate": 9.664999818256403e-05, - "loss": 2.4072, - "step": 42000 - }, - { - "epoch": 0.61, - "learning_rate": 9.659807144081131e-05, - "loss": 2.402, - "step": 42500 - }, - { - "epoch": 0.62, - "learning_rate": 9.654614469905857e-05, - "loss": 2.3975, - "step": 43000 - }, - { - "epoch": 0.63, - "learning_rate": 9.649432181078935e-05, - "loss": 2.3914, - "step": 43500 - }, - { - "epoch": 0.63, - "learning_rate": 9.64423950690366e-05, - "loss": 2.3901, - "step": 44000 - }, - { - "epoch": 0.64, - "learning_rate": 9.639046832728387e-05, - "loss": 2.388, - "step": 44500 - }, - { - "epoch": 0.65, - "learning_rate": 9.633854158553114e-05, - "loss": 2.3837, - "step": 45000 - }, - { - "epoch": 0.65, - "learning_rate": 9.62867186972619e-05, - "loss": 2.3821, - "step": 45500 - }, - { - "epoch": 0.66, - "learning_rate": 9.623479195550917e-05, - "loss": 2.3754, - "step": 46000 - }, - { - "epoch": 0.67, - "learning_rate": 9.618286521375643e-05, - "loss": 2.371, - "step": 46500 - }, - { - "epoch": 0.68, - "learning_rate": 9.613093847200371e-05, - "loss": 2.3657, - "step": 47000 - }, - { - "epoch": 0.68, - "learning_rate": 9.607901173025097e-05, - "loss": 2.3574, - "step": 47500 - }, - { - "epoch": 0.69, - "learning_rate": 9.602718884198175e-05, - "loss": 2.3635, - "step": 48000 - }, - { - "epoch": 0.7, - "learning_rate": 9.5975262100229e-05, - "loss": 2.356, - "step": 48500 - }, - { - "epoch": 0.71, - "learning_rate": 9.592333535847627e-05, - "loss": 2.3499, - "step": 49000 - }, - { - "epoch": 0.71, - "learning_rate": 9.587140861672354e-05, - "loss": 2.3485, - "step": 49500 - }, - { - "epoch": 0.72, - "learning_rate": 9.581948187497079e-05, - "loss": 2.3504, - "step": 50000 - }, - { - "epoch": 0.73, - "learning_rate": 9.576755513321806e-05, - "loss": 2.341, - "step": 50500 - }, - { - "epoch": 0.73, - "learning_rate": 9.571573224494883e-05, - "loss": 2.3404, - "step": 51000 - }, - { - "epoch": 0.74, - "learning_rate": 9.56638055031961e-05, - "loss": 2.3353, - "step": 51500 - }, - { - "epoch": 0.75, - "learning_rate": 9.561187876144336e-05, - "loss": 2.3323, - "step": 52000 - }, - { - "epoch": 0.76, - "learning_rate": 9.555995201969062e-05, - "loss": 2.3328, - "step": 52500 - }, - { - "epoch": 0.76, - "learning_rate": 9.55081291314214e-05, - "loss": 2.3304, - "step": 53000 - }, - { - "epoch": 0.77, - "learning_rate": 9.545620238966865e-05, - "loss": 2.3251, - "step": 53500 - }, - { - "epoch": 0.78, - "learning_rate": 9.540427564791592e-05, - "loss": 2.3243, - "step": 54000 - }, - { - "epoch": 0.78, - "learning_rate": 9.535234890616319e-05, - "loss": 2.3267, - "step": 54500 - }, - { - "epoch": 0.79, - "learning_rate": 9.530042216441046e-05, - "loss": 2.3152, - "step": 55000 - }, - { - "epoch": 0.8, - "learning_rate": 9.524849542265773e-05, - "loss": 2.3157, - "step": 55500 - }, - { - "epoch": 0.81, - "learning_rate": 9.519656868090498e-05, - "loss": 2.3077, - "step": 56000 - }, - { - "epoch": 0.81, - "learning_rate": 9.514474579263576e-05, - "loss": 2.3043, - "step": 56500 - }, - { - "epoch": 0.82, - "learning_rate": 9.509281905088301e-05, - "loss": 2.3081, - "step": 57000 - }, - { - "epoch": 0.83, - "learning_rate": 9.504089230913028e-05, - "loss": 2.2992, - "step": 57500 - }, - { - "epoch": 0.83, - "learning_rate": 9.498896556737755e-05, - "loss": 2.3053, - "step": 58000 - }, - { - "epoch": 0.84, - "learning_rate": 9.493714267910832e-05, - "loss": 2.2959, - "step": 58500 - }, - { - "epoch": 0.85, - "learning_rate": 9.488521593735559e-05, - "loss": 2.2958, - "step": 59000 - }, - { - "epoch": 0.86, - "learning_rate": 9.483328919560285e-05, - "loss": 2.292, - "step": 59500 - }, - { - "epoch": 0.86, - "learning_rate": 9.478136245385011e-05, - "loss": 2.2902, - "step": 60000 - }, - { - "epoch": 0.87, - "learning_rate": 9.472943571209738e-05, - "loss": 2.2859, - "step": 60500 - }, - { - "epoch": 0.88, - "learning_rate": 9.467750897034465e-05, - "loss": 2.2821, - "step": 61000 - }, - { - "epoch": 0.89, - "learning_rate": 9.46255822285919e-05, - "loss": 2.2826, - "step": 61500 - }, - { - "epoch": 0.89, - "learning_rate": 9.457365548683917e-05, - "loss": 2.2825, - "step": 62000 - }, - { - "epoch": 0.9, - "learning_rate": 9.452183259856993e-05, - "loss": 2.2826, - "step": 62500 - }, - { - "epoch": 0.91, - "learning_rate": 9.44699058568172e-05, - "loss": 2.2723, - "step": 63000 - }, - { - "epoch": 0.91, - "learning_rate": 9.441797911506447e-05, - "loss": 2.2721, - "step": 63500 - }, - { - "epoch": 0.92, - "learning_rate": 9.436605237331174e-05, - "loss": 2.2744, - "step": 64000 - }, - { - "epoch": 0.93, - "learning_rate": 9.431412563155901e-05, - "loss": 2.2722, - "step": 64500 - }, - { - "epoch": 0.94, - "learning_rate": 9.426230274328977e-05, - "loss": 2.2649, - "step": 65000 - }, - { - "epoch": 0.94, - "learning_rate": 9.421037600153704e-05, - "loss": 2.2639, - "step": 65500 - }, - { - "epoch": 0.95, - "learning_rate": 9.41584492597843e-05, - "loss": 2.2653, - "step": 66000 - }, - { - "epoch": 0.96, - "learning_rate": 9.410652251803157e-05, - "loss": 2.2565, - "step": 66500 - }, - { - "epoch": 0.96, - "learning_rate": 9.405459577627883e-05, - "loss": 2.2624, - "step": 67000 - }, - { - "epoch": 0.97, - "learning_rate": 9.400266903452609e-05, - "loss": 2.2556, - "step": 67500 - }, - { - "epoch": 0.98, - "learning_rate": 9.395074229277337e-05, - "loss": 2.2516, - "step": 68000 - }, - { - "epoch": 0.99, - "learning_rate": 9.389881555102063e-05, - "loss": 2.249, - "step": 68500 - }, - { - "epoch": 0.99, - "learning_rate": 9.38469926627514e-05, - "loss": 2.2548, - "step": 69000 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.588244433320517, - "eval_loss": 2.116262674331665, - "eval_runtime": 646.5563, - "eval_samples_per_second": 833.55, - "eval_steps_per_second": 34.732, - "step": 69473 - }, - { - "epoch": 1.0, - "learning_rate": 9.379506592099866e-05, - "loss": 2.2492, - "step": 69500 - }, - { - "epoch": 1.01, - "learning_rate": 9.374313917924591e-05, - "loss": 2.2478, - "step": 70000 - }, - { - "epoch": 1.01, - "learning_rate": 9.36912124374932e-05, - "loss": 2.2434, - "step": 70500 - }, - { - "epoch": 1.02, - "learning_rate": 9.363938954922395e-05, - "loss": 2.2369, - "step": 71000 - }, - { - "epoch": 1.03, - "learning_rate": 9.358746280747123e-05, - "loss": 2.2392, - "step": 71500 - }, - { - "epoch": 1.04, - "learning_rate": 9.353553606571849e-05, - "loss": 2.2348, - "step": 72000 - }, - { - "epoch": 1.04, - "learning_rate": 9.348360932396575e-05, - "loss": 2.2382, - "step": 72500 - }, - { - "epoch": 1.05, - "learning_rate": 9.343178643569652e-05, - "loss": 2.2336, - "step": 73000 - }, - { - "epoch": 1.06, - "learning_rate": 9.337985969394379e-05, - "loss": 2.2339, - "step": 73500 - }, - { - "epoch": 1.07, - "learning_rate": 9.332793295219106e-05, - "loss": 2.2286, - "step": 74000 - }, - { - "epoch": 1.07, - "learning_rate": 9.327600621043831e-05, - "loss": 2.2303, - "step": 74500 - }, - { - "epoch": 1.08, - "learning_rate": 9.322418332216909e-05, - "loss": 2.2318, - "step": 75000 - }, - { - "epoch": 1.09, - "learning_rate": 9.317225658041635e-05, - "loss": 2.2249, - "step": 75500 - }, - { - "epoch": 1.09, - "learning_rate": 9.312032983866363e-05, - "loss": 2.2209, - "step": 76000 - }, - { - "epoch": 1.1, - "learning_rate": 9.306840309691088e-05, - "loss": 2.223, - "step": 76500 - }, - { - "epoch": 1.11, - "learning_rate": 9.301658020864166e-05, - "loss": 2.2224, - "step": 77000 - }, - { - "epoch": 1.12, - "learning_rate": 9.296465346688892e-05, - "loss": 2.2191, - "step": 77500 - }, - { - "epoch": 1.12, - "learning_rate": 9.291272672513619e-05, - "loss": 2.2184, - "step": 78000 - }, - { - "epoch": 1.13, - "learning_rate": 9.286079998338345e-05, - "loss": 2.2153, - "step": 78500 - }, - { - "epoch": 1.14, - "learning_rate": 9.280897709511422e-05, - "loss": 2.212, - "step": 79000 - }, - { - "epoch": 1.14, - "learning_rate": 9.275705035336149e-05, - "loss": 2.2111, - "step": 79500 - }, - { - "epoch": 1.15, - "learning_rate": 9.270512361160874e-05, - "loss": 2.2102, - "step": 80000 - }, - { - "epoch": 1.16, - "learning_rate": 9.265319686985601e-05, - "loss": 2.2068, - "step": 80500 - }, - { - "epoch": 1.17, - "learning_rate": 9.260137398158678e-05, - "loss": 2.2086, - "step": 81000 - }, - { - "epoch": 1.17, - "learning_rate": 9.254944723983405e-05, - "loss": 2.2048, - "step": 81500 - }, - { - "epoch": 1.18, - "learning_rate": 9.249752049808131e-05, - "loss": 2.2054, - "step": 82000 - }, - { - "epoch": 1.19, - "learning_rate": 9.244559375632857e-05, - "loss": 2.2023, - "step": 82500 - }, - { - "epoch": 1.19, - "learning_rate": 9.239377086805935e-05, - "loss": 2.2023, - "step": 83000 - }, - { - "epoch": 1.2, - "learning_rate": 9.23418441263066e-05, - "loss": 2.2016, - "step": 83500 - }, - { - "epoch": 1.21, - "learning_rate": 9.228991738455389e-05, - "loss": 2.2034, - "step": 84000 - }, - { - "epoch": 1.22, - "learning_rate": 9.223799064280114e-05, - "loss": 2.1933, - "step": 84500 - }, - { - "epoch": 1.22, - "learning_rate": 9.218616775453192e-05, - "loss": 2.1979, - "step": 85000 - }, - { - "epoch": 1.23, - "learning_rate": 9.213424101277917e-05, - "loss": 2.1929, - "step": 85500 - }, - { - "epoch": 1.24, - "learning_rate": 9.208231427102644e-05, - "loss": 2.1989, - "step": 86000 - }, - { - "epoch": 1.25, - "learning_rate": 9.203038752927371e-05, - "loss": 2.1941, - "step": 86500 - }, - { - "epoch": 1.25, - "learning_rate": 9.197856464100448e-05, - "loss": 2.1927, - "step": 87000 - }, - { - "epoch": 1.26, - "learning_rate": 9.192663789925175e-05, - "loss": 2.1899, - "step": 87500 - }, - { - "epoch": 1.27, - "learning_rate": 9.1874711157499e-05, - "loss": 2.187, - "step": 88000 - }, - { - "epoch": 1.27, - "learning_rate": 9.182278441574627e-05, - "loss": 2.1863, - "step": 88500 - }, - { - "epoch": 1.28, - "learning_rate": 9.177096152747703e-05, - "loss": 2.1881, - "step": 89000 - }, - { - "epoch": 1.29, - "learning_rate": 9.17190347857243e-05, - "loss": 2.1864, - "step": 89500 - }, - { - "epoch": 1.3, - "learning_rate": 9.166710804397157e-05, - "loss": 2.1846, - "step": 90000 - }, - { - "epoch": 1.3, - "learning_rate": 9.161518130221884e-05, - "loss": 2.1793, - "step": 90500 - }, - { - "epoch": 1.31, - "learning_rate": 9.15633584139496e-05, - "loss": 2.1813, - "step": 91000 - }, - { - "epoch": 1.32, - "learning_rate": 9.151143167219687e-05, - "loss": 2.1755, - "step": 91500 - }, - { - "epoch": 1.32, - "learning_rate": 9.145950493044413e-05, - "loss": 2.1763, - "step": 92000 - }, - { - "epoch": 1.33, - "learning_rate": 9.14075781886914e-05, - "loss": 2.1777, - "step": 92500 - }, - { - "epoch": 1.34, - "learning_rate": 9.135575530042216e-05, - "loss": 2.1759, - "step": 93000 - }, - { - "epoch": 1.35, - "learning_rate": 9.130382855866943e-05, - "loss": 2.1758, - "step": 93500 - }, - { - "epoch": 1.35, - "learning_rate": 9.12519018169167e-05, - "loss": 2.1762, - "step": 94000 - }, - { - "epoch": 1.36, - "learning_rate": 9.119997507516397e-05, - "loss": 2.1713, - "step": 94500 - }, - { - "epoch": 1.37, - "learning_rate": 9.114815218689473e-05, - "loss": 2.1713, - "step": 95000 - }, - { - "epoch": 1.37, - "learning_rate": 9.1096225445142e-05, - "loss": 2.1681, - "step": 95500 - }, - { - "epoch": 1.38, - "learning_rate": 9.104429870338926e-05, - "loss": 2.1703, - "step": 96000 - }, - { - "epoch": 1.39, - "learning_rate": 9.099237196163653e-05, - "loss": 2.168, - "step": 96500 - }, - { - "epoch": 1.4, - "learning_rate": 9.094054907336729e-05, - "loss": 2.1642, - "step": 97000 - }, - { - "epoch": 1.4, - "learning_rate": 9.088862233161456e-05, - "loss": 2.1629, - "step": 97500 - }, - { - "epoch": 1.41, - "learning_rate": 9.083669558986183e-05, - "loss": 2.1655, - "step": 98000 - }, - { - "epoch": 1.42, - "learning_rate": 9.07847688481091e-05, - "loss": 2.1653, - "step": 98500 - }, - { - "epoch": 1.43, - "learning_rate": 9.073294595983986e-05, - "loss": 2.1619, - "step": 99000 - }, - { - "epoch": 1.43, - "learning_rate": 9.068101921808713e-05, - "loss": 2.1608, - "step": 99500 - }, - { - "epoch": 1.44, - "learning_rate": 9.062909247633439e-05, - "loss": 2.1614, - "step": 100000 - }, - { - "epoch": 1.45, - "learning_rate": 9.057716573458165e-05, - "loss": 2.1624, - "step": 100500 - }, - { - "epoch": 1.45, - "learning_rate": 9.052534284631242e-05, - "loss": 2.1523, - "step": 101000 - }, - { - "epoch": 1.46, - "learning_rate": 9.047341610455969e-05, - "loss": 2.1582, - "step": 101500 - }, - { - "epoch": 1.47, - "learning_rate": 9.042148936280696e-05, - "loss": 2.1586, - "step": 102000 - }, - { - "epoch": 1.48, - "learning_rate": 9.036956262105421e-05, - "loss": 2.1508, - "step": 102500 - }, - { - "epoch": 1.48, - "learning_rate": 9.031773973278499e-05, - "loss": 2.1563, - "step": 103000 - }, - { - "epoch": 1.49, - "learning_rate": 9.026581299103226e-05, - "loss": 2.1538, - "step": 103500 - }, - { - "epoch": 1.5, - "learning_rate": 9.021388624927953e-05, - "loss": 2.151, - "step": 104000 - }, - { - "epoch": 1.5, - "learning_rate": 9.016195950752678e-05, - "loss": 2.1447, - "step": 104500 - }, - { - "epoch": 1.51, - "learning_rate": 9.011013661925756e-05, - "loss": 2.1508, - "step": 105000 - }, - { - "epoch": 1.52, - "learning_rate": 9.005820987750482e-05, - "loss": 2.145, - "step": 105500 - }, - { - "epoch": 1.53, - "learning_rate": 9.000628313575209e-05, - "loss": 2.1438, - "step": 106000 - }, - { - "epoch": 1.53, - "learning_rate": 8.995435639399935e-05, - "loss": 2.1465, - "step": 106500 - }, - { - "epoch": 1.54, - "learning_rate": 8.990253350573012e-05, - "loss": 2.1456, - "step": 107000 - }, - { - "epoch": 1.55, - "learning_rate": 8.985060676397739e-05, - "loss": 2.144, - "step": 107500 - }, - { - "epoch": 1.55, - "learning_rate": 8.979868002222464e-05, - "loss": 2.1466, - "step": 108000 - }, - { - "epoch": 1.56, - "learning_rate": 8.974675328047191e-05, - "loss": 2.1409, - "step": 108500 - }, - { - "epoch": 1.57, - "learning_rate": 8.969493039220268e-05, - "loss": 2.1367, - "step": 109000 - }, - { - "epoch": 1.58, - "learning_rate": 8.964300365044995e-05, - "loss": 2.1402, - "step": 109500 - }, - { - "epoch": 1.58, - "learning_rate": 8.959107690869721e-05, - "loss": 2.1362, - "step": 110000 - }, - { - "epoch": 1.59, - "learning_rate": 8.953915016694447e-05, - "loss": 2.1351, - "step": 110500 - }, - { - "epoch": 1.6, - "learning_rate": 8.948732727867525e-05, - "loss": 2.1379, - "step": 111000 - }, - { - "epoch": 1.6, - "learning_rate": 8.94354005369225e-05, - "loss": 2.134, - "step": 111500 - }, - { - "epoch": 1.61, - "learning_rate": 8.938347379516979e-05, - "loss": 2.1356, - "step": 112000 - }, - { - "epoch": 1.62, - "learning_rate": 8.933154705341704e-05, - "loss": 2.1365, - "step": 112500 - }, - { - "epoch": 1.63, - "learning_rate": 8.927972416514782e-05, - "loss": 2.1324, - "step": 113000 - }, - { - "epoch": 1.63, - "learning_rate": 8.922779742339508e-05, - "loss": 2.1308, - "step": 113500 - }, - { - "epoch": 1.64, - "learning_rate": 8.917587068164234e-05, - "loss": 2.1312, - "step": 114000 - }, - { - "epoch": 1.65, - "learning_rate": 8.912394393988961e-05, - "loss": 2.1292, - "step": 114500 - }, - { - "epoch": 1.66, - "learning_rate": 8.907212105162038e-05, - "loss": 2.1316, - "step": 115000 - }, - { - "epoch": 1.66, - "learning_rate": 8.902019430986765e-05, - "loss": 2.1271, - "step": 115500 - }, - { - "epoch": 1.67, - "learning_rate": 8.89682675681149e-05, - "loss": 2.128, - "step": 116000 - }, - { - "epoch": 1.68, - "learning_rate": 8.891634082636218e-05, - "loss": 2.1276, - "step": 116500 - }, - { - "epoch": 1.68, - "learning_rate": 8.886451793809294e-05, - "loss": 2.1242, - "step": 117000 - }, - { - "epoch": 1.69, - "learning_rate": 8.881259119634022e-05, - "loss": 2.1264, - "step": 117500 - }, - { - "epoch": 1.7, - "learning_rate": 8.876066445458747e-05, - "loss": 2.1215, - "step": 118000 - }, - { - "epoch": 1.71, - "learning_rate": 8.870873771283473e-05, - "loss": 2.1246, - "step": 118500 - }, - { - "epoch": 1.71, - "learning_rate": 8.86569148245655e-05, - "loss": 2.1247, - "step": 119000 - }, - { - "epoch": 1.72, - "learning_rate": 8.860498808281276e-05, - "loss": 2.1228, - "step": 119500 - }, - { - "epoch": 1.73, - "learning_rate": 8.855306134106004e-05, - "loss": 2.1223, - "step": 120000 - }, - { - "epoch": 1.73, - "learning_rate": 8.85011345993073e-05, - "loss": 2.1202, - "step": 120500 - }, - { - "epoch": 1.74, - "learning_rate": 8.844931171103808e-05, - "loss": 2.1223, - "step": 121000 - }, - { - "epoch": 1.75, - "learning_rate": 8.839738496928533e-05, - "loss": 2.1198, - "step": 121500 - }, - { - "epoch": 1.76, - "learning_rate": 8.83454582275326e-05, - "loss": 2.1164, - "step": 122000 - }, - { - "epoch": 1.76, - "learning_rate": 8.829353148577987e-05, - "loss": 2.1192, - "step": 122500 - }, - { - "epoch": 1.77, - "learning_rate": 8.824170859751064e-05, - "loss": 2.1152, - "step": 123000 - }, - { - "epoch": 1.78, - "learning_rate": 8.81897818557579e-05, - "loss": 2.1163, - "step": 123500 - }, - { - "epoch": 1.78, - "learning_rate": 8.813785511400516e-05, - "loss": 2.1119, - "step": 124000 - }, - { - "epoch": 1.79, - "learning_rate": 8.808592837225244e-05, - "loss": 2.113, - "step": 124500 - }, - { - "epoch": 1.8, - "learning_rate": 8.803410548398319e-05, - "loss": 2.1135, - "step": 125000 - }, - { - "epoch": 1.81, - "learning_rate": 8.798217874223047e-05, - "loss": 2.1099, - "step": 125500 - }, - { - "epoch": 1.81, - "learning_rate": 8.793025200047773e-05, - "loss": 2.1139, - "step": 126000 - }, - { - "epoch": 1.82, - "learning_rate": 8.7878325258725e-05, - "loss": 2.1054, - "step": 126500 - }, - { - "epoch": 1.83, - "learning_rate": 8.782650237045576e-05, - "loss": 2.1085, - "step": 127000 - }, - { - "epoch": 1.84, - "learning_rate": 8.777457562870303e-05, - "loss": 2.1115, - "step": 127500 - }, - { - "epoch": 1.84, - "learning_rate": 8.77226488869503e-05, - "loss": 2.1057, - "step": 128000 - }, - { - "epoch": 1.85, - "learning_rate": 8.767072214519756e-05, - "loss": 2.1108, - "step": 128500 - }, - { - "epoch": 1.86, - "learning_rate": 8.761889925692834e-05, - "loss": 2.1105, - "step": 129000 - }, - { - "epoch": 1.86, - "learning_rate": 8.756697251517559e-05, - "loss": 2.1075, - "step": 129500 - }, - { - "epoch": 1.87, - "learning_rate": 8.751504577342286e-05, - "loss": 2.1027, - "step": 130000 - }, - { - "epoch": 1.88, - "learning_rate": 8.746311903167013e-05, - "loss": 2.1043, - "step": 130500 - }, - { - "epoch": 1.89, - "learning_rate": 8.741129614340089e-05, - "loss": 2.0995, - "step": 131000 - }, - { - "epoch": 1.89, - "learning_rate": 8.735936940164816e-05, - "loss": 2.1022, - "step": 131500 - }, - { - "epoch": 1.9, - "learning_rate": 8.730744265989542e-05, - "loss": 2.1011, - "step": 132000 - }, - { - "epoch": 1.91, - "learning_rate": 8.72555159181427e-05, - "loss": 2.1012, - "step": 132500 - }, - { - "epoch": 1.91, - "learning_rate": 8.720369302987345e-05, - "loss": 2.1012, - "step": 133000 - }, - { - "epoch": 1.92, - "learning_rate": 8.715176628812073e-05, - "loss": 2.1026, - "step": 133500 - }, - { - "epoch": 1.93, - "learning_rate": 8.709983954636799e-05, - "loss": 2.099, - "step": 134000 - }, - { - "epoch": 1.94, - "learning_rate": 8.704791280461526e-05, - "loss": 2.1023, - "step": 134500 - }, - { - "epoch": 1.94, - "learning_rate": 8.699608991634602e-05, - "loss": 2.0986, - "step": 135000 - }, - { - "epoch": 1.95, - "learning_rate": 8.694416317459329e-05, - "loss": 2.0948, - "step": 135500 - }, - { - "epoch": 1.96, - "learning_rate": 8.689223643284056e-05, - "loss": 2.0944, - "step": 136000 - }, - { - "epoch": 1.96, - "learning_rate": 8.684030969108781e-05, - "loss": 2.0987, - "step": 136500 - }, - { - "epoch": 1.97, - "learning_rate": 8.678848680281859e-05, - "loss": 2.0933, - "step": 137000 - }, - { - "epoch": 1.98, - "learning_rate": 8.673656006106585e-05, - "loss": 2.0973, - "step": 137500 - }, - { - "epoch": 1.99, - "learning_rate": 8.668463331931312e-05, - "loss": 2.093, - "step": 138000 - }, - { - "epoch": 1.99, - "learning_rate": 8.663270657756038e-05, - "loss": 2.0904, - "step": 138500 - }, - { - "epoch": 2.0, - "eval_accuracy": 0.6101490861863045, - "eval_loss": 1.9561994075775146, - "eval_runtime": 646.8164, - "eval_samples_per_second": 833.215, - "eval_steps_per_second": 34.718, - "step": 138946 - }, - { - "epoch": 2.0, - "learning_rate": 8.658077983580765e-05, - "loss": 2.0869, - "step": 139000 - }, - { - "epoch": 2.01, - "learning_rate": 8.652895694753842e-05, - "loss": 2.087, - "step": 139500 - }, - { - "epoch": 2.02, - "learning_rate": 8.647703020578569e-05, - "loss": 2.0856, - "step": 140000 - }, - { - "epoch": 2.02, - "learning_rate": 8.642510346403294e-05, - "loss": 2.0843, - "step": 140500 - }, - { - "epoch": 2.03, - "learning_rate": 8.637317672228021e-05, - "loss": 2.086, - "step": 141000 - }, - { - "epoch": 2.04, - "learning_rate": 8.632135383401098e-05, - "loss": 2.091, - "step": 141500 - }, - { - "epoch": 2.04, - "learning_rate": 8.626942709225824e-05, - "loss": 2.083, - "step": 142000 - }, - { - "epoch": 2.05, - "learning_rate": 8.621750035050551e-05, - "loss": 2.0823, - "step": 142500 - }, - { - "epoch": 2.06, - "learning_rate": 8.616557360875278e-05, - "loss": 2.0834, - "step": 143000 - }, - { - "epoch": 2.07, - "learning_rate": 8.611375072048355e-05, - "loss": 2.0838, - "step": 143500 - }, - { - "epoch": 2.07, - "learning_rate": 8.606182397873082e-05, - "loss": 2.0821, - "step": 144000 - }, - { - "epoch": 2.08, - "learning_rate": 8.600989723697807e-05, - "loss": 2.082, - "step": 144500 - }, - { - "epoch": 2.09, - "learning_rate": 8.595797049522534e-05, - "loss": 2.082, - "step": 145000 - }, - { - "epoch": 2.09, - "learning_rate": 8.59061476069561e-05, - "loss": 2.0836, - "step": 145500 - }, - { - "epoch": 2.1, - "learning_rate": 8.585422086520337e-05, - "loss": 2.0806, - "step": 146000 - }, - { - "epoch": 2.11, - "learning_rate": 8.580229412345064e-05, - "loss": 2.08, - "step": 146500 - }, - { - "epoch": 2.12, - "learning_rate": 8.575036738169791e-05, - "loss": 2.0797, - "step": 147000 - }, - { - "epoch": 2.12, - "learning_rate": 8.569854449342868e-05, - "loss": 2.0777, - "step": 147500 - }, - { - "epoch": 2.13, - "learning_rate": 8.564661775167594e-05, - "loss": 2.0794, - "step": 148000 - }, - { - "epoch": 2.14, - "learning_rate": 8.55946910099232e-05, - "loss": 2.078, - "step": 148500 - }, - { - "epoch": 2.14, - "learning_rate": 8.554276426817047e-05, - "loss": 2.0795, - "step": 149000 - }, - { - "epoch": 2.15, - "learning_rate": 8.549094137990123e-05, - "loss": 2.0788, - "step": 149500 - }, - { - "epoch": 2.16, - "learning_rate": 8.54390146381485e-05, - "loss": 2.0767, - "step": 150000 - }, - { - "epoch": 2.17, - "learning_rate": 8.538708789639577e-05, - "loss": 2.0736, - "step": 150500 - }, - { - "epoch": 2.17, - "learning_rate": 8.533516115464302e-05, - "loss": 2.0721, - "step": 151000 - }, - { - "epoch": 2.18, - "learning_rate": 8.52833382663738e-05, - "loss": 2.0705, - "step": 151500 - }, - { - "epoch": 2.19, - "learning_rate": 8.523141152462106e-05, - "loss": 2.0733, - "step": 152000 - }, - { - "epoch": 2.2, - "learning_rate": 8.517948478286834e-05, - "loss": 2.0721, - "step": 152500 - }, - { - "epoch": 2.2, - "learning_rate": 8.51275580411156e-05, - "loss": 2.0699, - "step": 153000 - }, - { - "epoch": 2.21, - "learning_rate": 8.507573515284638e-05, - "loss": 2.0709, - "step": 153500 - }, - { - "epoch": 2.22, - "learning_rate": 8.502380841109363e-05, - "loss": 2.0694, - "step": 154000 - }, - { - "epoch": 2.22, - "learning_rate": 8.49718816693409e-05, - "loss": 2.0686, - "step": 154500 - }, - { - "epoch": 2.23, - "learning_rate": 8.491995492758817e-05, - "loss": 2.077, - "step": 155000 - }, - { - "epoch": 2.24, - "learning_rate": 8.486813203931893e-05, - "loss": 2.0667, - "step": 155500 - }, - { - "epoch": 2.25, - "learning_rate": 8.48162052975662e-05, - "loss": 2.0669, - "step": 156000 - }, - { - "epoch": 2.25, - "learning_rate": 8.476427855581346e-05, - "loss": 2.0721, - "step": 156500 - }, - { - "epoch": 2.26, - "learning_rate": 8.471235181406072e-05, - "loss": 2.0668, - "step": 157000 - }, - { - "epoch": 2.27, - "learning_rate": 8.466052892579149e-05, - "loss": 2.0673, - "step": 157500 - }, - { - "epoch": 2.27, - "learning_rate": 8.460860218403876e-05, - "loss": 2.0656, - "step": 158000 - }, - { - "epoch": 2.28, - "learning_rate": 8.455667544228603e-05, - "loss": 2.0647, - "step": 158500 - }, - { - "epoch": 2.29, - "learning_rate": 8.450474870053328e-05, - "loss": 2.0649, - "step": 159000 - }, - { - "epoch": 2.3, - "learning_rate": 8.445292581226406e-05, - "loss": 2.0636, - "step": 159500 - }, - { - "epoch": 2.3, - "learning_rate": 8.440099907051132e-05, - "loss": 2.0653, - "step": 160000 - }, - { - "epoch": 2.31, - "learning_rate": 8.43490723287586e-05, - "loss": 2.0663, - "step": 160500 - }, - { - "epoch": 2.32, - "learning_rate": 8.429714558700585e-05, - "loss": 2.0629, - "step": 161000 - }, - { - "epoch": 2.32, - "learning_rate": 8.424532269873663e-05, - "loss": 2.0617, - "step": 161500 - }, - { - "epoch": 2.33, - "learning_rate": 8.419339595698389e-05, - "loss": 2.0626, - "step": 162000 - }, - { - "epoch": 2.34, - "learning_rate": 8.414146921523116e-05, - "loss": 2.0647, - "step": 162500 - }, - { - "epoch": 2.35, - "learning_rate": 8.408954247347842e-05, - "loss": 2.064, - "step": 163000 - }, - { - "epoch": 2.35, - "learning_rate": 8.403771958520919e-05, - "loss": 2.0595, - "step": 163500 - }, - { - "epoch": 2.36, - "learning_rate": 8.398579284345646e-05, - "loss": 2.0597, - "step": 164000 - }, - { - "epoch": 2.37, - "learning_rate": 8.393386610170371e-05, - "loss": 2.0564, - "step": 164500 - }, - { - "epoch": 2.38, - "learning_rate": 8.3881939359951e-05, - "loss": 2.0575, - "step": 165000 - }, - { - "epoch": 2.38, - "learning_rate": 8.383011647168175e-05, - "loss": 2.0587, - "step": 165500 - }, - { - "epoch": 2.39, - "learning_rate": 8.377818972992903e-05, - "loss": 2.0572, - "step": 166000 - }, - { - "epoch": 2.4, - "learning_rate": 8.372626298817628e-05, - "loss": 2.0543, - "step": 166500 - }, - { - "epoch": 2.4, - "learning_rate": 8.367433624642354e-05, - "loss": 2.0566, - "step": 167000 - }, - { - "epoch": 2.41, - "learning_rate": 8.362251335815432e-05, - "loss": 2.0577, - "step": 167500 - }, - { - "epoch": 2.42, - "learning_rate": 8.357058661640157e-05, - "loss": 2.053, - "step": 168000 - }, - { - "epoch": 2.43, - "learning_rate": 8.351865987464886e-05, - "loss": 2.0561, - "step": 168500 - }, - { - "epoch": 2.43, - "learning_rate": 8.346673313289611e-05, - "loss": 2.0532, - "step": 169000 - }, - { - "epoch": 2.44, - "learning_rate": 8.341491024462689e-05, - "loss": 2.0553, - "step": 169500 - }, - { - "epoch": 2.45, - "learning_rate": 8.336298350287414e-05, - "loss": 2.0547, - "step": 170000 - }, - { - "epoch": 2.45, - "learning_rate": 8.331105676112141e-05, - "loss": 2.0515, - "step": 170500 - }, - { - "epoch": 2.46, - "learning_rate": 8.325913001936868e-05, - "loss": 2.0486, - "step": 171000 - }, - { - "epoch": 2.47, - "learning_rate": 8.320730713109945e-05, - "loss": 2.0561, - "step": 171500 - }, - { - "epoch": 2.48, - "learning_rate": 8.315538038934672e-05, - "loss": 2.0502, - "step": 172000 - }, - { - "epoch": 2.48, - "learning_rate": 8.310345364759397e-05, - "loss": 2.0525, - "step": 172500 - }, - { - "epoch": 2.49, - "learning_rate": 8.305152690584125e-05, - "loss": 2.0482, - "step": 173000 - }, - { - "epoch": 2.5, - "learning_rate": 8.2999704017572e-05, - "loss": 2.0487, - "step": 173500 - }, - { - "epoch": 2.5, - "learning_rate": 8.294777727581929e-05, - "loss": 2.054, - "step": 174000 - }, - { - "epoch": 2.51, - "learning_rate": 8.289585053406654e-05, - "loss": 2.0521, - "step": 174500 - }, - { - "epoch": 2.52, - "learning_rate": 8.284392379231381e-05, - "loss": 2.0493, - "step": 175000 - }, - { - "epoch": 2.53, - "learning_rate": 8.279210090404458e-05, - "loss": 2.054, - "step": 175500 - }, - { - "epoch": 2.53, - "learning_rate": 8.274017416229184e-05, - "loss": 2.0516, - "step": 176000 - }, - { - "epoch": 2.54, - "learning_rate": 8.268824742053911e-05, - "loss": 2.0515, - "step": 176500 - }, - { - "epoch": 2.55, - "learning_rate": 8.263632067878637e-05, - "loss": 2.044, - "step": 177000 - }, - { - "epoch": 2.55, - "learning_rate": 8.258449779051715e-05, - "loss": 2.0461, - "step": 177500 - }, - { - "epoch": 2.56, - "learning_rate": 8.25325710487644e-05, - "loss": 2.0496, - "step": 178000 - }, - { - "epoch": 2.57, - "learning_rate": 8.248064430701167e-05, - "loss": 2.0456, - "step": 178500 - }, - { - "epoch": 2.58, - "learning_rate": 8.242871756525894e-05, - "loss": 2.0436, - "step": 179000 - }, - { - "epoch": 2.58, - "learning_rate": 8.23768946769897e-05, - "loss": 2.0425, - "step": 179500 - }, - { - "epoch": 2.59, - "learning_rate": 8.232496793523697e-05, - "loss": 2.0476, - "step": 180000 - }, - { - "epoch": 2.6, - "learning_rate": 8.227304119348424e-05, - "loss": 2.0475, - "step": 180500 - }, - { - "epoch": 2.61, - "learning_rate": 8.22211144517315e-05, - "loss": 2.0497, - "step": 181000 - }, - { - "epoch": 2.61, - "learning_rate": 8.216929156346228e-05, - "loss": 2.0468, - "step": 181500 - }, - { - "epoch": 2.62, - "learning_rate": 8.211736482170954e-05, - "loss": 2.0421, - "step": 182000 - }, - { - "epoch": 2.63, - "learning_rate": 8.20654380799568e-05, - "loss": 2.0382, - "step": 182500 - }, - { - "epoch": 2.63, - "learning_rate": 8.201351133820407e-05, - "loss": 2.0407, - "step": 183000 - }, - { - "epoch": 2.64, - "learning_rate": 8.196168844993483e-05, - "loss": 2.0413, - "step": 183500 - }, - { - "epoch": 2.65, - "learning_rate": 8.19097617081821e-05, - "loss": 2.0375, - "step": 184000 - }, - { - "epoch": 2.66, - "learning_rate": 8.185783496642937e-05, - "loss": 2.0414, - "step": 184500 - }, - { - "epoch": 2.66, - "learning_rate": 8.180590822467663e-05, - "loss": 2.036, - "step": 185000 - }, - { - "epoch": 2.67, - "learning_rate": 8.17540853364074e-05, - "loss": 2.0376, - "step": 185500 - }, - { - "epoch": 2.68, - "learning_rate": 8.170215859465466e-05, - "loss": 2.0401, - "step": 186000 - }, - { - "epoch": 2.68, - "learning_rate": 8.165023185290193e-05, - "loss": 2.0404, - "step": 186500 - }, - { - "epoch": 2.69, - "learning_rate": 8.15983051111492e-05, - "loss": 2.0387, - "step": 187000 - }, - { - "epoch": 2.7, - "learning_rate": 8.154648222287996e-05, - "loss": 2.037, - "step": 187500 - }, - { - "epoch": 2.71, - "learning_rate": 8.149455548112723e-05, - "loss": 2.0411, - "step": 188000 - }, - { - "epoch": 2.71, - "learning_rate": 8.14426287393745e-05, - "loss": 2.0364, - "step": 188500 - }, - { - "epoch": 2.72, - "learning_rate": 8.139070199762175e-05, - "loss": 2.0373, - "step": 189000 - }, - { - "epoch": 2.73, - "learning_rate": 8.133887910935253e-05, - "loss": 2.0338, - "step": 189500 - }, - { - "epoch": 2.73, - "learning_rate": 8.128695236759979e-05, - "loss": 2.0353, - "step": 190000 - }, - { - "epoch": 2.74, - "learning_rate": 8.123502562584706e-05, - "loss": 2.0322, - "step": 190500 - }, - { - "epoch": 2.75, - "learning_rate": 8.118309888409432e-05, - "loss": 2.0317, - "step": 191000 - }, - { - "epoch": 2.76, - "learning_rate": 8.113127599582509e-05, - "loss": 2.0289, - "step": 191500 - }, - { - "epoch": 2.76, - "learning_rate": 8.107934925407236e-05, - "loss": 2.0335, - "step": 192000 - }, - { - "epoch": 2.77, - "learning_rate": 8.102742251231963e-05, - "loss": 2.038, - "step": 192500 - }, - { - "epoch": 2.78, - "learning_rate": 8.09754957705669e-05, - "loss": 2.0306, - "step": 193000 - }, - { - "epoch": 2.79, - "learning_rate": 8.092367288229766e-05, - "loss": 2.0329, - "step": 193500 - }, - { - "epoch": 2.79, - "learning_rate": 8.087174614054493e-05, - "loss": 2.0326, - "step": 194000 - }, - { - "epoch": 2.8, - "learning_rate": 8.081981939879219e-05, - "loss": 2.0369, - "step": 194500 - }, - { - "epoch": 2.81, - "learning_rate": 8.076789265703945e-05, - "loss": 2.0309, - "step": 195000 - }, - { - "epoch": 2.81, - "learning_rate": 8.071606976877022e-05, - "loss": 2.0313, - "step": 195500 - }, - { - "epoch": 2.82, - "learning_rate": 8.066414302701749e-05, - "loss": 2.0288, - "step": 196000 - }, - { - "epoch": 2.83, - "learning_rate": 8.061221628526476e-05, - "loss": 2.0297, - "step": 196500 - }, - { - "epoch": 2.84, - "learning_rate": 8.056028954351201e-05, - "loss": 2.0264, - "step": 197000 - }, - { - "epoch": 2.84, - "learning_rate": 8.050846665524279e-05, - "loss": 2.0319, - "step": 197500 - }, - { - "epoch": 2.85, - "learning_rate": 8.045653991349005e-05, - "loss": 2.0314, - "step": 198000 - }, - { - "epoch": 2.86, - "learning_rate": 8.040461317173731e-05, - "loss": 2.0281, - "step": 198500 - }, - { - "epoch": 2.86, - "learning_rate": 8.035268642998458e-05, - "loss": 2.0269, - "step": 199000 - }, - { - "epoch": 2.87, - "learning_rate": 8.030086354171535e-05, - "loss": 2.022, - "step": 199500 - }, - { - "epoch": 2.88, - "learning_rate": 8.024893679996262e-05, - "loss": 2.032, - "step": 200000 - }, - { - "epoch": 2.89, - "learning_rate": 8.019701005820987e-05, - "loss": 2.0283, - "step": 200500 - }, - { - "epoch": 2.89, - "learning_rate": 8.014508331645715e-05, - "loss": 2.0206, - "step": 201000 - }, - { - "epoch": 2.9, - "learning_rate": 8.009326042818792e-05, - "loss": 2.0196, - "step": 201500 - }, - { - "epoch": 2.91, - "learning_rate": 8.004133368643519e-05, - "loss": 2.0243, - "step": 202000 - }, - { - "epoch": 2.91, - "learning_rate": 7.998951079816595e-05, - "loss": 2.0244, - "step": 202500 - }, - { - "epoch": 2.92, - "learning_rate": 7.993758405641322e-05, - "loss": 2.026, - "step": 203000 - }, - { - "epoch": 2.93, - "learning_rate": 7.988565731466048e-05, - "loss": 2.0219, - "step": 203500 - }, - { - "epoch": 2.94, - "learning_rate": 7.983373057290775e-05, - "loss": 2.0271, - "step": 204000 - }, - { - "epoch": 2.94, - "learning_rate": 7.978180383115501e-05, - "loss": 2.0275, - "step": 204500 - }, - { - "epoch": 2.95, - "learning_rate": 7.972987708940227e-05, - "loss": 2.0199, - "step": 205000 - }, - { - "epoch": 2.96, - "learning_rate": 7.967795034764955e-05, - "loss": 2.0202, - "step": 205500 - }, - { - "epoch": 2.97, - "learning_rate": 7.96260236058968e-05, - "loss": 2.019, - "step": 206000 - }, - { - "epoch": 2.97, - "learning_rate": 7.957420071762758e-05, - "loss": 2.0214, - "step": 206500 - }, - { - "epoch": 2.98, - "learning_rate": 7.952227397587484e-05, - "loss": 2.0192, - "step": 207000 - }, - { - "epoch": 2.99, - "learning_rate": 7.94703472341221e-05, - "loss": 2.0183, - "step": 207500 - }, - { - "epoch": 2.99, - "learning_rate": 7.941842049236938e-05, - "loss": 2.0203, - "step": 208000 - }, - { - "epoch": 3.0, - "eval_accuracy": 0.6207558846474005, - "eval_loss": 1.8847733736038208, - "eval_runtime": 647.2657, - "eval_samples_per_second": 832.636, - "eval_steps_per_second": 34.694, - "step": 208419 - }, - { - "epoch": 3.0, - "learning_rate": 7.936659760410013e-05, - "loss": 2.0154, - "step": 208500 - }, - { - "epoch": 3.01, - "learning_rate": 7.931467086234741e-05, - "loss": 2.0093, - "step": 209000 - }, - { - "epoch": 3.02, - "learning_rate": 7.926274412059467e-05, - "loss": 2.0154, - "step": 209500 - }, - { - "epoch": 3.02, - "learning_rate": 7.921081737884193e-05, - "loss": 2.0148, - "step": 210000 - }, - { - "epoch": 3.03, - "learning_rate": 7.91589944905727e-05, - "loss": 2.0151, - "step": 210500 - }, - { - "epoch": 3.04, - "learning_rate": 7.910706774881997e-05, - "loss": 2.0173, - "step": 211000 - }, - { - "epoch": 3.04, - "learning_rate": 7.905514100706724e-05, - "loss": 2.012, - "step": 211500 - }, - { - "epoch": 3.05, - "learning_rate": 7.900321426531449e-05, - "loss": 2.0161, - "step": 212000 - }, - { - "epoch": 3.06, - "learning_rate": 7.895139137704527e-05, - "loss": 2.0142, - "step": 212500 - }, - { - "epoch": 3.07, - "learning_rate": 7.889946463529253e-05, - "loss": 2.0158, - "step": 213000 - }, - { - "epoch": 3.07, - "learning_rate": 7.884753789353981e-05, - "loss": 2.0152, - "step": 213500 - }, - { - "epoch": 3.08, - "learning_rate": 7.879561115178706e-05, - "loss": 2.013, - "step": 214000 - }, - { - "epoch": 3.09, - "learning_rate": 7.874378826351784e-05, - "loss": 2.0164, - "step": 214500 - }, - { - "epoch": 3.09, - "learning_rate": 7.86918615217651e-05, - "loss": 2.0166, - "step": 215000 - }, - { - "epoch": 3.1, - "learning_rate": 7.863993478001237e-05, - "loss": 2.0151, - "step": 215500 - }, - { - "epoch": 3.11, - "learning_rate": 7.858800803825963e-05, - "loss": 2.0172, - "step": 216000 - }, - { - "epoch": 3.12, - "learning_rate": 7.85361851499904e-05, - "loss": 2.0083, - "step": 216500 - }, - { - "epoch": 3.12, - "learning_rate": 7.848425840823767e-05, - "loss": 2.0134, - "step": 217000 - }, - { - "epoch": 3.13, - "learning_rate": 7.843233166648492e-05, - "loss": 2.0099, - "step": 217500 - }, - { - "epoch": 3.14, - "learning_rate": 7.838040492473219e-05, - "loss": 2.0139, - "step": 218000 - }, - { - "epoch": 3.15, - "learning_rate": 7.832858203646296e-05, - "loss": 2.0073, - "step": 218500 - }, - { - "epoch": 3.15, - "learning_rate": 7.827665529471023e-05, - "loss": 2.0145, - "step": 219000 - }, - { - "epoch": 3.16, - "learning_rate": 7.82247285529575e-05, - "loss": 2.0108, - "step": 219500 - }, - { - "epoch": 3.17, - "learning_rate": 7.817280181120475e-05, - "loss": 2.0082, - "step": 220000 - }, - { - "epoch": 3.17, - "learning_rate": 7.812097892293553e-05, - "loss": 2.0091, - "step": 220500 - }, - { - "epoch": 3.18, - "learning_rate": 7.806905218118278e-05, - "loss": 2.0099, - "step": 221000 - }, - { - "epoch": 3.19, - "learning_rate": 7.801712543943007e-05, - "loss": 2.008, - "step": 221500 - }, - { - "epoch": 3.2, - "learning_rate": 7.796519869767732e-05, - "loss": 2.0075, - "step": 222000 - }, - { - "epoch": 3.2, - "learning_rate": 7.79133758094081e-05, - "loss": 2.0079, - "step": 222500 - }, - { - "epoch": 3.21, - "learning_rate": 7.786144906765535e-05, - "loss": 2.008, - "step": 223000 - }, - { - "epoch": 3.22, - "learning_rate": 7.780952232590262e-05, - "loss": 2.0103, - "step": 223500 - }, - { - "epoch": 3.22, - "learning_rate": 7.775759558414989e-05, - "loss": 2.0076, - "step": 224000 - }, - { - "epoch": 3.23, - "learning_rate": 7.770577269588066e-05, - "loss": 2.0104, - "step": 224500 - }, - { - "epoch": 3.24, - "learning_rate": 7.765384595412793e-05, - "loss": 2.0062, - "step": 225000 - }, - { - "epoch": 3.25, - "learning_rate": 7.760191921237518e-05, - "loss": 2.0049, - "step": 225500 - }, - { - "epoch": 3.25, - "learning_rate": 7.754999247062245e-05, - "loss": 2.0051, - "step": 226000 - }, - { - "epoch": 3.26, - "learning_rate": 7.749816958235321e-05, - "loss": 2.0002, - "step": 226500 - }, - { - "epoch": 3.27, - "learning_rate": 7.744624284060048e-05, - "loss": 2.0056, - "step": 227000 - }, - { - "epoch": 3.27, - "learning_rate": 7.739431609884775e-05, - "loss": 2.0077, - "step": 227500 - }, - { - "epoch": 3.28, - "learning_rate": 7.734238935709502e-05, - "loss": 2.0082, - "step": 228000 - }, - { - "epoch": 3.29, - "learning_rate": 7.729056646882579e-05, - "loss": 2.0026, - "step": 228500 - }, - { - "epoch": 3.3, - "learning_rate": 7.723863972707305e-05, - "loss": 2.0034, - "step": 229000 - }, - { - "epoch": 3.3, - "learning_rate": 7.718671298532031e-05, - "loss": 2.0043, - "step": 229500 - }, - { - "epoch": 3.31, - "learning_rate": 7.713478624356758e-05, - "loss": 2.0033, - "step": 230000 - }, - { - "epoch": 3.32, - "learning_rate": 7.708296335529836e-05, - "loss": 2.0018, - "step": 230500 - }, - { - "epoch": 3.33, - "learning_rate": 7.703103661354561e-05, - "loss": 2.0024, - "step": 231000 - }, - { - "epoch": 3.33, - "learning_rate": 7.697910987179288e-05, - "loss": 2.0036, - "step": 231500 - }, - { - "epoch": 3.34, - "learning_rate": 7.692718313004015e-05, - "loss": 1.9985, - "step": 232000 - }, - { - "epoch": 3.35, - "learning_rate": 7.687536024177091e-05, - "loss": 2.0016, - "step": 232500 - }, - { - "epoch": 3.35, - "learning_rate": 7.682343350001818e-05, - "loss": 2.0027, - "step": 233000 - }, - { - "epoch": 3.36, - "learning_rate": 7.677150675826544e-05, - "loss": 2.0009, - "step": 233500 - }, - { - "epoch": 3.37, - "learning_rate": 7.67195800165127e-05, - "loss": 2.001, - "step": 234000 - }, - { - "epoch": 3.38, - "learning_rate": 7.666775712824347e-05, - "loss": 2.0001, - "step": 234500 - }, - { - "epoch": 3.38, - "learning_rate": 7.661583038649074e-05, - "loss": 2.0002, - "step": 235000 - }, - { - "epoch": 3.39, - "learning_rate": 7.656390364473801e-05, - "loss": 2.0023, - "step": 235500 - }, - { - "epoch": 3.4, - "learning_rate": 7.651197690298528e-05, - "loss": 1.9976, - "step": 236000 - }, - { - "epoch": 3.4, - "learning_rate": 7.646015401471604e-05, - "loss": 1.9986, - "step": 236500 - }, - { - "epoch": 3.41, - "learning_rate": 7.640822727296331e-05, - "loss": 1.9999, - "step": 237000 - }, - { - "epoch": 3.42, - "learning_rate": 7.635630053121057e-05, - "loss": 1.998, - "step": 237500 - }, - { - "epoch": 3.43, - "learning_rate": 7.630437378945783e-05, - "loss": 1.9966, - "step": 238000 - }, - { - "epoch": 3.43, - "learning_rate": 7.62525509011886e-05, - "loss": 1.9962, - "step": 238500 - }, - { - "epoch": 3.44, - "learning_rate": 7.620062415943587e-05, - "loss": 1.9994, - "step": 239000 - }, - { - "epoch": 3.45, - "learning_rate": 7.614869741768314e-05, - "loss": 1.998, - "step": 239500 - }, - { - "epoch": 3.45, - "learning_rate": 7.609677067593039e-05, - "loss": 1.9962, - "step": 240000 - }, - { - "epoch": 3.46, - "learning_rate": 7.604484393417767e-05, - "loss": 2.0007, - "step": 240500 - }, - { - "epoch": 3.47, - "learning_rate": 7.599291719242493e-05, - "loss": 1.9939, - "step": 241000 - }, - { - "epoch": 3.48, - "learning_rate": 7.594109430415571e-05, - "loss": 1.9966, - "step": 241500 - }, - { - "epoch": 3.48, - "learning_rate": 7.588916756240296e-05, - "loss": 1.9956, - "step": 242000 - }, - { - "epoch": 3.49, - "learning_rate": 7.583724082065023e-05, - "loss": 1.99, - "step": 242500 - }, - { - "epoch": 3.5, - "learning_rate": 7.57853140788975e-05, - "loss": 1.9943, - "step": 243000 - }, - { - "epoch": 3.5, - "learning_rate": 7.573338733714475e-05, - "loss": 1.9931, - "step": 243500 - }, - { - "epoch": 3.51, - "learning_rate": 7.568146059539202e-05, - "loss": 1.9946, - "step": 244000 - }, - { - "epoch": 3.52, - "learning_rate": 7.562953385363929e-05, - "loss": 1.9954, - "step": 244500 - }, - { - "epoch": 3.53, - "learning_rate": 7.557760711188655e-05, - "loss": 1.9956, - "step": 245000 - }, - { - "epoch": 3.53, - "learning_rate": 7.552578422361733e-05, - "loss": 1.9904, - "step": 245500 - }, - { - "epoch": 3.54, - "learning_rate": 7.547385748186458e-05, - "loss": 1.9903, - "step": 246000 - }, - { - "epoch": 3.55, - "learning_rate": 7.542193074011186e-05, - "loss": 1.9946, - "step": 246500 - }, - { - "epoch": 3.56, - "learning_rate": 7.537000399835912e-05, - "loss": 1.9943, - "step": 247000 - }, - { - "epoch": 3.56, - "learning_rate": 7.53181811100899e-05, - "loss": 1.9916, - "step": 247500 - }, - { - "epoch": 3.57, - "learning_rate": 7.526625436833715e-05, - "loss": 1.9965, - "step": 248000 - }, - { - "epoch": 3.58, - "learning_rate": 7.521432762658442e-05, - "loss": 1.9904, - "step": 248500 - }, - { - "epoch": 3.58, - "learning_rate": 7.516240088483169e-05, - "loss": 1.9919, - "step": 249000 - }, - { - "epoch": 3.59, - "learning_rate": 7.511057799656245e-05, - "loss": 1.9945, - "step": 249500 - }, - { - "epoch": 3.6, - "learning_rate": 7.505865125480972e-05, - "loss": 1.9884, - "step": 250000 - }, - { - "epoch": 3.61, - "learning_rate": 7.500672451305698e-05, - "loss": 1.9946, - "step": 250500 - }, - { - "epoch": 3.61, - "learning_rate": 7.495479777130426e-05, - "loss": 1.9906, - "step": 251000 - }, - { - "epoch": 3.62, - "learning_rate": 7.490297488303501e-05, - "loss": 1.9934, - "step": 251500 - }, - { - "epoch": 3.63, - "learning_rate": 7.48510481412823e-05, - "loss": 1.9926, - "step": 252000 - }, - { - "epoch": 3.63, - "learning_rate": 7.479922525301305e-05, - "loss": 1.9896, - "step": 252500 - }, - { - "epoch": 3.64, - "learning_rate": 7.474729851126033e-05, - "loss": 1.9932, - "step": 253000 - }, - { - "epoch": 3.65, - "learning_rate": 7.469537176950758e-05, - "loss": 1.9814, - "step": 253500 - }, - { - "epoch": 3.66, - "learning_rate": 7.464344502775484e-05, - "loss": 1.9863, - "step": 254000 - }, - { - "epoch": 3.66, - "learning_rate": 7.459151828600212e-05, - "loss": 1.9919, - "step": 254500 - }, - { - "epoch": 3.67, - "learning_rate": 7.453959154424937e-05, - "loss": 1.9891, - "step": 255000 - }, - { - "epoch": 3.68, - "learning_rate": 7.448766480249664e-05, - "loss": 1.9859, - "step": 255500 - }, - { - "epoch": 3.68, - "learning_rate": 7.443573806074391e-05, - "loss": 1.9864, - "step": 256000 - }, - { - "epoch": 3.69, - "learning_rate": 7.438391517247468e-05, - "loss": 1.9892, - "step": 256500 - }, - { - "epoch": 3.7, - "learning_rate": 7.433198843072195e-05, - "loss": 1.9883, - "step": 257000 - }, - { - "epoch": 3.71, - "learning_rate": 7.42800616889692e-05, - "loss": 1.9887, - "step": 257500 - }, - { - "epoch": 3.71, - "learning_rate": 7.422813494721647e-05, - "loss": 1.9838, - "step": 258000 - }, - { - "epoch": 3.72, - "learning_rate": 7.417631205894723e-05, - "loss": 1.985, - "step": 258500 - }, - { - "epoch": 3.73, - "learning_rate": 7.41243853171945e-05, - "loss": 1.9832, - "step": 259000 - }, - { - "epoch": 3.74, - "learning_rate": 7.407245857544177e-05, - "loss": 1.9863, - "step": 259500 - }, - { - "epoch": 3.74, - "learning_rate": 7.402053183368904e-05, - "loss": 1.9831, - "step": 260000 - }, - { - "epoch": 3.75, - "learning_rate": 7.39687089454198e-05, - "loss": 1.9848, - "step": 260500 - }, - { - "epoch": 3.76, - "learning_rate": 7.391678220366707e-05, - "loss": 1.9823, - "step": 261000 - }, - { - "epoch": 3.76, - "learning_rate": 7.386485546191434e-05, - "loss": 1.9822, - "step": 261500 - }, - { - "epoch": 3.77, - "learning_rate": 7.38129287201616e-05, - "loss": 1.9799, - "step": 262000 - }, - { - "epoch": 3.78, - "learning_rate": 7.376110583189238e-05, - "loss": 1.9854, - "step": 262500 - }, - { - "epoch": 3.79, - "learning_rate": 7.370917909013963e-05, - "loss": 1.9844, - "step": 263000 - }, - { - "epoch": 3.79, - "learning_rate": 7.36572523483869e-05, - "loss": 1.9802, - "step": 263500 - }, - { - "epoch": 3.8, - "learning_rate": 7.360532560663417e-05, - "loss": 1.9807, - "step": 264000 - }, - { - "epoch": 3.81, - "learning_rate": 7.355350271836493e-05, - "loss": 1.981, - "step": 264500 - }, - { - "epoch": 3.81, - "learning_rate": 7.35015759766122e-05, - "loss": 1.9839, - "step": 265000 - }, - { - "epoch": 3.82, - "learning_rate": 7.344964923485946e-05, - "loss": 1.979, - "step": 265500 - }, - { - "epoch": 3.83, - "learning_rate": 7.339772249310673e-05, - "loss": 1.9787, - "step": 266000 - }, - { - "epoch": 3.84, - "learning_rate": 7.334589960483749e-05, - "loss": 1.9799, - "step": 266500 - }, - { - "epoch": 3.84, - "learning_rate": 7.329397286308476e-05, - "loss": 1.9795, - "step": 267000 - }, - { - "epoch": 3.85, - "learning_rate": 7.324204612133203e-05, - "loss": 1.9833, - "step": 267500 - }, - { - "epoch": 3.86, - "learning_rate": 7.31901193795793e-05, - "loss": 1.9806, - "step": 268000 - }, - { - "epoch": 3.86, - "learning_rate": 7.313829649131006e-05, - "loss": 1.9768, - "step": 268500 - }, - { - "epoch": 3.87, - "learning_rate": 7.308636974955733e-05, - "loss": 1.9801, - "step": 269000 - }, - { - "epoch": 3.88, - "learning_rate": 7.30344430078046e-05, - "loss": 1.9756, - "step": 269500 - }, - { - "epoch": 3.89, - "learning_rate": 7.298251626605185e-05, - "loss": 1.9831, - "step": 270000 - }, - { - "epoch": 3.89, - "learning_rate": 7.293069337778263e-05, - "loss": 1.9789, - "step": 270500 - }, - { - "epoch": 3.9, - "learning_rate": 7.287876663602989e-05, - "loss": 1.9765, - "step": 271000 - }, - { - "epoch": 3.91, - "learning_rate": 7.282683989427716e-05, - "loss": 1.976, - "step": 271500 - }, - { - "epoch": 3.92, - "learning_rate": 7.277491315252443e-05, - "loss": 1.9803, - "step": 272000 - }, - { - "epoch": 3.92, - "learning_rate": 7.27229864107717e-05, - "loss": 1.9808, - "step": 272500 - }, - { - "epoch": 3.93, - "learning_rate": 7.267105966901895e-05, - "loss": 1.9821, - "step": 273000 - }, - { - "epoch": 3.94, - "learning_rate": 7.261913292726622e-05, - "loss": 1.9778, - "step": 273500 - }, - { - "epoch": 3.94, - "learning_rate": 7.256720618551349e-05, - "loss": 1.9769, - "step": 274000 - }, - { - "epoch": 3.95, - "learning_rate": 7.251538329724425e-05, - "loss": 1.9774, - "step": 274500 - }, - { - "epoch": 3.96, - "learning_rate": 7.246345655549152e-05, - "loss": 1.9731, - "step": 275000 - }, - { - "epoch": 3.97, - "learning_rate": 7.241152981373877e-05, - "loss": 1.9745, - "step": 275500 - }, - { - "epoch": 3.97, - "learning_rate": 7.235960307198604e-05, - "loss": 1.979, - "step": 276000 - }, - { - "epoch": 3.98, - "learning_rate": 7.230778018371681e-05, - "loss": 1.9733, - "step": 276500 - }, - { - "epoch": 3.99, - "learning_rate": 7.225585344196408e-05, - "loss": 1.9762, - "step": 277000 - }, - { - "epoch": 3.99, - "learning_rate": 7.220392670021135e-05, - "loss": 1.978, - "step": 277500 - }, - { - "epoch": 4.0, - "eval_accuracy": 0.6271896670934867, - "eval_loss": 1.8407700061798096, - "eval_runtime": 646.5128, - "eval_samples_per_second": 833.606, - "eval_steps_per_second": 34.734, - "step": 277892 - }, - { - "epoch": 4.0, - "learning_rate": 7.21519999584586e-05, - "loss": 1.9736, - "step": 278000 - }, - { - "epoch": 4.01, - "learning_rate": 7.210017707018938e-05, - "loss": 1.9682, - "step": 278500 - }, - { - "epoch": 4.02, - "learning_rate": 7.204825032843665e-05, - "loss": 1.9706, - "step": 279000 - }, - { - "epoch": 4.02, - "learning_rate": 7.199632358668392e-05, - "loss": 1.9751, - "step": 279500 - }, - { - "epoch": 4.03, - "learning_rate": 7.194439684493117e-05, - "loss": 1.9709, - "step": 280000 - }, - { - "epoch": 4.04, - "learning_rate": 7.189257395666195e-05, - "loss": 1.9697, - "step": 280500 - }, - { - "epoch": 4.04, - "learning_rate": 7.18406472149092e-05, - "loss": 1.9671, - "step": 281000 - }, - { - "epoch": 4.05, - "learning_rate": 7.178872047315647e-05, - "loss": 1.9717, - "step": 281500 - }, - { - "epoch": 4.06, - "learning_rate": 7.173679373140374e-05, - "loss": 1.9722, - "step": 282000 - }, - { - "epoch": 4.07, - "learning_rate": 7.168497084313451e-05, - "loss": 1.9695, - "step": 282500 - }, - { - "epoch": 4.07, - "learning_rate": 7.163314795486528e-05, - "loss": 1.9683, - "step": 283000 - }, - { - "epoch": 4.08, - "learning_rate": 7.158122121311254e-05, - "loss": 1.9647, - "step": 283500 - }, - { - "epoch": 4.09, - "learning_rate": 7.152929447135981e-05, - "loss": 1.9672, - "step": 284000 - }, - { - "epoch": 4.1, - "learning_rate": 7.147736772960707e-05, - "loss": 1.9762, - "step": 284500 - }, - { - "epoch": 4.1, - "learning_rate": 7.142544098785435e-05, - "loss": 1.9708, - "step": 285000 - }, - { - "epoch": 4.11, - "learning_rate": 7.13735142461016e-05, - "loss": 1.9703, - "step": 285500 - }, - { - "epoch": 4.12, - "learning_rate": 7.132158750434886e-05, - "loss": 1.9673, - "step": 286000 - }, - { - "epoch": 4.12, - "learning_rate": 7.126966076259614e-05, - "loss": 1.9704, - "step": 286500 - }, - { - "epoch": 4.13, - "learning_rate": 7.121783787432689e-05, - "loss": 1.9691, - "step": 287000 - }, - { - "epoch": 4.14, - "learning_rate": 7.116591113257417e-05, - "loss": 1.9683, - "step": 287500 - }, - { - "epoch": 4.15, - "learning_rate": 7.111398439082143e-05, - "loss": 1.9687, - "step": 288000 - }, - { - "epoch": 4.15, - "learning_rate": 7.10620576490687e-05, - "loss": 1.9648, - "step": 288500 - }, - { - "epoch": 4.16, - "learning_rate": 7.101023476079946e-05, - "loss": 1.9641, - "step": 289000 - }, - { - "epoch": 4.17, - "learning_rate": 7.095830801904673e-05, - "loss": 1.9635, - "step": 289500 - }, - { - "epoch": 4.17, - "learning_rate": 7.0906381277294e-05, - "loss": 1.9626, - "step": 290000 - }, - { - "epoch": 4.18, - "learning_rate": 7.085445453554125e-05, - "loss": 1.9643, - "step": 290500 - }, - { - "epoch": 4.19, - "learning_rate": 7.080263164727203e-05, - "loss": 1.9661, - "step": 291000 - }, - { - "epoch": 4.2, - "learning_rate": 7.075070490551929e-05, - "loss": 1.9664, - "step": 291500 - }, - { - "epoch": 4.2, - "learning_rate": 7.069877816376657e-05, - "loss": 1.9648, - "step": 292000 - }, - { - "epoch": 4.21, - "learning_rate": 7.064685142201383e-05, - "loss": 1.9605, - "step": 292500 - }, - { - "epoch": 4.22, - "learning_rate": 7.05950285337446e-05, - "loss": 1.9626, - "step": 293000 - }, - { - "epoch": 4.22, - "learning_rate": 7.054310179199186e-05, - "loss": 1.9673, - "step": 293500 - }, - { - "epoch": 4.23, - "learning_rate": 7.049117505023913e-05, - "loss": 1.9646, - "step": 294000 - }, - { - "epoch": 4.24, - "learning_rate": 7.04392483084864e-05, - "loss": 1.9676, - "step": 294500 - }, - { - "epoch": 4.25, - "learning_rate": 7.038742542021716e-05, - "loss": 1.9631, - "step": 295000 - }, - { - "epoch": 4.25, - "learning_rate": 7.033549867846443e-05, - "loss": 1.9696, - "step": 295500 - }, - { - "epoch": 4.26, - "learning_rate": 7.028357193671169e-05, - "loss": 1.9626, - "step": 296000 - }, - { - "epoch": 4.27, - "learning_rate": 7.023164519495895e-05, - "loss": 1.964, - "step": 296500 - }, - { - "epoch": 4.28, - "learning_rate": 7.017982230668972e-05, - "loss": 1.9593, - "step": 297000 - }, - { - "epoch": 4.28, - "learning_rate": 7.012789556493699e-05, - "loss": 1.966, - "step": 297500 - }, - { - "epoch": 4.29, - "learning_rate": 7.007596882318426e-05, - "loss": 1.9676, - "step": 298000 - }, - { - "epoch": 4.3, - "learning_rate": 7.002404208143151e-05, - "loss": 1.9667, - "step": 298500 - }, - { - "epoch": 4.3, - "learning_rate": 6.997221919316229e-05, - "loss": 1.9589, - "step": 299000 - }, - { - "epoch": 4.31, - "learning_rate": 6.992029245140955e-05, - "loss": 1.9635, - "step": 299500 - }, - { - "epoch": 4.32, - "learning_rate": 6.986836570965683e-05, - "loss": 1.9628, - "step": 300000 - }, - { - "epoch": 4.33, - "learning_rate": 6.981643896790408e-05, - "loss": 1.9599, - "step": 300500 - }, - { - "epoch": 4.33, - "learning_rate": 6.976461607963486e-05, - "loss": 1.9599, - "step": 301000 - }, - { - "epoch": 4.34, - "learning_rate": 6.971268933788212e-05, - "loss": 1.96, - "step": 301500 - }, - { - "epoch": 4.35, - "learning_rate": 6.966076259612939e-05, - "loss": 1.964, - "step": 302000 - }, - { - "epoch": 4.35, - "learning_rate": 6.960883585437665e-05, - "loss": 1.9602, - "step": 302500 - }, - { - "epoch": 4.36, - "learning_rate": 6.955701296610742e-05, - "loss": 1.9607, - "step": 303000 - }, - { - "epoch": 4.37, - "learning_rate": 6.950508622435469e-05, - "loss": 1.96, - "step": 303500 - }, - { - "epoch": 4.38, - "learning_rate": 6.945315948260194e-05, - "loss": 1.9582, - "step": 304000 - }, - { - "epoch": 4.38, - "learning_rate": 6.940123274084921e-05, - "loss": 1.9645, - "step": 304500 - }, - { - "epoch": 4.39, - "learning_rate": 6.934940985257998e-05, - "loss": 1.9649, - "step": 305000 - }, - { - "epoch": 4.4, - "learning_rate": 6.929748311082725e-05, - "loss": 1.9614, - "step": 305500 - }, - { - "epoch": 4.4, - "learning_rate": 6.924555636907451e-05, - "loss": 1.9581, - "step": 306000 - }, - { - "epoch": 4.41, - "learning_rate": 6.919362962732178e-05, - "loss": 1.9605, - "step": 306500 - }, - { - "epoch": 4.42, - "learning_rate": 6.914180673905255e-05, - "loss": 1.9589, - "step": 307000 - }, - { - "epoch": 4.43, - "learning_rate": 6.908987999729982e-05, - "loss": 1.9598, - "step": 307500 - }, - { - "epoch": 4.43, - "learning_rate": 6.903795325554707e-05, - "loss": 1.9611, - "step": 308000 - }, - { - "epoch": 4.44, - "learning_rate": 6.898602651379434e-05, - "loss": 1.9588, - "step": 308500 - }, - { - "epoch": 4.45, - "learning_rate": 6.893420362552512e-05, - "loss": 1.9596, - "step": 309000 - }, - { - "epoch": 4.45, - "learning_rate": 6.888227688377238e-05, - "loss": 1.9542, - "step": 309500 - }, - { - "epoch": 4.46, - "learning_rate": 6.883035014201964e-05, - "loss": 1.9565, - "step": 310000 - }, - { - "epoch": 4.47, - "learning_rate": 6.877842340026691e-05, - "loss": 1.9596, - "step": 310500 - }, - { - "epoch": 4.48, - "learning_rate": 6.872660051199768e-05, - "loss": 1.9577, - "step": 311000 - }, - { - "epoch": 4.48, - "learning_rate": 6.867467377024495e-05, - "loss": 1.9559, - "step": 311500 - }, - { - "epoch": 4.49, - "learning_rate": 6.86227470284922e-05, - "loss": 1.962, - "step": 312000 - }, - { - "epoch": 4.5, - "learning_rate": 6.857082028673947e-05, - "loss": 1.958, - "step": 312500 - }, - { - "epoch": 4.51, - "learning_rate": 6.851899739847024e-05, - "loss": 1.9559, - "step": 313000 - }, - { - "epoch": 4.51, - "learning_rate": 6.84670706567175e-05, - "loss": 1.9577, - "step": 313500 - }, - { - "epoch": 4.52, - "learning_rate": 6.841514391496477e-05, - "loss": 1.957, - "step": 314000 - }, - { - "epoch": 4.53, - "learning_rate": 6.836321717321204e-05, - "loss": 1.9549, - "step": 314500 - }, - { - "epoch": 4.53, - "learning_rate": 6.83113942849428e-05, - "loss": 1.9533, - "step": 315000 - }, - { - "epoch": 4.54, - "learning_rate": 6.825946754319007e-05, - "loss": 1.9567, - "step": 315500 - }, - { - "epoch": 4.55, - "learning_rate": 6.820754080143733e-05, - "loss": 1.9572, - "step": 316000 - }, - { - "epoch": 4.56, - "learning_rate": 6.81556140596846e-05, - "loss": 1.9527, - "step": 316500 - }, - { - "epoch": 4.56, - "learning_rate": 6.810379117141536e-05, - "loss": 1.9581, - "step": 317000 - }, - { - "epoch": 4.57, - "learning_rate": 6.805186442966263e-05, - "loss": 1.9575, - "step": 317500 - }, - { - "epoch": 4.58, - "learning_rate": 6.800014539487691e-05, - "loss": 1.9544, - "step": 318000 - }, - { - "epoch": 4.58, - "learning_rate": 6.794821865312418e-05, - "loss": 1.9547, - "step": 318500 - }, - { - "epoch": 4.59, - "learning_rate": 6.789629191137145e-05, - "loss": 1.9543, - "step": 319000 - }, - { - "epoch": 4.6, - "learning_rate": 6.78443651696187e-05, - "loss": 1.9539, - "step": 319500 - }, - { - "epoch": 4.61, - "learning_rate": 6.779243842786597e-05, - "loss": 1.9576, - "step": 320000 - }, - { - "epoch": 4.61, - "learning_rate": 6.774051168611324e-05, - "loss": 1.9539, - "step": 320500 - }, - { - "epoch": 4.62, - "learning_rate": 6.76885849443605e-05, - "loss": 1.9521, - "step": 321000 - }, - { - "epoch": 4.63, - "learning_rate": 6.763665820260776e-05, - "loss": 1.9576, - "step": 321500 - }, - { - "epoch": 4.63, - "learning_rate": 6.758473146085503e-05, - "loss": 1.9526, - "step": 322000 - }, - { - "epoch": 4.64, - "learning_rate": 6.75328047191023e-05, - "loss": 1.9553, - "step": 322500 - }, - { - "epoch": 4.65, - "learning_rate": 6.748087797734955e-05, - "loss": 1.9532, - "step": 323000 - }, - { - "epoch": 4.66, - "learning_rate": 6.742895123559682e-05, - "loss": 1.9524, - "step": 323500 - }, - { - "epoch": 4.66, - "learning_rate": 6.737712834732759e-05, - "loss": 1.9517, - "step": 324000 - }, - { - "epoch": 4.67, - "learning_rate": 6.732520160557486e-05, - "loss": 1.9524, - "step": 324500 - }, - { - "epoch": 4.68, - "learning_rate": 6.727327486382212e-05, - "loss": 1.9539, - "step": 325000 - }, - { - "epoch": 4.69, - "learning_rate": 6.722134812206938e-05, - "loss": 1.9542, - "step": 325500 - }, - { - "epoch": 4.69, - "learning_rate": 6.716952523380016e-05, - "loss": 1.9531, - "step": 326000 - }, - { - "epoch": 4.7, - "learning_rate": 6.711759849204741e-05, - "loss": 1.9495, - "step": 326500 - }, - { - "epoch": 4.71, - "learning_rate": 6.70656717502947e-05, - "loss": 1.9507, - "step": 327000 - }, - { - "epoch": 4.71, - "learning_rate": 6.701374500854195e-05, - "loss": 1.9461, - "step": 327500 - }, - { - "epoch": 4.72, - "learning_rate": 6.696192212027273e-05, - "loss": 1.9534, - "step": 328000 - }, - { - "epoch": 4.73, - "learning_rate": 6.690999537851998e-05, - "loss": 1.9473, - "step": 328500 - }, - { - "epoch": 4.74, - "learning_rate": 6.685806863676725e-05, - "loss": 1.9534, - "step": 329000 - }, - { - "epoch": 4.74, - "learning_rate": 6.680614189501452e-05, - "loss": 1.9516, - "step": 329500 - }, - { - "epoch": 4.75, - "learning_rate": 6.675431900674529e-05, - "loss": 1.9532, - "step": 330000 - }, - { - "epoch": 4.76, - "learning_rate": 6.670239226499256e-05, - "loss": 1.9505, - "step": 330500 - }, - { - "epoch": 4.76, - "learning_rate": 6.665046552323981e-05, - "loss": 1.9469, - "step": 331000 - }, - { - "epoch": 4.77, - "learning_rate": 6.659853878148709e-05, - "loss": 1.9511, - "step": 331500 - }, - { - "epoch": 4.78, - "learning_rate": 6.654671589321784e-05, - "loss": 1.9506, - "step": 332000 - }, - { - "epoch": 4.79, - "learning_rate": 6.649489300494862e-05, - "loss": 1.9518, - "step": 332500 - }, - { - "epoch": 4.79, - "learning_rate": 6.644296626319588e-05, - "loss": 1.9517, - "step": 333000 - }, - { - "epoch": 4.8, - "learning_rate": 6.639103952144316e-05, - "loss": 1.947, - "step": 333500 - }, - { - "epoch": 4.81, - "learning_rate": 6.633911277969042e-05, - "loss": 1.9462, - "step": 334000 - }, - { - "epoch": 4.81, - "learning_rate": 6.628718603793767e-05, - "loss": 1.946, - "step": 334500 - }, - { - "epoch": 4.82, - "learning_rate": 6.623525929618495e-05, - "loss": 1.9531, - "step": 335000 - }, - { - "epoch": 4.83, - "learning_rate": 6.61833325544322e-05, - "loss": 1.9506, - "step": 335500 - }, - { - "epoch": 4.84, - "learning_rate": 6.613140581267948e-05, - "loss": 1.9532, - "step": 336000 - }, - { - "epoch": 4.84, - "learning_rate": 6.607958292441024e-05, - "loss": 1.9443, - "step": 336500 - }, - { - "epoch": 4.85, - "learning_rate": 6.602765618265751e-05, - "loss": 1.9486, - "step": 337000 - }, - { - "epoch": 4.86, - "learning_rate": 6.597572944090478e-05, - "loss": 1.9467, - "step": 337500 - }, - { - "epoch": 4.87, - "learning_rate": 6.592380269915203e-05, - "loss": 1.9443, - "step": 338000 - }, - { - "epoch": 4.87, - "learning_rate": 6.587197981088281e-05, - "loss": 1.9463, - "step": 338500 - }, - { - "epoch": 4.88, - "learning_rate": 6.582005306913007e-05, - "loss": 1.9462, - "step": 339000 - }, - { - "epoch": 4.89, - "learning_rate": 6.576812632737735e-05, - "loss": 1.9507, - "step": 339500 - }, - { - "epoch": 4.89, - "learning_rate": 6.57161995856246e-05, - "loss": 1.9489, - "step": 340000 - }, - { - "epoch": 4.9, - "learning_rate": 6.566448055083888e-05, - "loss": 1.9447, - "step": 340500 - }, - { - "epoch": 4.91, - "learning_rate": 6.561255380908614e-05, - "loss": 1.9469, - "step": 341000 - }, - { - "epoch": 4.92, - "learning_rate": 6.556062706733342e-05, - "loss": 1.9456, - "step": 341500 - }, - { - "epoch": 4.92, - "learning_rate": 6.550870032558067e-05, - "loss": 1.9441, - "step": 342000 - }, - { - "epoch": 4.93, - "learning_rate": 6.545677358382794e-05, - "loss": 1.9454, - "step": 342500 - }, - { - "epoch": 4.94, - "learning_rate": 6.540484684207521e-05, - "loss": 1.947, - "step": 343000 - }, - { - "epoch": 4.94, - "learning_rate": 6.535292010032246e-05, - "loss": 1.9427, - "step": 343500 - }, - { - "epoch": 4.95, - "learning_rate": 6.530099335856973e-05, - "loss": 1.9461, - "step": 344000 - }, - { - "epoch": 4.96, - "learning_rate": 6.52491704703005e-05, - "loss": 1.9465, - "step": 344500 - }, - { - "epoch": 4.97, - "learning_rate": 6.519724372854777e-05, - "loss": 1.9443, - "step": 345000 - }, - { - "epoch": 4.97, - "learning_rate": 6.514531698679504e-05, - "loss": 1.9455, - "step": 345500 - }, - { - "epoch": 4.98, - "learning_rate": 6.509339024504229e-05, - "loss": 1.9419, - "step": 346000 - }, - { - "epoch": 4.99, - "learning_rate": 6.504156735677307e-05, - "loss": 1.9396, - "step": 346500 - }, - { - "epoch": 4.99, - "learning_rate": 6.498964061502032e-05, - "loss": 1.937, - "step": 347000 - }, - { - "epoch": 5.0, - "eval_accuracy": 0.6320096391256435, - "eval_loss": 1.807983636856079, - "eval_runtime": 648.1194, - "eval_samples_per_second": 831.54, - "eval_steps_per_second": 34.648, - "step": 347365 - }, - { - "epoch": 5.0, - "learning_rate": 6.49377138732676e-05, - "loss": 1.9433, - "step": 347500 - }, - { - "epoch": 5.01, - "learning_rate": 6.488589098499836e-05, - "loss": 1.9431, - "step": 348000 - }, - { - "epoch": 5.02, - "learning_rate": 6.483396424324564e-05, - "loss": 1.9447, - "step": 348500 - }, - { - "epoch": 5.02, - "learning_rate": 6.47820375014929e-05, - "loss": 1.9422, - "step": 349000 - }, - { - "epoch": 5.03, - "learning_rate": 6.473011075974016e-05, - "loss": 1.9421, - "step": 349500 - }, - { - "epoch": 5.04, - "learning_rate": 6.467818401798743e-05, - "loss": 1.94, - "step": 350000 - }, - { - "epoch": 5.05, - "learning_rate": 6.462625727623469e-05, - "loss": 1.9386, - "step": 350500 - }, - { - "epoch": 5.05, - "learning_rate": 6.457433053448196e-05, - "loss": 1.9419, - "step": 351000 - }, - { - "epoch": 5.06, - "learning_rate": 6.452250764621272e-05, - "loss": 1.9373, - "step": 351500 - }, - { - "epoch": 5.07, - "learning_rate": 6.447058090445999e-05, - "loss": 1.9377, - "step": 352000 - }, - { - "epoch": 5.07, - "learning_rate": 6.441865416270726e-05, - "loss": 1.9414, - "step": 352500 - }, - { - "epoch": 5.08, - "learning_rate": 6.436672742095453e-05, - "loss": 1.9413, - "step": 353000 - }, - { - "epoch": 5.09, - "learning_rate": 6.431480067920178e-05, - "loss": 1.9364, - "step": 353500 - }, - { - "epoch": 5.1, - "learning_rate": 6.426287393744905e-05, - "loss": 1.9375, - "step": 354000 - }, - { - "epoch": 5.1, - "learning_rate": 6.421094719569632e-05, - "loss": 1.9397, - "step": 354500 - }, - { - "epoch": 5.11, - "learning_rate": 6.415902045394357e-05, - "loss": 1.9358, - "step": 355000 - }, - { - "epoch": 5.12, - "learning_rate": 6.410719756567435e-05, - "loss": 1.9371, - "step": 355500 - }, - { - "epoch": 5.12, - "learning_rate": 6.405527082392161e-05, - "loss": 1.9376, - "step": 356000 - }, - { - "epoch": 5.13, - "learning_rate": 6.400334408216888e-05, - "loss": 1.9338, - "step": 356500 - }, - { - "epoch": 5.14, - "learning_rate": 6.395141734041614e-05, - "loss": 1.9394, - "step": 357000 - }, - { - "epoch": 5.15, - "learning_rate": 6.389959445214691e-05, - "loss": 1.9397, - "step": 357500 - }, - { - "epoch": 5.15, - "learning_rate": 6.384766771039418e-05, - "loss": 1.9415, - "step": 358000 - }, - { - "epoch": 5.16, - "learning_rate": 6.379574096864145e-05, - "loss": 1.9385, - "step": 358500 - }, - { - "epoch": 5.17, - "learning_rate": 6.374381422688871e-05, - "loss": 1.9421, - "step": 359000 - }, - { - "epoch": 5.17, - "learning_rate": 6.369199133861948e-05, - "loss": 1.9324, - "step": 359500 - }, - { - "epoch": 5.18, - "learning_rate": 6.364006459686675e-05, - "loss": 1.9355, - "step": 360000 - }, - { - "epoch": 5.19, - "learning_rate": 6.3588137855114e-05, - "loss": 1.9382, - "step": 360500 - }, - { - "epoch": 5.2, - "learning_rate": 6.353621111336127e-05, - "loss": 1.9357, - "step": 361000 - }, - { - "epoch": 5.2, - "learning_rate": 6.348438822509204e-05, - "loss": 1.9368, - "step": 361500 - }, - { - "epoch": 5.21, - "learning_rate": 6.34324614833393e-05, - "loss": 1.9403, - "step": 362000 - }, - { - "epoch": 5.22, - "learning_rate": 6.338053474158658e-05, - "loss": 1.9365, - "step": 362500 - }, - { - "epoch": 5.23, - "learning_rate": 6.332860799983383e-05, - "loss": 1.936, - "step": 363000 - }, - { - "epoch": 5.23, - "learning_rate": 6.327678511156461e-05, - "loss": 1.9344, - "step": 363500 - }, - { - "epoch": 5.24, - "learning_rate": 6.322485836981186e-05, - "loss": 1.9368, - "step": 364000 - }, - { - "epoch": 5.25, - "learning_rate": 6.317293162805915e-05, - "loss": 1.9329, - "step": 364500 - }, - { - "epoch": 5.25, - "learning_rate": 6.31210048863064e-05, - "loss": 1.9337, - "step": 365000 - }, - { - "epoch": 5.26, - "learning_rate": 6.306918199803718e-05, - "loss": 1.9338, - "step": 365500 - }, - { - "epoch": 5.27, - "learning_rate": 6.301725525628444e-05, - "loss": 1.9318, - "step": 366000 - }, - { - "epoch": 5.28, - "learning_rate": 6.296532851453169e-05, - "loss": 1.9368, - "step": 366500 - }, - { - "epoch": 5.28, - "learning_rate": 6.291340177277897e-05, - "loss": 1.9319, - "step": 367000 - }, - { - "epoch": 5.29, - "learning_rate": 6.286168273799325e-05, - "loss": 1.9398, - "step": 367500 - }, - { - "epoch": 5.3, - "learning_rate": 6.28097559962405e-05, - "loss": 1.9405, - "step": 368000 - }, - { - "epoch": 5.3, - "learning_rate": 6.275782925448777e-05, - "loss": 1.9373, - "step": 368500 - }, - { - "epoch": 5.31, - "learning_rate": 6.270590251273504e-05, - "loss": 1.9335, - "step": 369000 - }, - { - "epoch": 5.32, - "learning_rate": 6.26539757709823e-05, - "loss": 1.937, - "step": 369500 - }, - { - "epoch": 5.33, - "learning_rate": 6.260204902922956e-05, - "loss": 1.9337, - "step": 370000 - }, - { - "epoch": 5.33, - "learning_rate": 6.255012228747683e-05, - "loss": 1.9331, - "step": 370500 - }, - { - "epoch": 5.34, - "learning_rate": 6.249819554572409e-05, - "loss": 1.9328, - "step": 371000 - }, - { - "epoch": 5.35, - "learning_rate": 6.244637265745487e-05, - "loss": 1.9377, - "step": 371500 - }, - { - "epoch": 5.35, - "learning_rate": 6.239444591570212e-05, - "loss": 1.9352, - "step": 372000 - }, - { - "epoch": 5.36, - "learning_rate": 6.23425191739494e-05, - "loss": 1.9263, - "step": 372500 - }, - { - "epoch": 5.37, - "learning_rate": 6.229059243219666e-05, - "loss": 1.9342, - "step": 373000 - }, - { - "epoch": 5.38, - "learning_rate": 6.223876954392744e-05, - "loss": 1.9368, - "step": 373500 - }, - { - "epoch": 5.38, - "learning_rate": 6.218684280217469e-05, - "loss": 1.9308, - "step": 374000 - }, - { - "epoch": 5.39, - "learning_rate": 6.213491606042196e-05, - "loss": 1.93, - "step": 374500 - }, - { - "epoch": 5.4, - "learning_rate": 6.208298931866923e-05, - "loss": 1.9335, - "step": 375000 - }, - { - "epoch": 5.4, - "learning_rate": 6.20311664304e-05, - "loss": 1.933, - "step": 375500 - }, - { - "epoch": 5.41, - "learning_rate": 6.197923968864726e-05, - "loss": 1.9321, - "step": 376000 - }, - { - "epoch": 5.42, - "learning_rate": 6.192731294689452e-05, - "loss": 1.9297, - "step": 376500 - }, - { - "epoch": 5.43, - "learning_rate": 6.18753862051418e-05, - "loss": 1.9342, - "step": 377000 - }, - { - "epoch": 5.43, - "learning_rate": 6.182356331687255e-05, - "loss": 1.931, - "step": 377500 - }, - { - "epoch": 5.44, - "learning_rate": 6.177163657511983e-05, - "loss": 1.932, - "step": 378000 - }, - { - "epoch": 5.45, - "learning_rate": 6.171970983336709e-05, - "loss": 1.9308, - "step": 378500 - }, - { - "epoch": 5.46, - "learning_rate": 6.166778309161434e-05, - "loss": 1.9328, - "step": 379000 - }, - { - "epoch": 5.46, - "learning_rate": 6.161596020334512e-05, - "loss": 1.9282, - "step": 379500 - }, - { - "epoch": 5.47, - "learning_rate": 6.156403346159238e-05, - "loss": 1.9306, - "step": 380000 - }, - { - "epoch": 5.48, - "learning_rate": 6.151210671983966e-05, - "loss": 1.9305, - "step": 380500 - }, - { - "epoch": 5.48, - "learning_rate": 6.146017997808692e-05, - "loss": 1.9338, - "step": 381000 - }, - { - "epoch": 5.49, - "learning_rate": 6.14083570898177e-05, - "loss": 1.9293, - "step": 381500 - }, - { - "epoch": 5.5, - "learning_rate": 6.135643034806495e-05, - "loss": 1.9292, - "step": 382000 - }, - { - "epoch": 5.51, - "learning_rate": 6.130450360631222e-05, - "loss": 1.9307, - "step": 382500 - }, - { - "epoch": 5.51, - "learning_rate": 6.125257686455949e-05, - "loss": 1.9307, - "step": 383000 - }, - { - "epoch": 5.52, - "learning_rate": 6.120075397629025e-05, - "loss": 1.9323, - "step": 383500 - }, - { - "epoch": 5.53, - "learning_rate": 6.114882723453752e-05, - "loss": 1.9321, - "step": 384000 - }, - { - "epoch": 5.53, - "learning_rate": 6.109690049278478e-05, - "loss": 1.9339, - "step": 384500 - }, - { - "epoch": 5.54, - "learning_rate": 6.104497375103204e-05, - "loss": 1.9289, - "step": 385000 - }, - { - "epoch": 5.55, - "learning_rate": 6.099315086276282e-05, - "loss": 1.9275, - "step": 385500 - }, - { - "epoch": 5.56, - "learning_rate": 6.094122412101008e-05, - "loss": 1.9243, - "step": 386000 - }, - { - "epoch": 5.56, - "learning_rate": 6.088929737925735e-05, - "loss": 1.9277, - "step": 386500 - }, - { - "epoch": 5.57, - "learning_rate": 6.083737063750461e-05, - "loss": 1.9299, - "step": 387000 - }, - { - "epoch": 5.58, - "learning_rate": 6.0785443895751884e-05, - "loss": 1.9301, - "step": 387500 - }, - { - "epoch": 5.58, - "learning_rate": 6.073362100748264e-05, - "loss": 1.9271, - "step": 388000 - }, - { - "epoch": 5.59, - "learning_rate": 6.068169426572992e-05, - "loss": 1.9295, - "step": 388500 - }, - { - "epoch": 5.6, - "learning_rate": 6.062976752397718e-05, - "loss": 1.9269, - "step": 389000 - }, - { - "epoch": 5.61, - "learning_rate": 6.0577840782224434e-05, - "loss": 1.9251, - "step": 389500 - }, - { - "epoch": 5.61, - "learning_rate": 6.052591404047171e-05, - "loss": 1.9253, - "step": 390000 - }, - { - "epoch": 5.62, - "learning_rate": 6.047398729871897e-05, - "loss": 1.9297, - "step": 390500 - }, - { - "epoch": 5.63, - "learning_rate": 6.042206055696623e-05, - "loss": 1.9286, - "step": 391000 - }, - { - "epoch": 5.64, - "learning_rate": 6.03701338152135e-05, - "loss": 1.9254, - "step": 391500 - }, - { - "epoch": 5.64, - "learning_rate": 6.031820707346076e-05, - "loss": 1.9244, - "step": 392000 - }, - { - "epoch": 5.65, - "learning_rate": 6.0266384185191535e-05, - "loss": 1.9291, - "step": 392500 - }, - { - "epoch": 5.66, - "learning_rate": 6.02144574434388e-05, - "loss": 1.925, - "step": 393000 - }, - { - "epoch": 5.66, - "learning_rate": 6.016253070168606e-05, - "loss": 1.924, - "step": 393500 - }, - { - "epoch": 5.67, - "learning_rate": 6.0110603959933333e-05, - "loss": 1.9275, - "step": 394000 - }, - { - "epoch": 5.68, - "learning_rate": 6.005878107166409e-05, - "loss": 1.9278, - "step": 394500 - }, - { - "epoch": 5.69, - "learning_rate": 6.0006958183394866e-05, - "loss": 1.9219, - "step": 395000 - }, - { - "epoch": 5.69, - "learning_rate": 5.995503144164213e-05, - "loss": 1.9268, - "step": 395500 - }, - { - "epoch": 5.7, - "learning_rate": 5.99031046998894e-05, - "loss": 1.9273, - "step": 396000 - }, - { - "epoch": 5.71, - "learning_rate": 5.9851177958136664e-05, - "loss": 1.9229, - "step": 396500 - }, - { - "epoch": 5.71, - "learning_rate": 5.979925121638393e-05, - "loss": 1.9265, - "step": 397000 - }, - { - "epoch": 5.72, - "learning_rate": 5.9747324474631194e-05, - "loss": 1.9246, - "step": 397500 - }, - { - "epoch": 5.73, - "learning_rate": 5.9695397732878455e-05, - "loss": 1.9264, - "step": 398000 - }, - { - "epoch": 5.74, - "learning_rate": 5.964347099112573e-05, - "loss": 1.9238, - "step": 398500 - }, - { - "epoch": 5.74, - "learning_rate": 5.959164810285649e-05, - "loss": 1.9216, - "step": 399000 - }, - { - "epoch": 5.75, - "learning_rate": 5.9539721361103765e-05, - "loss": 1.9214, - "step": 399500 - }, - { - "epoch": 5.76, - "learning_rate": 5.948779461935102e-05, - "loss": 1.9282, - "step": 400000 - }, - { - "epoch": 5.76, - "learning_rate": 5.943586787759828e-05, - "loss": 1.9277, - "step": 400500 - }, - { - "epoch": 5.77, - "learning_rate": 5.9384044989329054e-05, - "loss": 1.9213, - "step": 401000 - }, - { - "epoch": 5.78, - "learning_rate": 5.9332118247576316e-05, - "loss": 1.924, - "step": 401500 - }, - { - "epoch": 5.79, - "learning_rate": 5.928019150582359e-05, - "loss": 1.9252, - "step": 402000 - }, - { - "epoch": 5.79, - "learning_rate": 5.922826476407085e-05, - "loss": 1.925, - "step": 402500 - }, - { - "epoch": 5.8, - "learning_rate": 5.9176441875801625e-05, - "loss": 1.9211, - "step": 403000 - }, - { - "epoch": 5.81, - "learning_rate": 5.912451513404889e-05, - "loss": 1.9232, - "step": 403500 - }, - { - "epoch": 5.82, - "learning_rate": 5.907258839229615e-05, - "loss": 1.9179, - "step": 404000 - }, - { - "epoch": 5.82, - "learning_rate": 5.902066165054342e-05, - "loss": 1.9199, - "step": 404500 - }, - { - "epoch": 5.83, - "learning_rate": 5.896883876227418e-05, - "loss": 1.9233, - "step": 405000 - }, - { - "epoch": 5.84, - "learning_rate": 5.891691202052145e-05, - "loss": 1.9235, - "step": 405500 - }, - { - "epoch": 5.84, - "learning_rate": 5.886498527876871e-05, - "loss": 1.9218, - "step": 406000 - }, - { - "epoch": 5.85, - "learning_rate": 5.881305853701599e-05, - "loss": 1.9206, - "step": 406500 - }, - { - "epoch": 5.86, - "learning_rate": 5.876123564874675e-05, - "loss": 1.9183, - "step": 407000 - }, - { - "epoch": 5.87, - "learning_rate": 5.870930890699402e-05, - "loss": 1.9202, - "step": 407500 - }, - { - "epoch": 5.87, - "learning_rate": 5.8657382165241284e-05, - "loss": 1.9221, - "step": 408000 - }, - { - "epoch": 5.88, - "learning_rate": 5.8605455423488545e-05, - "loss": 1.9206, - "step": 408500 - }, - { - "epoch": 5.89, - "learning_rate": 5.855363253521932e-05, - "loss": 1.9222, - "step": 409000 - }, - { - "epoch": 5.89, - "learning_rate": 5.850170579346658e-05, - "loss": 1.9192, - "step": 409500 - }, - { - "epoch": 5.9, - "learning_rate": 5.844977905171385e-05, - "loss": 1.9204, - "step": 410000 - }, - { - "epoch": 5.91, - "learning_rate": 5.839785230996111e-05, - "loss": 1.9208, - "step": 410500 - }, - { - "epoch": 5.92, - "learning_rate": 5.834602942169188e-05, - "loss": 1.9199, - "step": 411000 - }, - { - "epoch": 5.92, - "learning_rate": 5.8294102679939144e-05, - "loss": 1.9192, - "step": 411500 - }, - { - "epoch": 5.93, - "learning_rate": 5.8242175938186406e-05, - "loss": 1.9218, - "step": 412000 - }, - { - "epoch": 5.94, - "learning_rate": 5.8190249196433674e-05, - "loss": 1.9216, - "step": 412500 - }, - { - "epoch": 5.94, - "learning_rate": 5.813842630816444e-05, - "loss": 1.9223, - "step": 413000 - }, - { - "epoch": 5.95, - "learning_rate": 5.808660341989521e-05, - "loss": 1.9185, - "step": 413500 - }, - { - "epoch": 5.96, - "learning_rate": 5.8034676678142475e-05, - "loss": 1.925, - "step": 414000 - }, - { - "epoch": 5.97, - "learning_rate": 5.798274993638974e-05, - "loss": 1.9156, - "step": 414500 - }, - { - "epoch": 5.97, - "learning_rate": 5.7930823194637005e-05, - "loss": 1.9226, - "step": 415000 - }, - { - "epoch": 5.98, - "learning_rate": 5.7878896452884266e-05, - "loss": 1.9174, - "step": 415500 - }, - { - "epoch": 5.99, - "learning_rate": 5.782696971113154e-05, - "loss": 1.9211, - "step": 416000 - }, - { - "epoch": 6.0, - "learning_rate": 5.77750429693788e-05, - "loss": 1.9152, - "step": 416500 - }, - { - "epoch": 6.0, - "eval_accuracy": 0.6361376420590098, - "eval_loss": 1.781764030456543, - "eval_runtime": 645.9143, - "eval_samples_per_second": 834.378, - "eval_steps_per_second": 34.766, - "step": 416838 - }, - { - "epoch": 6.0, - "learning_rate": 5.772311622762607e-05, - "loss": 1.9179, - "step": 417000 - }, - { - "epoch": 6.01, - "learning_rate": 5.767129333935684e-05, - "loss": 1.9186, - "step": 417500 - }, - { - "epoch": 6.02, - "learning_rate": 5.761947045108761e-05, - "loss": 1.9127, - "step": 418000 - }, - { - "epoch": 6.02, - "learning_rate": 5.756754370933487e-05, - "loss": 1.9154, - "step": 418500 - }, - { - "epoch": 6.03, - "learning_rate": 5.751561696758214e-05, - "loss": 1.9157, - "step": 419000 - }, - { - "epoch": 6.04, - "learning_rate": 5.74636902258294e-05, - "loss": 1.912, - "step": 419500 - }, - { - "epoch": 6.05, - "learning_rate": 5.741176348407666e-05, - "loss": 1.9186, - "step": 420000 - }, - { - "epoch": 6.05, - "learning_rate": 5.735983674232394e-05, - "loss": 1.9151, - "step": 420500 - }, - { - "epoch": 6.06, - "learning_rate": 5.73079100005712e-05, - "loss": 1.9187, - "step": 421000 - }, - { - "epoch": 6.07, - "learning_rate": 5.7255983258818454e-05, - "loss": 1.913, - "step": 421500 - }, - { - "epoch": 6.07, - "learning_rate": 5.7204160370549234e-05, - "loss": 1.9113, - "step": 422000 - }, - { - "epoch": 6.08, - "learning_rate": 5.715223362879649e-05, - "loss": 1.9153, - "step": 422500 - }, - { - "epoch": 6.09, - "learning_rate": 5.7100306887043764e-05, - "loss": 1.9138, - "step": 423000 - }, - { - "epoch": 6.1, - "learning_rate": 5.7048380145291026e-05, - "loss": 1.9142, - "step": 423500 - }, - { - "epoch": 6.1, - "learning_rate": 5.69965572570218e-05, - "loss": 1.9134, - "step": 424000 - }, - { - "epoch": 6.11, - "learning_rate": 5.694463051526906e-05, - "loss": 1.9176, - "step": 424500 - }, - { - "epoch": 6.12, - "learning_rate": 5.689270377351632e-05, - "loss": 1.9119, - "step": 425000 - }, - { - "epoch": 6.12, - "learning_rate": 5.684077703176359e-05, - "loss": 1.918, - "step": 425500 - }, - { - "epoch": 6.13, - "learning_rate": 5.678895414349436e-05, - "loss": 1.9135, - "step": 426000 - }, - { - "epoch": 6.14, - "learning_rate": 5.673713125522513e-05, - "loss": 1.9183, - "step": 426500 - }, - { - "epoch": 6.15, - "learning_rate": 5.66852045134724e-05, - "loss": 1.9146, - "step": 427000 - }, - { - "epoch": 6.15, - "learning_rate": 5.663327777171966e-05, - "loss": 1.9156, - "step": 427500 - }, - { - "epoch": 6.16, - "learning_rate": 5.658135102996692e-05, - "loss": 1.917, - "step": 428000 - }, - { - "epoch": 6.17, - "learning_rate": 5.6529424288214196e-05, - "loss": 1.914, - "step": 428500 - }, - { - "epoch": 6.18, - "learning_rate": 5.647749754646146e-05, - "loss": 1.9172, - "step": 429000 - }, - { - "epoch": 6.18, - "learning_rate": 5.642557080470872e-05, - "loss": 1.9101, - "step": 429500 - }, - { - "epoch": 6.19, - "learning_rate": 5.637364406295599e-05, - "loss": 1.9107, - "step": 430000 - }, - { - "epoch": 6.2, - "learning_rate": 5.632182117468675e-05, - "loss": 1.9096, - "step": 430500 - }, - { - "epoch": 6.2, - "learning_rate": 5.626989443293402e-05, - "loss": 1.9118, - "step": 431000 - }, - { - "epoch": 6.21, - "learning_rate": 5.621796769118128e-05, - "loss": 1.9104, - "step": 431500 - }, - { - "epoch": 6.22, - "learning_rate": 5.6166040949428544e-05, - "loss": 1.9105, - "step": 432000 - }, - { - "epoch": 6.23, - "learning_rate": 5.611421806115932e-05, - "loss": 1.9169, - "step": 432500 - }, - { - "epoch": 6.23, - "learning_rate": 5.606229131940658e-05, - "loss": 1.914, - "step": 433000 - }, - { - "epoch": 6.24, - "learning_rate": 5.6010364577653854e-05, - "loss": 1.9136, - "step": 433500 - }, - { - "epoch": 6.25, - "learning_rate": 5.595843783590111e-05, - "loss": 1.9097, - "step": 434000 - }, - { - "epoch": 6.25, - "learning_rate": 5.590661494763189e-05, - "loss": 1.9132, - "step": 434500 - }, - { - "epoch": 6.26, - "learning_rate": 5.585468820587914e-05, - "loss": 1.9113, - "step": 435000 - }, - { - "epoch": 6.27, - "learning_rate": 5.580276146412642e-05, - "loss": 1.9137, - "step": 435500 - }, - { - "epoch": 6.28, - "learning_rate": 5.575083472237368e-05, - "loss": 1.9082, - "step": 436000 - }, - { - "epoch": 6.28, - "learning_rate": 5.569901183410445e-05, - "loss": 1.9129, - "step": 436500 - }, - { - "epoch": 6.29, - "learning_rate": 5.5647085092351714e-05, - "loss": 1.9088, - "step": 437000 - }, - { - "epoch": 6.3, - "learning_rate": 5.5595158350598976e-05, - "loss": 1.9119, - "step": 437500 - }, - { - "epoch": 6.3, - "learning_rate": 5.5543231608846244e-05, - "loss": 1.9125, - "step": 438000 - }, - { - "epoch": 6.31, - "learning_rate": 5.549140872057701e-05, - "loss": 1.909, - "step": 438500 - }, - { - "epoch": 6.32, - "learning_rate": 5.543948197882428e-05, - "loss": 1.9111, - "step": 439000 - }, - { - "epoch": 6.33, - "learning_rate": 5.538755523707154e-05, - "loss": 1.9112, - "step": 439500 - }, - { - "epoch": 6.33, - "learning_rate": 5.53356284953188e-05, - "loss": 1.9093, - "step": 440000 - }, - { - "epoch": 6.34, - "learning_rate": 5.5283805607049575e-05, - "loss": 1.9096, - "step": 440500 - }, - { - "epoch": 6.35, - "learning_rate": 5.5231878865296836e-05, - "loss": 1.9128, - "step": 441000 - }, - { - "epoch": 6.35, - "learning_rate": 5.517995212354411e-05, - "loss": 1.9075, - "step": 441500 - }, - { - "epoch": 6.36, - "learning_rate": 5.512802538179137e-05, - "loss": 1.9103, - "step": 442000 - }, - { - "epoch": 6.37, - "learning_rate": 5.5076202493522146e-05, - "loss": 1.9123, - "step": 442500 - }, - { - "epoch": 6.38, - "learning_rate": 5.502427575176941e-05, - "loss": 1.9121, - "step": 443000 - }, - { - "epoch": 6.38, - "learning_rate": 5.497245286350018e-05, - "loss": 1.9133, - "step": 443500 - }, - { - "epoch": 6.39, - "learning_rate": 5.492052612174744e-05, - "loss": 1.9112, - "step": 444000 - }, - { - "epoch": 6.4, - "learning_rate": 5.4868599379994704e-05, - "loss": 1.9081, - "step": 444500 - }, - { - "epoch": 6.41, - "learning_rate": 5.481667263824197e-05, - "loss": 1.9141, - "step": 445000 - }, - { - "epoch": 6.41, - "learning_rate": 5.476474589648923e-05, - "loss": 1.9113, - "step": 445500 - }, - { - "epoch": 6.42, - "learning_rate": 5.471281915473651e-05, - "loss": 1.9081, - "step": 446000 - }, - { - "epoch": 6.43, - "learning_rate": 5.466089241298376e-05, - "loss": 1.9097, - "step": 446500 - }, - { - "epoch": 6.43, - "learning_rate": 5.4608965671231025e-05, - "loss": 1.9084, - "step": 447000 - }, - { - "epoch": 6.44, - "learning_rate": 5.45571427829618e-05, - "loss": 1.9084, - "step": 447500 - }, - { - "epoch": 6.45, - "learning_rate": 5.450521604120906e-05, - "loss": 1.9099, - "step": 448000 - }, - { - "epoch": 6.46, - "learning_rate": 5.4453289299456334e-05, - "loss": 1.9089, - "step": 448500 - }, - { - "epoch": 6.46, - "learning_rate": 5.4401466411187094e-05, - "loss": 1.9058, - "step": 449000 - }, - { - "epoch": 6.47, - "learning_rate": 5.434953966943437e-05, - "loss": 1.91, - "step": 449500 - }, - { - "epoch": 6.48, - "learning_rate": 5.429761292768163e-05, - "loss": 1.9086, - "step": 450000 - }, - { - "epoch": 6.48, - "learning_rate": 5.424568618592889e-05, - "loss": 1.9065, - "step": 450500 - }, - { - "epoch": 6.49, - "learning_rate": 5.419375944417616e-05, - "loss": 1.9076, - "step": 451000 - }, - { - "epoch": 6.5, - "learning_rate": 5.414183270242342e-05, - "loss": 1.9092, - "step": 451500 - }, - { - "epoch": 6.51, - "learning_rate": 5.408990596067068e-05, - "loss": 1.9111, - "step": 452000 - }, - { - "epoch": 6.51, - "learning_rate": 5.403797921891796e-05, - "loss": 1.9142, - "step": 452500 - }, - { - "epoch": 6.52, - "learning_rate": 5.398615633064872e-05, - "loss": 1.9037, - "step": 453000 - }, - { - "epoch": 6.53, - "learning_rate": 5.393422958889599e-05, - "loss": 1.9089, - "step": 453500 - }, - { - "epoch": 6.53, - "learning_rate": 5.3882302847143254e-05, - "loss": 1.9084, - "step": 454000 - }, - { - "epoch": 6.54, - "learning_rate": 5.383037610539052e-05, - "loss": 1.9075, - "step": 454500 - }, - { - "epoch": 6.55, - "learning_rate": 5.377855321712129e-05, - "loss": 1.9075, - "step": 455000 - }, - { - "epoch": 6.56, - "learning_rate": 5.372662647536856e-05, - "loss": 1.9099, - "step": 455500 - }, - { - "epoch": 6.56, - "learning_rate": 5.367480358709932e-05, - "loss": 1.9025, - "step": 456000 - }, - { - "epoch": 6.57, - "learning_rate": 5.362287684534659e-05, - "loss": 1.9094, - "step": 456500 - }, - { - "epoch": 6.58, - "learning_rate": 5.357095010359385e-05, - "loss": 1.9018, - "step": 457000 - }, - { - "epoch": 6.59, - "learning_rate": 5.3519023361841115e-05, - "loss": 1.9076, - "step": 457500 - }, - { - "epoch": 6.59, - "learning_rate": 5.346709662008838e-05, - "loss": 1.9071, - "step": 458000 - }, - { - "epoch": 6.6, - "learning_rate": 5.3415169878335644e-05, - "loss": 1.9096, - "step": 458500 - }, - { - "epoch": 6.61, - "learning_rate": 5.336334699006642e-05, - "loss": 1.9021, - "step": 459000 - }, - { - "epoch": 6.61, - "learning_rate": 5.331142024831368e-05, - "loss": 1.9016, - "step": 459500 - }, - { - "epoch": 6.62, - "learning_rate": 5.325949350656094e-05, - "loss": 1.9048, - "step": 460000 - }, - { - "epoch": 6.63, - "learning_rate": 5.3207566764808216e-05, - "loss": 1.9037, - "step": 460500 - }, - { - "epoch": 6.64, - "learning_rate": 5.315564002305548e-05, - "loss": 1.9029, - "step": 461000 - }, - { - "epoch": 6.64, - "learning_rate": 5.310371328130274e-05, - "loss": 1.9028, - "step": 461500 - }, - { - "epoch": 6.65, - "learning_rate": 5.305178653955001e-05, - "loss": 1.9073, - "step": 462000 - }, - { - "epoch": 6.66, - "learning_rate": 5.299985979779727e-05, - "loss": 1.9047, - "step": 462500 - }, - { - "epoch": 6.66, - "learning_rate": 5.294803690952804e-05, - "loss": 1.9033, - "step": 463000 - }, - { - "epoch": 6.67, - "learning_rate": 5.289621402125881e-05, - "loss": 1.9044, - "step": 463500 - }, - { - "epoch": 6.68, - "learning_rate": 5.2844287279506076e-05, - "loss": 1.9053, - "step": 464000 - }, - { - "epoch": 6.69, - "learning_rate": 5.279236053775334e-05, - "loss": 1.9006, - "step": 464500 - }, - { - "epoch": 6.69, - "learning_rate": 5.274043379600061e-05, - "loss": 1.9034, - "step": 465000 - }, - { - "epoch": 6.7, - "learning_rate": 5.2688507054247874e-05, - "loss": 1.9058, - "step": 465500 - }, - { - "epoch": 6.71, - "learning_rate": 5.263658031249513e-05, - "loss": 1.9032, - "step": 466000 - }, - { - "epoch": 6.71, - "learning_rate": 5.2584653570742404e-05, - "loss": 1.9006, - "step": 466500 - }, - { - "epoch": 6.72, - "learning_rate": 5.2532726828989665e-05, - "loss": 1.9026, - "step": 467000 - }, - { - "epoch": 6.73, - "learning_rate": 5.248090394072044e-05, - "loss": 1.904, - "step": 467500 - }, - { - "epoch": 6.74, - "learning_rate": 5.24289771989677e-05, - "loss": 1.8983, - "step": 468000 - }, - { - "epoch": 6.74, - "learning_rate": 5.237705045721496e-05, - "loss": 1.903, - "step": 468500 - }, - { - "epoch": 6.75, - "learning_rate": 5.232512371546223e-05, - "loss": 1.9042, - "step": 469000 - }, - { - "epoch": 6.76, - "learning_rate": 5.2273300827192996e-05, - "loss": 1.9001, - "step": 469500 - }, - { - "epoch": 6.77, - "learning_rate": 5.222147793892377e-05, - "loss": 1.9026, - "step": 470000 - }, - { - "epoch": 6.77, - "learning_rate": 5.216955119717103e-05, - "loss": 1.9003, - "step": 470500 - }, - { - "epoch": 6.78, - "learning_rate": 5.21176244554183e-05, - "loss": 1.9038, - "step": 471000 - }, - { - "epoch": 6.79, - "learning_rate": 5.206569771366556e-05, - "loss": 1.9011, - "step": 471500 - }, - { - "epoch": 6.79, - "learning_rate": 5.201377097191282e-05, - "loss": 1.9027, - "step": 472000 - }, - { - "epoch": 6.8, - "learning_rate": 5.19618442301601e-05, - "loss": 1.8988, - "step": 472500 - }, - { - "epoch": 6.81, - "learning_rate": 5.190991748840736e-05, - "loss": 1.9007, - "step": 473000 - }, - { - "epoch": 6.82, - "learning_rate": 5.185799074665462e-05, - "loss": 1.9063, - "step": 473500 - }, - { - "epoch": 6.82, - "learning_rate": 5.1806271711868904e-05, - "loss": 1.9045, - "step": 474000 - }, - { - "epoch": 6.83, - "learning_rate": 5.1754344970116166e-05, - "loss": 1.9047, - "step": 474500 - }, - { - "epoch": 6.84, - "learning_rate": 5.170241822836343e-05, - "loss": 1.8998, - "step": 475000 - }, - { - "epoch": 6.84, - "learning_rate": 5.1650491486610696e-05, - "loss": 1.9034, - "step": 475500 - }, - { - "epoch": 6.85, - "learning_rate": 5.159856474485796e-05, - "loss": 1.8985, - "step": 476000 - }, - { - "epoch": 6.86, - "learning_rate": 5.154663800310522e-05, - "loss": 1.9026, - "step": 476500 - }, - { - "epoch": 6.87, - "learning_rate": 5.1494711261352494e-05, - "loss": 1.8984, - "step": 477000 - }, - { - "epoch": 6.87, - "learning_rate": 5.144278451959975e-05, - "loss": 1.9015, - "step": 477500 - }, - { - "epoch": 6.88, - "learning_rate": 5.139096163133053e-05, - "loss": 1.9032, - "step": 478000 - }, - { - "epoch": 6.89, - "learning_rate": 5.133903488957778e-05, - "loss": 1.8992, - "step": 478500 - }, - { - "epoch": 6.89, - "learning_rate": 5.1287108147825045e-05, - "loss": 1.9035, - "step": 479000 - }, - { - "epoch": 6.9, - "learning_rate": 5.123518140607232e-05, - "loss": 1.9003, - "step": 479500 - }, - { - "epoch": 6.91, - "learning_rate": 5.118335851780308e-05, - "loss": 1.8944, - "step": 480000 - }, - { - "epoch": 6.92, - "learning_rate": 5.1131431776050354e-05, - "loss": 1.8998, - "step": 480500 - }, - { - "epoch": 6.92, - "learning_rate": 5.1079505034297616e-05, - "loss": 1.9034, - "step": 481000 - }, - { - "epoch": 6.93, - "learning_rate": 5.102757829254488e-05, - "loss": 1.9022, - "step": 481500 - }, - { - "epoch": 6.94, - "learning_rate": 5.097575540427565e-05, - "loss": 1.8972, - "step": 482000 - }, - { - "epoch": 6.95, - "learning_rate": 5.092382866252291e-05, - "loss": 1.9032, - "step": 482500 - }, - { - "epoch": 6.95, - "learning_rate": 5.087190192077018e-05, - "loss": 1.9014, - "step": 483000 - }, - { - "epoch": 6.96, - "learning_rate": 5.081997517901744e-05, - "loss": 1.8986, - "step": 483500 - }, - { - "epoch": 6.97, - "learning_rate": 5.0768152290748215e-05, - "loss": 1.8992, - "step": 484000 - }, - { - "epoch": 6.97, - "learning_rate": 5.0716225548995476e-05, - "loss": 1.9036, - "step": 484500 - }, - { - "epoch": 6.98, - "learning_rate": 5.066429880724275e-05, - "loss": 1.9018, - "step": 485000 - }, - { - "epoch": 6.99, - "learning_rate": 5.061237206549001e-05, - "loss": 1.8988, - "step": 485500 - }, - { - "epoch": 7.0, - "learning_rate": 5.0560549177220786e-05, - "loss": 1.8982, - "step": 486000 - }, - { - "epoch": 7.0, - "eval_accuracy": 0.6395200987544465, - "eval_loss": 1.757468581199646, - "eval_runtime": 647.437, - "eval_samples_per_second": 832.416, - "eval_steps_per_second": 34.684, - "step": 486311 - }, - { - "epoch": 7.0, - "learning_rate": 5.050862243546805e-05, - "loss": 1.8974, - "step": 486500 - }, - { - "epoch": 7.01, - "learning_rate": 5.045679954719882e-05, - "loss": 1.8964, - "step": 487000 - }, - { - "epoch": 7.02, - "learning_rate": 5.040487280544608e-05, - "loss": 1.8951, - "step": 487500 - }, - { - "epoch": 7.02, - "learning_rate": 5.035294606369334e-05, - "loss": 1.8956, - "step": 488000 - }, - { - "epoch": 7.03, - "learning_rate": 5.030101932194061e-05, - "loss": 1.8932, - "step": 488500 - }, - { - "epoch": 7.04, - "learning_rate": 5.024909258018787e-05, - "loss": 1.8955, - "step": 489000 - }, - { - "epoch": 7.05, - "learning_rate": 5.0197165838435135e-05, - "loss": 1.8931, - "step": 489500 - }, - { - "epoch": 7.05, - "learning_rate": 5.01452390966824e-05, - "loss": 1.8942, - "step": 490000 - }, - { - "epoch": 7.06, - "learning_rate": 5.0093312354929664e-05, - "loss": 1.8929, - "step": 490500 - }, - { - "epoch": 7.07, - "learning_rate": 5.004148946666044e-05, - "loss": 1.8949, - "step": 491000 - }, - { - "epoch": 7.07, - "learning_rate": 4.99895627249077e-05, - "loss": 1.9012, - "step": 491500 - }, - { - "epoch": 7.08, - "learning_rate": 4.993763598315497e-05, - "loss": 1.8931, - "step": 492000 - }, - { - "epoch": 7.09, - "learning_rate": 4.9885709241402236e-05, - "loss": 1.8933, - "step": 492500 - }, - { - "epoch": 7.1, - "learning_rate": 4.9833886353133e-05, - "loss": 1.8939, - "step": 493000 - }, - { - "epoch": 7.1, - "learning_rate": 4.978195961138027e-05, - "loss": 1.8947, - "step": 493500 - }, - { - "epoch": 7.11, - "learning_rate": 4.973003286962753e-05, - "loss": 1.8957, - "step": 494000 - }, - { - "epoch": 7.12, - "learning_rate": 4.967810612787479e-05, - "loss": 1.8973, - "step": 494500 - }, - { - "epoch": 7.13, - "learning_rate": 4.9626283239605566e-05, - "loss": 1.8952, - "step": 495000 - }, - { - "epoch": 7.13, - "learning_rate": 4.957435649785283e-05, - "loss": 1.8993, - "step": 495500 - }, - { - "epoch": 7.14, - "learning_rate": 4.9522429756100096e-05, - "loss": 1.8981, - "step": 496000 - }, - { - "epoch": 7.15, - "learning_rate": 4.9470503014347364e-05, - "loss": 1.8928, - "step": 496500 - }, - { - "epoch": 7.15, - "learning_rate": 4.941868012607813e-05, - "loss": 1.8949, - "step": 497000 - }, - { - "epoch": 7.16, - "learning_rate": 4.93668572378089e-05, - "loss": 1.8945, - "step": 497500 - }, - { - "epoch": 7.17, - "learning_rate": 4.9314930496056165e-05, - "loss": 1.8971, - "step": 498000 - }, - { - "epoch": 7.18, - "learning_rate": 4.926300375430343e-05, - "loss": 1.8924, - "step": 498500 - }, - { - "epoch": 7.18, - "learning_rate": 4.9211077012550695e-05, - "loss": 1.894, - "step": 499000 - }, - { - "epoch": 7.19, - "learning_rate": 4.915915027079796e-05, - "loss": 1.8934, - "step": 499500 - }, - { - "epoch": 7.2, - "learning_rate": 4.9107223529045225e-05, - "loss": 1.8972, - "step": 500000 - }, - { - "epoch": 7.2, - "learning_rate": 4.9055296787292486e-05, - "loss": 1.8903, - "step": 500500 - }, - { - "epoch": 7.21, - "learning_rate": 4.9003370045539754e-05, - "loss": 1.8908, - "step": 501000 - }, - { - "epoch": 7.22, - "learning_rate": 4.895154715727053e-05, - "loss": 1.8885, - "step": 501500 - }, - { - "epoch": 7.23, - "learning_rate": 4.889962041551779e-05, - "loss": 1.89, - "step": 502000 - }, - { - "epoch": 7.23, - "learning_rate": 4.884769367376506e-05, - "loss": 1.8922, - "step": 502500 - }, - { - "epoch": 7.24, - "learning_rate": 4.879576693201232e-05, - "loss": 1.896, - "step": 503000 - }, - { - "epoch": 7.25, - "learning_rate": 4.874394404374309e-05, - "loss": 1.8955, - "step": 503500 - }, - { - "epoch": 7.25, - "learning_rate": 4.869201730199035e-05, - "loss": 1.893, - "step": 504000 - }, - { - "epoch": 7.26, - "learning_rate": 4.8640090560237615e-05, - "loss": 1.8923, - "step": 504500 - }, - { - "epoch": 7.27, - "learning_rate": 4.858816381848488e-05, - "loss": 1.8926, - "step": 505000 - }, - { - "epoch": 7.28, - "learning_rate": 4.853634093021565e-05, - "loss": 1.8888, - "step": 505500 - }, - { - "epoch": 7.28, - "learning_rate": 4.848441418846292e-05, - "loss": 1.8936, - "step": 506000 - }, - { - "epoch": 7.29, - "learning_rate": 4.8432487446710186e-05, - "loss": 1.8947, - "step": 506500 - }, - { - "epoch": 7.3, - "learning_rate": 4.838056070495745e-05, - "loss": 1.8902, - "step": 507000 - }, - { - "epoch": 7.3, - "learning_rate": 4.8328841670171725e-05, - "loss": 1.8917, - "step": 507500 - }, - { - "epoch": 7.31, - "learning_rate": 4.827691492841899e-05, - "loss": 1.8927, - "step": 508000 - }, - { - "epoch": 7.32, - "learning_rate": 4.8224988186666255e-05, - "loss": 1.8886, - "step": 508500 - }, - { - "epoch": 7.33, - "learning_rate": 4.8173061444913516e-05, - "loss": 1.8931, - "step": 509000 - }, - { - "epoch": 7.33, - "learning_rate": 4.812113470316078e-05, - "loss": 1.8912, - "step": 509500 - }, - { - "epoch": 7.34, - "learning_rate": 4.806931181489156e-05, - "loss": 1.8961, - "step": 510000 - }, - { - "epoch": 7.35, - "learning_rate": 4.801738507313881e-05, - "loss": 1.8916, - "step": 510500 - }, - { - "epoch": 7.36, - "learning_rate": 4.796545833138608e-05, - "loss": 1.8948, - "step": 511000 - }, - { - "epoch": 7.36, - "learning_rate": 4.791353158963335e-05, - "loss": 1.8891, - "step": 511500 - }, - { - "epoch": 7.37, - "learning_rate": 4.7861708701364115e-05, - "loss": 1.8916, - "step": 512000 - }, - { - "epoch": 7.38, - "learning_rate": 4.7809781959611384e-05, - "loss": 1.8918, - "step": 512500 - }, - { - "epoch": 7.38, - "learning_rate": 4.775785521785865e-05, - "loss": 1.892, - "step": 513000 - }, - { - "epoch": 7.39, - "learning_rate": 4.770592847610591e-05, - "loss": 1.8879, - "step": 513500 - }, - { - "epoch": 7.4, - "learning_rate": 4.7654001734353175e-05, - "loss": 1.8884, - "step": 514000 - }, - { - "epoch": 7.41, - "learning_rate": 4.760207499260044e-05, - "loss": 1.8874, - "step": 514500 - }, - { - "epoch": 7.41, - "learning_rate": 4.7550148250847705e-05, - "loss": 1.8866, - "step": 515000 - }, - { - "epoch": 7.42, - "learning_rate": 4.749822150909497e-05, - "loss": 1.8935, - "step": 515500 - }, - { - "epoch": 7.43, - "learning_rate": 4.7446398620825746e-05, - "loss": 1.8901, - "step": 516000 - }, - { - "epoch": 7.43, - "learning_rate": 4.739447187907301e-05, - "loss": 1.8954, - "step": 516500 - }, - { - "epoch": 7.44, - "learning_rate": 4.734254513732027e-05, - "loss": 1.8914, - "step": 517000 - }, - { - "epoch": 7.45, - "learning_rate": 4.729061839556754e-05, - "loss": 1.89, - "step": 517500 - }, - { - "epoch": 7.46, - "learning_rate": 4.7238795507298304e-05, - "loss": 1.8898, - "step": 518000 - }, - { - "epoch": 7.46, - "learning_rate": 4.718686876554557e-05, - "loss": 1.8849, - "step": 518500 - }, - { - "epoch": 7.47, - "learning_rate": 4.7134942023792833e-05, - "loss": 1.8887, - "step": 519000 - }, - { - "epoch": 7.48, - "learning_rate": 4.70830152820401e-05, - "loss": 1.8914, - "step": 519500 - }, - { - "epoch": 7.48, - "learning_rate": 4.703119239377087e-05, - "loss": 1.8886, - "step": 520000 - }, - { - "epoch": 7.49, - "learning_rate": 4.6979265652018136e-05, - "loss": 1.8832, - "step": 520500 - }, - { - "epoch": 7.5, - "learning_rate": 4.69273389102654e-05, - "loss": 1.8941, - "step": 521000 - }, - { - "epoch": 7.51, - "learning_rate": 4.6875412168512666e-05, - "loss": 1.8879, - "step": 521500 - }, - { - "epoch": 7.51, - "learning_rate": 4.682358928024343e-05, - "loss": 1.8884, - "step": 522000 - }, - { - "epoch": 7.52, - "learning_rate": 4.67716625384907e-05, - "loss": 1.8884, - "step": 522500 - }, - { - "epoch": 7.53, - "learning_rate": 4.671973579673796e-05, - "loss": 1.8877, - "step": 523000 - }, - { - "epoch": 7.54, - "learning_rate": 4.666780905498523e-05, - "loss": 1.8903, - "step": 523500 - }, - { - "epoch": 7.54, - "learning_rate": 4.6615986166716e-05, - "loss": 1.8854, - "step": 524000 - }, - { - "epoch": 7.55, - "learning_rate": 4.6564059424963265e-05, - "loss": 1.8902, - "step": 524500 - }, - { - "epoch": 7.56, - "learning_rate": 4.6512132683210526e-05, - "loss": 1.8871, - "step": 525000 - }, - { - "epoch": 7.56, - "learning_rate": 4.64603097949413e-05, - "loss": 1.8913, - "step": 525500 - }, - { - "epoch": 7.57, - "learning_rate": 4.640838305318856e-05, - "loss": 1.8844, - "step": 526000 - }, - { - "epoch": 7.58, - "learning_rate": 4.635645631143583e-05, - "loss": 1.8863, - "step": 526500 - }, - { - "epoch": 7.59, - "learning_rate": 4.630452956968309e-05, - "loss": 1.8858, - "step": 527000 - }, - { - "epoch": 7.59, - "learning_rate": 4.6252706681413864e-05, - "loss": 1.8871, - "step": 527500 - }, - { - "epoch": 7.6, - "learning_rate": 4.6200779939661125e-05, - "loss": 1.8851, - "step": 528000 - }, - { - "epoch": 7.61, - "learning_rate": 4.6148853197908394e-05, - "loss": 1.8875, - "step": 528500 - }, - { - "epoch": 7.61, - "learning_rate": 4.609692645615566e-05, - "loss": 1.8862, - "step": 529000 - }, - { - "epoch": 7.62, - "learning_rate": 4.604499971440292e-05, - "loss": 1.8868, - "step": 529500 - }, - { - "epoch": 7.63, - "learning_rate": 4.5993072972650185e-05, - "loss": 1.8885, - "step": 530000 - }, - { - "epoch": 7.64, - "learning_rate": 4.594114623089745e-05, - "loss": 1.8867, - "step": 530500 - }, - { - "epoch": 7.64, - "learning_rate": 4.588932334262822e-05, - "loss": 1.8873, - "step": 531000 - }, - { - "epoch": 7.65, - "learning_rate": 4.583739660087549e-05, - "loss": 1.8849, - "step": 531500 - }, - { - "epoch": 7.66, - "learning_rate": 4.5785469859122756e-05, - "loss": 1.8829, - "step": 532000 - }, - { - "epoch": 7.66, - "learning_rate": 4.573354311737002e-05, - "loss": 1.8851, - "step": 532500 - }, - { - "epoch": 7.67, - "learning_rate": 4.568161637561728e-05, - "loss": 1.8881, - "step": 533000 - }, - { - "epoch": 7.68, - "learning_rate": 4.562979348734805e-05, - "loss": 1.8836, - "step": 533500 - }, - { - "epoch": 7.69, - "learning_rate": 4.5577866745595314e-05, - "loss": 1.8855, - "step": 534000 - }, - { - "epoch": 7.69, - "learning_rate": 4.552594000384258e-05, - "loss": 1.8858, - "step": 534500 - }, - { - "epoch": 7.7, - "learning_rate": 4.547401326208985e-05, - "loss": 1.89, - "step": 535000 - }, - { - "epoch": 7.71, - "learning_rate": 4.542208652033711e-05, - "loss": 1.8891, - "step": 535500 - }, - { - "epoch": 7.72, - "learning_rate": 4.537015977858437e-05, - "loss": 1.8859, - "step": 536000 - }, - { - "epoch": 7.72, - "learning_rate": 4.531823303683164e-05, - "loss": 1.8885, - "step": 536500 - }, - { - "epoch": 7.73, - "learning_rate": 4.52663062950789e-05, - "loss": 1.886, - "step": 537000 - }, - { - "epoch": 7.74, - "learning_rate": 4.5214483406809676e-05, - "loss": 1.8879, - "step": 537500 - }, - { - "epoch": 7.74, - "learning_rate": 4.516266051854044e-05, - "loss": 1.8859, - "step": 538000 - }, - { - "epoch": 7.75, - "learning_rate": 4.511073377678771e-05, - "loss": 1.8838, - "step": 538500 - }, - { - "epoch": 7.76, - "learning_rate": 4.505880703503497e-05, - "loss": 1.883, - "step": 539000 - }, - { - "epoch": 7.77, - "learning_rate": 4.500688029328224e-05, - "loss": 1.8858, - "step": 539500 - }, - { - "epoch": 7.77, - "learning_rate": 4.49549535515295e-05, - "loss": 1.886, - "step": 540000 - }, - { - "epoch": 7.78, - "learning_rate": 4.490302680977677e-05, - "loss": 1.8845, - "step": 540500 - }, - { - "epoch": 7.79, - "learning_rate": 4.485110006802403e-05, - "loss": 1.8904, - "step": 541000 - }, - { - "epoch": 7.79, - "learning_rate": 4.47991733262713e-05, - "loss": 1.8853, - "step": 541500 - }, - { - "epoch": 7.8, - "learning_rate": 4.4747350438002066e-05, - "loss": 1.8849, - "step": 542000 - }, - { - "epoch": 7.81, - "learning_rate": 4.4695423696249335e-05, - "loss": 1.8841, - "step": 542500 - }, - { - "epoch": 7.82, - "learning_rate": 4.46434969544966e-05, - "loss": 1.886, - "step": 543000 - }, - { - "epoch": 7.82, - "learning_rate": 4.459157021274386e-05, - "loss": 1.8844, - "step": 543500 - }, - { - "epoch": 7.83, - "learning_rate": 4.453974732447464e-05, - "loss": 1.8831, - "step": 544000 - }, - { - "epoch": 7.84, - "learning_rate": 4.44878205827219e-05, - "loss": 1.8854, - "step": 544500 - }, - { - "epoch": 7.84, - "learning_rate": 4.443589384096916e-05, - "loss": 1.8829, - "step": 545000 - }, - { - "epoch": 7.85, - "learning_rate": 4.438396709921643e-05, - "loss": 1.8865, - "step": 545500 - }, - { - "epoch": 7.86, - "learning_rate": 4.4332144210947195e-05, - "loss": 1.8806, - "step": 546000 - }, - { - "epoch": 7.87, - "learning_rate": 4.428021746919446e-05, - "loss": 1.8838, - "step": 546500 - }, - { - "epoch": 7.87, - "learning_rate": 4.422829072744173e-05, - "loss": 1.8853, - "step": 547000 - }, - { - "epoch": 7.88, - "learning_rate": 4.417636398568899e-05, - "loss": 1.8821, - "step": 547500 - }, - { - "epoch": 7.89, - "learning_rate": 4.4124541097419766e-05, - "loss": 1.8817, - "step": 548000 - }, - { - "epoch": 7.9, - "learning_rate": 4.407271820915053e-05, - "loss": 1.8797, - "step": 548500 - }, - { - "epoch": 7.9, - "learning_rate": 4.40207914673978e-05, - "loss": 1.8848, - "step": 549000 - }, - { - "epoch": 7.91, - "learning_rate": 4.396886472564506e-05, - "loss": 1.8852, - "step": 549500 - }, - { - "epoch": 7.92, - "learning_rate": 4.3916937983892324e-05, - "loss": 1.8805, - "step": 550000 - }, - { - "epoch": 7.92, - "learning_rate": 4.386501124213959e-05, - "loss": 1.8832, - "step": 550500 - }, - { - "epoch": 7.93, - "learning_rate": 4.381308450038686e-05, - "loss": 1.8835, - "step": 551000 - }, - { - "epoch": 7.94, - "learning_rate": 4.376115775863412e-05, - "loss": 1.8816, - "step": 551500 - }, - { - "epoch": 7.95, - "learning_rate": 4.370923101688138e-05, - "loss": 1.882, - "step": 552000 - }, - { - "epoch": 7.95, - "learning_rate": 4.3657408128612156e-05, - "loss": 1.8875, - "step": 552500 - }, - { - "epoch": 7.96, - "learning_rate": 4.360548138685942e-05, - "loss": 1.881, - "step": 553000 - }, - { - "epoch": 7.97, - "learning_rate": 4.3553554645106686e-05, - "loss": 1.8777, - "step": 553500 - }, - { - "epoch": 7.97, - "learning_rate": 4.3501627903353954e-05, - "loss": 1.8788, - "step": 554000 - }, - { - "epoch": 7.98, - "learning_rate": 4.344980501508472e-05, - "loss": 1.886, - "step": 554500 - }, - { - "epoch": 7.99, - "learning_rate": 4.339787827333199e-05, - "loss": 1.8836, - "step": 555000 - }, - { - "epoch": 8.0, - "learning_rate": 4.334595153157925e-05, - "loss": 1.8808, - "step": 555500 - }, - { - "epoch": 8.0, - "eval_accuracy": 0.6421464828448523, - "eval_loss": 1.741304636001587, - "eval_runtime": 646.83, - "eval_samples_per_second": 833.197, - "eval_steps_per_second": 34.717, - "step": 555784 - }, - { - "epoch": 8.0, - "learning_rate": 4.3294128643310023e-05, - "loss": 1.8816, - "step": 556000 - }, - { - "epoch": 8.01, - "learning_rate": 4.3242201901557285e-05, - "loss": 1.8744, - "step": 556500 - }, - { - "epoch": 8.02, - "learning_rate": 4.3190275159804546e-05, - "loss": 1.8789, - "step": 557000 - }, - { - "epoch": 8.02, - "learning_rate": 4.3138348418051815e-05, - "loss": 1.8776, - "step": 557500 - }, - { - "epoch": 8.03, - "learning_rate": 4.3086421676299076e-05, - "loss": 1.8818, - "step": 558000 - }, - { - "epoch": 8.04, - "learning_rate": 4.3034494934546345e-05, - "loss": 1.8787, - "step": 558500 - }, - { - "epoch": 8.05, - "learning_rate": 4.298256819279361e-05, - "loss": 1.8769, - "step": 559000 - }, - { - "epoch": 8.05, - "learning_rate": 4.2930641451040874e-05, - "loss": 1.8796, - "step": 559500 - }, - { - "epoch": 8.06, - "learning_rate": 4.287881856277165e-05, - "loss": 1.8799, - "step": 560000 - }, - { - "epoch": 8.07, - "learning_rate": 4.282689182101891e-05, - "loss": 1.8759, - "step": 560500 - }, - { - "epoch": 8.08, - "learning_rate": 4.277496507926617e-05, - "loss": 1.8762, - "step": 561000 - }, - { - "epoch": 8.08, - "learning_rate": 4.272303833751344e-05, - "loss": 1.8775, - "step": 561500 - }, - { - "epoch": 8.09, - "learning_rate": 4.2671215449244205e-05, - "loss": 1.8778, - "step": 562000 - }, - { - "epoch": 8.1, - "learning_rate": 4.261928870749147e-05, - "loss": 1.8753, - "step": 562500 - }, - { - "epoch": 8.1, - "learning_rate": 4.256746581922224e-05, - "loss": 1.8799, - "step": 563000 - }, - { - "epoch": 8.11, - "learning_rate": 4.251553907746951e-05, - "loss": 1.8741, - "step": 563500 - }, - { - "epoch": 8.12, - "learning_rate": 4.2463612335716776e-05, - "loss": 1.8765, - "step": 564000 - }, - { - "epoch": 8.13, - "learning_rate": 4.241168559396404e-05, - "loss": 1.8734, - "step": 564500 - }, - { - "epoch": 8.13, - "learning_rate": 4.23597588522113e-05, - "loss": 1.8799, - "step": 565000 - }, - { - "epoch": 8.14, - "learning_rate": 4.230783211045857e-05, - "loss": 1.8719, - "step": 565500 - }, - { - "epoch": 8.15, - "learning_rate": 4.2255905368705836e-05, - "loss": 1.874, - "step": 566000 - }, - { - "epoch": 8.15, - "learning_rate": 4.22039786269531e-05, - "loss": 1.879, - "step": 566500 - }, - { - "epoch": 8.16, - "learning_rate": 4.215215573868387e-05, - "loss": 1.8821, - "step": 567000 - }, - { - "epoch": 8.17, - "learning_rate": 4.210022899693113e-05, - "loss": 1.8782, - "step": 567500 - }, - { - "epoch": 8.18, - "learning_rate": 4.204830225517839e-05, - "loss": 1.8776, - "step": 568000 - }, - { - "epoch": 8.18, - "learning_rate": 4.199637551342566e-05, - "loss": 1.8789, - "step": 568500 - }, - { - "epoch": 8.19, - "learning_rate": 4.194455262515643e-05, - "loss": 1.874, - "step": 569000 - }, - { - "epoch": 8.2, - "learning_rate": 4.18927297368872e-05, - "loss": 1.8778, - "step": 569500 - }, - { - "epoch": 8.2, - "learning_rate": 4.184080299513446e-05, - "loss": 1.8758, - "step": 570000 - }, - { - "epoch": 8.21, - "learning_rate": 4.178887625338173e-05, - "loss": 1.8775, - "step": 570500 - }, - { - "epoch": 8.22, - "learning_rate": 4.1736949511629e-05, - "loss": 1.8754, - "step": 571000 - }, - { - "epoch": 8.23, - "learning_rate": 4.168502276987626e-05, - "loss": 1.8767, - "step": 571500 - }, - { - "epoch": 8.23, - "learning_rate": 4.163309602812352e-05, - "loss": 1.873, - "step": 572000 - }, - { - "epoch": 8.24, - "learning_rate": 4.158116928637079e-05, - "loss": 1.8753, - "step": 572500 - }, - { - "epoch": 8.25, - "learning_rate": 4.152924254461805e-05, - "loss": 1.8738, - "step": 573000 - }, - { - "epoch": 8.26, - "learning_rate": 4.1477419656348825e-05, - "loss": 1.8776, - "step": 573500 - }, - { - "epoch": 8.26, - "learning_rate": 4.142559676807959e-05, - "loss": 1.8794, - "step": 574000 - }, - { - "epoch": 8.27, - "learning_rate": 4.137367002632686e-05, - "loss": 1.8763, - "step": 574500 - }, - { - "epoch": 8.28, - "learning_rate": 4.132174328457413e-05, - "loss": 1.8744, - "step": 575000 - }, - { - "epoch": 8.28, - "learning_rate": 4.126981654282139e-05, - "loss": 1.8733, - "step": 575500 - }, - { - "epoch": 8.29, - "learning_rate": 4.121788980106866e-05, - "loss": 1.8696, - "step": 576000 - }, - { - "epoch": 8.3, - "learning_rate": 4.116596305931592e-05, - "loss": 1.876, - "step": 576500 - }, - { - "epoch": 8.31, - "learning_rate": 4.111403631756318e-05, - "loss": 1.8778, - "step": 577000 - }, - { - "epoch": 8.31, - "learning_rate": 4.106210957581045e-05, - "loss": 1.8761, - "step": 577500 - }, - { - "epoch": 8.32, - "learning_rate": 4.1010286687541215e-05, - "loss": 1.8761, - "step": 578000 - }, - { - "epoch": 8.33, - "learning_rate": 4.095835994578848e-05, - "loss": 1.8759, - "step": 578500 - }, - { - "epoch": 8.33, - "learning_rate": 4.090643320403575e-05, - "loss": 1.8725, - "step": 579000 - }, - { - "epoch": 8.34, - "learning_rate": 4.085450646228301e-05, - "loss": 1.8773, - "step": 579500 - }, - { - "epoch": 8.35, - "learning_rate": 4.0802683574013786e-05, - "loss": 1.8752, - "step": 580000 - }, - { - "epoch": 8.36, - "learning_rate": 4.075075683226105e-05, - "loss": 1.8776, - "step": 580500 - }, - { - "epoch": 8.36, - "learning_rate": 4.069883009050831e-05, - "loss": 1.8756, - "step": 581000 - }, - { - "epoch": 8.37, - "learning_rate": 4.064690334875558e-05, - "loss": 1.8744, - "step": 581500 - }, - { - "epoch": 8.38, - "learning_rate": 4.0594976607002846e-05, - "loss": 1.8796, - "step": 582000 - }, - { - "epoch": 8.38, - "learning_rate": 4.054304986525011e-05, - "loss": 1.8739, - "step": 582500 - }, - { - "epoch": 8.39, - "learning_rate": 4.049112312349737e-05, - "loss": 1.8727, - "step": 583000 - }, - { - "epoch": 8.4, - "learning_rate": 4.043919638174464e-05, - "loss": 1.8734, - "step": 583500 - }, - { - "epoch": 8.41, - "learning_rate": 4.03873734934754e-05, - "loss": 1.8798, - "step": 584000 - }, - { - "epoch": 8.41, - "learning_rate": 4.033544675172267e-05, - "loss": 1.8713, - "step": 584500 - }, - { - "epoch": 8.42, - "learning_rate": 4.028352000996994e-05, - "loss": 1.8746, - "step": 585000 - }, - { - "epoch": 8.43, - "learning_rate": 4.02315932682172e-05, - "loss": 1.8737, - "step": 585500 - }, - { - "epoch": 8.43, - "learning_rate": 4.0179770379947974e-05, - "loss": 1.8744, - "step": 586000 - }, - { - "epoch": 8.44, - "learning_rate": 4.0127843638195236e-05, - "loss": 1.875, - "step": 586500 - }, - { - "epoch": 8.45, - "learning_rate": 4.00759168964425e-05, - "loss": 1.8761, - "step": 587000 - }, - { - "epoch": 8.46, - "learning_rate": 4.0023990154689766e-05, - "loss": 1.8725, - "step": 587500 - }, - { - "epoch": 8.46, - "learning_rate": 3.997216726642053e-05, - "loss": 1.8763, - "step": 588000 - }, - { - "epoch": 8.47, - "learning_rate": 3.99202405246678e-05, - "loss": 1.8745, - "step": 588500 - }, - { - "epoch": 8.48, - "learning_rate": 3.986831378291507e-05, - "loss": 1.8748, - "step": 589000 - }, - { - "epoch": 8.49, - "learning_rate": 3.981638704116233e-05, - "loss": 1.8732, - "step": 589500 - }, - { - "epoch": 8.49, - "learning_rate": 3.97645641528931e-05, - "loss": 1.875, - "step": 590000 - }, - { - "epoch": 8.5, - "learning_rate": 3.971274126462387e-05, - "loss": 1.8705, - "step": 590500 - }, - { - "epoch": 8.51, - "learning_rate": 3.966081452287114e-05, - "loss": 1.8717, - "step": 591000 - }, - { - "epoch": 8.51, - "learning_rate": 3.96088877811184e-05, - "loss": 1.8737, - "step": 591500 - }, - { - "epoch": 8.52, - "learning_rate": 3.955696103936567e-05, - "loss": 1.8713, - "step": 592000 - }, - { - "epoch": 8.53, - "learning_rate": 3.950503429761293e-05, - "loss": 1.8764, - "step": 592500 - }, - { - "epoch": 8.54, - "learning_rate": 3.94531075558602e-05, - "loss": 1.8714, - "step": 593000 - }, - { - "epoch": 8.54, - "learning_rate": 3.940118081410746e-05, - "loss": 1.8711, - "step": 593500 - }, - { - "epoch": 8.55, - "learning_rate": 3.934925407235473e-05, - "loss": 1.871, - "step": 594000 - }, - { - "epoch": 8.56, - "learning_rate": 3.929743118408549e-05, - "loss": 1.8695, - "step": 594500 - }, - { - "epoch": 8.56, - "learning_rate": 3.924550444233276e-05, - "loss": 1.8734, - "step": 595000 - }, - { - "epoch": 8.57, - "learning_rate": 3.919357770058002e-05, - "loss": 1.8704, - "step": 595500 - }, - { - "epoch": 8.58, - "learning_rate": 3.9141650958827285e-05, - "loss": 1.8703, - "step": 596000 - }, - { - "epoch": 8.59, - "learning_rate": 3.908972421707455e-05, - "loss": 1.8733, - "step": 596500 - }, - { - "epoch": 8.59, - "learning_rate": 3.903779747532182e-05, - "loss": 1.8698, - "step": 597000 - }, - { - "epoch": 8.6, - "learning_rate": 3.898587073356908e-05, - "loss": 1.8741, - "step": 597500 - }, - { - "epoch": 8.61, - "learning_rate": 3.8933943991816344e-05, - "loss": 1.8723, - "step": 598000 - }, - { - "epoch": 8.61, - "learning_rate": 3.888212110354712e-05, - "loss": 1.8735, - "step": 598500 - }, - { - "epoch": 8.62, - "learning_rate": 3.883019436179438e-05, - "loss": 1.8716, - "step": 599000 - }, - { - "epoch": 8.63, - "learning_rate": 3.877826762004165e-05, - "loss": 1.8689, - "step": 599500 - }, - { - "epoch": 8.64, - "learning_rate": 3.8726340878288915e-05, - "loss": 1.869, - "step": 600000 - }, - { - "epoch": 8.64, - "learning_rate": 3.867451799001968e-05, - "loss": 1.8711, - "step": 600500 - }, - { - "epoch": 8.65, - "learning_rate": 3.862259124826695e-05, - "loss": 1.8725, - "step": 601000 - }, - { - "epoch": 8.66, - "learning_rate": 3.857066450651421e-05, - "loss": 1.87, - "step": 601500 - }, - { - "epoch": 8.67, - "learning_rate": 3.851873776476147e-05, - "loss": 1.8694, - "step": 602000 - }, - { - "epoch": 8.67, - "learning_rate": 3.846681102300874e-05, - "loss": 1.8677, - "step": 602500 - }, - { - "epoch": 8.68, - "learning_rate": 3.841498813473951e-05, - "loss": 1.8681, - "step": 603000 - }, - { - "epoch": 8.69, - "learning_rate": 3.8363061392986776e-05, - "loss": 1.8718, - "step": 603500 - }, - { - "epoch": 8.69, - "learning_rate": 3.8311134651234044e-05, - "loss": 1.8694, - "step": 604000 - }, - { - "epoch": 8.7, - "learning_rate": 3.8259207909481305e-05, - "loss": 1.8731, - "step": 604500 - }, - { - "epoch": 8.71, - "learning_rate": 3.820738502121208e-05, - "loss": 1.8699, - "step": 605000 - }, - { - "epoch": 8.72, - "learning_rate": 3.815545827945934e-05, - "loss": 1.8737, - "step": 605500 - }, - { - "epoch": 8.72, - "learning_rate": 3.81035315377066e-05, - "loss": 1.8676, - "step": 606000 - }, - { - "epoch": 8.73, - "learning_rate": 3.805160479595387e-05, - "loss": 1.8725, - "step": 606500 - }, - { - "epoch": 8.74, - "learning_rate": 3.799967805420113e-05, - "loss": 1.8712, - "step": 607000 - }, - { - "epoch": 8.74, - "learning_rate": 3.7947855165931904e-05, - "loss": 1.8725, - "step": 607500 - }, - { - "epoch": 8.75, - "learning_rate": 3.789592842417917e-05, - "loss": 1.8686, - "step": 608000 - }, - { - "epoch": 8.76, - "learning_rate": 3.7844001682426434e-05, - "loss": 1.8711, - "step": 608500 - }, - { - "epoch": 8.77, - "learning_rate": 3.77920749406737e-05, - "loss": 1.8691, - "step": 609000 - }, - { - "epoch": 8.77, - "learning_rate": 3.7740148198920964e-05, - "loss": 1.8749, - "step": 609500 - }, - { - "epoch": 8.78, - "learning_rate": 3.7688221457168225e-05, - "loss": 1.868, - "step": 610000 - }, - { - "epoch": 8.79, - "learning_rate": 3.7636398568899e-05, - "loss": 1.8713, - "step": 610500 - }, - { - "epoch": 8.79, - "learning_rate": 3.758447182714626e-05, - "loss": 1.8693, - "step": 611000 - }, - { - "epoch": 8.8, - "learning_rate": 3.753254508539353e-05, - "loss": 1.8725, - "step": 611500 - }, - { - "epoch": 8.81, - "learning_rate": 3.7480618343640797e-05, - "loss": 1.8677, - "step": 612000 - }, - { - "epoch": 8.82, - "learning_rate": 3.742869160188806e-05, - "loss": 1.8645, - "step": 612500 - }, - { - "epoch": 8.82, - "learning_rate": 3.737676486013532e-05, - "loss": 1.8695, - "step": 613000 - }, - { - "epoch": 8.83, - "learning_rate": 3.732494197186609e-05, - "loss": 1.8666, - "step": 613500 - }, - { - "epoch": 8.84, - "learning_rate": 3.7273015230113354e-05, - "loss": 1.8708, - "step": 614000 - }, - { - "epoch": 8.85, - "learning_rate": 3.722108848836062e-05, - "loss": 1.8687, - "step": 614500 - }, - { - "epoch": 8.85, - "learning_rate": 3.716916174660789e-05, - "loss": 1.8698, - "step": 615000 - }, - { - "epoch": 8.86, - "learning_rate": 3.711723500485515e-05, - "loss": 1.867, - "step": 615500 - }, - { - "epoch": 8.87, - "learning_rate": 3.7065308263102414e-05, - "loss": 1.8668, - "step": 616000 - }, - { - "epoch": 8.87, - "learning_rate": 3.701348537483319e-05, - "loss": 1.8673, - "step": 616500 - }, - { - "epoch": 8.88, - "learning_rate": 3.696155863308045e-05, - "loss": 1.8699, - "step": 617000 - }, - { - "epoch": 8.89, - "learning_rate": 3.6909631891327717e-05, - "loss": 1.8672, - "step": 617500 - }, - { - "epoch": 8.9, - "learning_rate": 3.6857705149574985e-05, - "loss": 1.8691, - "step": 618000 - }, - { - "epoch": 8.9, - "learning_rate": 3.6805778407822246e-05, - "loss": 1.8682, - "step": 618500 - }, - { - "epoch": 8.91, - "learning_rate": 3.675385166606951e-05, - "loss": 1.8649, - "step": 619000 - }, - { - "epoch": 8.92, - "learning_rate": 3.6701924924316776e-05, - "loss": 1.8702, - "step": 619500 - }, - { - "epoch": 8.92, - "learning_rate": 3.665010203604754e-05, - "loss": 1.8682, - "step": 620000 - }, - { - "epoch": 8.93, - "learning_rate": 3.659817529429481e-05, - "loss": 1.874, - "step": 620500 - }, - { - "epoch": 8.94, - "learning_rate": 3.654624855254207e-05, - "loss": 1.8637, - "step": 621000 - }, - { - "epoch": 8.95, - "learning_rate": 3.649432181078934e-05, - "loss": 1.8706, - "step": 621500 - }, - { - "epoch": 8.95, - "learning_rate": 3.644239506903661e-05, - "loss": 1.8685, - "step": 622000 - }, - { - "epoch": 8.96, - "learning_rate": 3.6390572180767375e-05, - "loss": 1.8641, - "step": 622500 - }, - { - "epoch": 8.97, - "learning_rate": 3.633864543901464e-05, - "loss": 1.8654, - "step": 623000 - }, - { - "epoch": 8.97, - "learning_rate": 3.6286718697261905e-05, - "loss": 1.8722, - "step": 623500 - }, - { - "epoch": 8.98, - "learning_rate": 3.6234791955509166e-05, - "loss": 1.8645, - "step": 624000 - }, - { - "epoch": 8.99, - "learning_rate": 3.6182865213756435e-05, - "loss": 1.8667, - "step": 624500 - }, - { - "epoch": 9.0, - "learning_rate": 3.61309384720037e-05, - "loss": 1.8684, - "step": 625000 - }, - { - "epoch": 9.0, - "eval_accuracy": 0.6439520651638971, - "eval_loss": 1.7282428741455078, - "eval_runtime": 647.4593, - "eval_samples_per_second": 832.387, - "eval_steps_per_second": 34.683, - "step": 625257 - }, - { - "epoch": 9.0, - "learning_rate": 3.6079011730250964e-05, - "loss": 1.8635, - "step": 625500 - }, - { - "epoch": 9.01, - "learning_rate": 3.6027084988498226e-05, - "loss": 1.8644, - "step": 626000 - }, - { - "epoch": 9.02, - "learning_rate": 3.5975262100229e-05, - "loss": 1.8647, - "step": 626500 - }, - { - "epoch": 9.03, - "learning_rate": 3.592333535847626e-05, - "loss": 1.8593, - "step": 627000 - }, - { - "epoch": 9.03, - "learning_rate": 3.587140861672353e-05, - "loss": 1.8659, - "step": 627500 - }, - { - "epoch": 9.04, - "learning_rate": 3.58194818749708e-05, - "loss": 1.8667, - "step": 628000 - }, - { - "epoch": 9.05, - "learning_rate": 3.576755513321806e-05, - "loss": 1.8628, - "step": 628500 - }, - { - "epoch": 9.05, - "learning_rate": 3.571573224494883e-05, - "loss": 1.8635, - "step": 629000 - }, - { - "epoch": 9.06, - "learning_rate": 3.566380550319609e-05, - "loss": 1.8626, - "step": 629500 - }, - { - "epoch": 9.07, - "learning_rate": 3.5611878761443355e-05, - "loss": 1.8648, - "step": 630000 - }, - { - "epoch": 9.08, - "learning_rate": 3.555995201969062e-05, - "loss": 1.8633, - "step": 630500 - }, - { - "epoch": 9.08, - "learning_rate": 3.550812913142139e-05, - "loss": 1.8675, - "step": 631000 - }, - { - "epoch": 9.09, - "learning_rate": 3.545620238966866e-05, - "loss": 1.8628, - "step": 631500 - }, - { - "epoch": 9.1, - "learning_rate": 3.5404275647915926e-05, - "loss": 1.8607, - "step": 632000 - }, - { - "epoch": 9.1, - "learning_rate": 3.535234890616319e-05, - "loss": 1.8607, - "step": 632500 - }, - { - "epoch": 9.11, - "learning_rate": 3.530052601789396e-05, - "loss": 1.8634, - "step": 633000 - }, - { - "epoch": 9.12, - "learning_rate": 3.524859927614122e-05, - "loss": 1.8668, - "step": 633500 - }, - { - "epoch": 9.13, - "learning_rate": 3.519667253438848e-05, - "loss": 1.8627, - "step": 634000 - }, - { - "epoch": 9.13, - "learning_rate": 3.514474579263575e-05, - "loss": 1.8585, - "step": 634500 - }, - { - "epoch": 9.14, - "learning_rate": 3.509281905088301e-05, - "loss": 1.8609, - "step": 635000 - }, - { - "epoch": 9.15, - "learning_rate": 3.504089230913028e-05, - "loss": 1.8661, - "step": 635500 - }, - { - "epoch": 9.15, - "learning_rate": 3.498896556737754e-05, - "loss": 1.8604, - "step": 636000 - }, - { - "epoch": 9.16, - "learning_rate": 3.493703882562481e-05, - "loss": 1.8576, - "step": 636500 - }, - { - "epoch": 9.17, - "learning_rate": 3.488531979083909e-05, - "loss": 1.8629, - "step": 637000 - }, - { - "epoch": 9.18, - "learning_rate": 3.483339304908635e-05, - "loss": 1.8623, - "step": 637500 - }, - { - "epoch": 9.18, - "learning_rate": 3.478146630733362e-05, - "loss": 1.8646, - "step": 638000 - }, - { - "epoch": 9.19, - "learning_rate": 3.472953956558088e-05, - "loss": 1.8596, - "step": 638500 - }, - { - "epoch": 9.2, - "learning_rate": 3.467761282382814e-05, - "loss": 1.8588, - "step": 639000 - }, - { - "epoch": 9.21, - "learning_rate": 3.462568608207541e-05, - "loss": 1.8619, - "step": 639500 - }, - { - "epoch": 9.21, - "learning_rate": 3.457375934032268e-05, - "loss": 1.8561, - "step": 640000 - }, - { - "epoch": 9.22, - "learning_rate": 3.452183259856993e-05, - "loss": 1.8604, - "step": 640500 - }, - { - "epoch": 9.23, - "learning_rate": 3.447000971030071e-05, - "loss": 1.8586, - "step": 641000 - }, - { - "epoch": 9.23, - "learning_rate": 3.4418082968547974e-05, - "loss": 1.8609, - "step": 641500 - }, - { - "epoch": 9.24, - "learning_rate": 3.4366156226795236e-05, - "loss": 1.8588, - "step": 642000 - }, - { - "epoch": 9.25, - "learning_rate": 3.431433333852601e-05, - "loss": 1.8609, - "step": 642500 - }, - { - "epoch": 9.26, - "learning_rate": 3.426240659677327e-05, - "loss": 1.8614, - "step": 643000 - }, - { - "epoch": 9.26, - "learning_rate": 3.421047985502054e-05, - "loss": 1.8554, - "step": 643500 - }, - { - "epoch": 9.27, - "learning_rate": 3.415855311326781e-05, - "loss": 1.8618, - "step": 644000 - }, - { - "epoch": 9.28, - "learning_rate": 3.410662637151507e-05, - "loss": 1.8614, - "step": 644500 - }, - { - "epoch": 9.28, - "learning_rate": 3.405480348324584e-05, - "loss": 1.8586, - "step": 645000 - }, - { - "epoch": 9.29, - "learning_rate": 3.40028767414931e-05, - "loss": 1.8631, - "step": 645500 - }, - { - "epoch": 9.3, - "learning_rate": 3.3950949999740365e-05, - "loss": 1.8605, - "step": 646000 - }, - { - "epoch": 9.31, - "learning_rate": 3.389902325798763e-05, - "loss": 1.862, - "step": 646500 - }, - { - "epoch": 9.31, - "learning_rate": 3.38472003697184e-05, - "loss": 1.8579, - "step": 647000 - }, - { - "epoch": 9.32, - "learning_rate": 3.379527362796567e-05, - "loss": 1.8596, - "step": 647500 - }, - { - "epoch": 9.33, - "learning_rate": 3.3743346886212936e-05, - "loss": 1.8614, - "step": 648000 - }, - { - "epoch": 9.33, - "learning_rate": 3.36914201444602e-05, - "loss": 1.862, - "step": 648500 - }, - { - "epoch": 9.34, - "learning_rate": 3.363949340270746e-05, - "loss": 1.8548, - "step": 649000 - }, - { - "epoch": 9.35, - "learning_rate": 3.358767051443823e-05, - "loss": 1.861, - "step": 649500 - }, - { - "epoch": 9.36, - "learning_rate": 3.353574377268549e-05, - "loss": 1.8625, - "step": 650000 - }, - { - "epoch": 9.36, - "learning_rate": 3.348381703093276e-05, - "loss": 1.8584, - "step": 650500 - }, - { - "epoch": 9.37, - "learning_rate": 3.343189028918003e-05, - "loss": 1.859, - "step": 651000 - }, - { - "epoch": 9.38, - "learning_rate": 3.3380067400910796e-05, - "loss": 1.8593, - "step": 651500 - }, - { - "epoch": 9.38, - "learning_rate": 3.3328140659158064e-05, - "loss": 1.86, - "step": 652000 - }, - { - "epoch": 9.39, - "learning_rate": 3.3276213917405326e-05, - "loss": 1.8634, - "step": 652500 - }, - { - "epoch": 9.4, - "learning_rate": 3.322428717565259e-05, - "loss": 1.8623, - "step": 653000 - }, - { - "epoch": 9.41, - "learning_rate": 3.3172360433899856e-05, - "loss": 1.8594, - "step": 653500 - }, - { - "epoch": 9.41, - "learning_rate": 3.312043369214712e-05, - "loss": 1.8619, - "step": 654000 - }, - { - "epoch": 9.42, - "learning_rate": 3.306861080387789e-05, - "loss": 1.8597, - "step": 654500 - }, - { - "epoch": 9.43, - "learning_rate": 3.301668406212515e-05, - "loss": 1.8595, - "step": 655000 - }, - { - "epoch": 9.44, - "learning_rate": 3.296475732037242e-05, - "loss": 1.8574, - "step": 655500 - }, - { - "epoch": 9.44, - "learning_rate": 3.291283057861969e-05, - "loss": 1.8601, - "step": 656000 - }, - { - "epoch": 9.45, - "learning_rate": 3.286090383686695e-05, - "loss": 1.8594, - "step": 656500 - }, - { - "epoch": 9.46, - "learning_rate": 3.280908094859772e-05, - "loss": 1.8607, - "step": 657000 - }, - { - "epoch": 9.46, - "learning_rate": 3.2757154206844984e-05, - "loss": 1.8589, - "step": 657500 - }, - { - "epoch": 9.47, - "learning_rate": 3.2705227465092246e-05, - "loss": 1.8601, - "step": 658000 - }, - { - "epoch": 9.48, - "learning_rate": 3.2653300723339514e-05, - "loss": 1.8606, - "step": 658500 - }, - { - "epoch": 9.49, - "learning_rate": 3.260137398158678e-05, - "loss": 1.8601, - "step": 659000 - }, - { - "epoch": 9.49, - "learning_rate": 3.2549447239834044e-05, - "loss": 1.8591, - "step": 659500 - }, - { - "epoch": 9.5, - "learning_rate": 3.249762435156482e-05, - "loss": 1.8624, - "step": 660000 - }, - { - "epoch": 9.51, - "learning_rate": 3.244569760981208e-05, - "loss": 1.8576, - "step": 660500 - }, - { - "epoch": 9.51, - "learning_rate": 3.239377086805934e-05, - "loss": 1.8572, - "step": 661000 - }, - { - "epoch": 9.52, - "learning_rate": 3.234184412630661e-05, - "loss": 1.8566, - "step": 661500 - }, - { - "epoch": 9.53, - "learning_rate": 3.228991738455388e-05, - "loss": 1.861, - "step": 662000 - }, - { - "epoch": 9.54, - "learning_rate": 3.223809449628464e-05, - "loss": 1.8551, - "step": 662500 - }, - { - "epoch": 9.54, - "learning_rate": 3.218616775453191e-05, - "loss": 1.8572, - "step": 663000 - }, - { - "epoch": 9.55, - "learning_rate": 3.213424101277917e-05, - "loss": 1.8547, - "step": 663500 - }, - { - "epoch": 9.56, - "learning_rate": 3.2082314271026434e-05, - "loss": 1.8563, - "step": 664000 - }, - { - "epoch": 9.56, - "learning_rate": 3.20303875292737e-05, - "loss": 1.8589, - "step": 664500 - }, - { - "epoch": 9.57, - "learning_rate": 3.197846078752097e-05, - "loss": 1.8556, - "step": 665000 - }, - { - "epoch": 9.58, - "learning_rate": 3.192653404576823e-05, - "loss": 1.8538, - "step": 665500 - }, - { - "epoch": 9.59, - "learning_rate": 3.1874711157499005e-05, - "loss": 1.8587, - "step": 666000 - }, - { - "epoch": 9.59, - "learning_rate": 3.182278441574627e-05, - "loss": 1.8566, - "step": 666500 - }, - { - "epoch": 9.6, - "learning_rate": 3.177085767399353e-05, - "loss": 1.8564, - "step": 667000 - }, - { - "epoch": 9.61, - "learning_rate": 3.17189309322408e-05, - "loss": 1.8565, - "step": 667500 - }, - { - "epoch": 9.62, - "learning_rate": 3.166700419048806e-05, - "loss": 1.8574, - "step": 668000 - }, - { - "epoch": 9.62, - "learning_rate": 3.161518130221883e-05, - "loss": 1.8576, - "step": 668500 - }, - { - "epoch": 9.63, - "learning_rate": 3.156325456046609e-05, - "loss": 1.8628, - "step": 669000 - }, - { - "epoch": 9.64, - "learning_rate": 3.151132781871336e-05, - "loss": 1.8601, - "step": 669500 - }, - { - "epoch": 9.64, - "learning_rate": 3.145940107696063e-05, - "loss": 1.8549, - "step": 670000 - }, - { - "epoch": 9.65, - "learning_rate": 3.140747433520789e-05, - "loss": 1.8548, - "step": 670500 - }, - { - "epoch": 9.66, - "learning_rate": 3.135554759345515e-05, - "loss": 1.8594, - "step": 671000 - }, - { - "epoch": 9.67, - "learning_rate": 3.130362085170242e-05, - "loss": 1.8564, - "step": 671500 - }, - { - "epoch": 9.67, - "learning_rate": 3.125169410994969e-05, - "loss": 1.8547, - "step": 672000 - }, - { - "epoch": 9.68, - "learning_rate": 3.1199871221680455e-05, - "loss": 1.8538, - "step": 672500 - }, - { - "epoch": 9.69, - "learning_rate": 3.114804833341122e-05, - "loss": 1.8571, - "step": 673000 - }, - { - "epoch": 9.69, - "learning_rate": 3.109612159165849e-05, - "loss": 1.855, - "step": 673500 - }, - { - "epoch": 9.7, - "learning_rate": 3.104419484990576e-05, - "loss": 1.8553, - "step": 674000 - }, - { - "epoch": 9.71, - "learning_rate": 3.099226810815302e-05, - "loss": 1.8581, - "step": 674500 - }, - { - "epoch": 9.72, - "learning_rate": 3.094034136640028e-05, - "loss": 1.8542, - "step": 675000 - }, - { - "epoch": 9.72, - "learning_rate": 3.088841462464755e-05, - "loss": 1.8521, - "step": 675500 - }, - { - "epoch": 9.73, - "learning_rate": 3.083648788289482e-05, - "loss": 1.8558, - "step": 676000 - }, - { - "epoch": 9.74, - "learning_rate": 3.0784664994625584e-05, - "loss": 1.8517, - "step": 676500 - }, - { - "epoch": 9.74, - "learning_rate": 3.073273825287285e-05, - "loss": 1.8526, - "step": 677000 - }, - { - "epoch": 9.75, - "learning_rate": 3.0680811511120114e-05, - "loss": 1.8609, - "step": 677500 - }, - { - "epoch": 9.76, - "learning_rate": 3.0628884769367375e-05, - "loss": 1.853, - "step": 678000 - }, - { - "epoch": 9.77, - "learning_rate": 3.0576958027614643e-05, - "loss": 1.8541, - "step": 678500 - }, - { - "epoch": 9.77, - "learning_rate": 3.052513513934541e-05, - "loss": 1.8562, - "step": 679000 - }, - { - "epoch": 9.78, - "learning_rate": 3.0473208397592678e-05, - "loss": 1.852, - "step": 679500 - }, - { - "epoch": 9.79, - "learning_rate": 3.0421281655839946e-05, - "loss": 1.8511, - "step": 680000 - }, - { - "epoch": 9.8, - "learning_rate": 3.0369354914087204e-05, - "loss": 1.8544, - "step": 680500 - }, - { - "epoch": 9.8, - "learning_rate": 3.031753202581798e-05, - "loss": 1.8553, - "step": 681000 - }, - { - "epoch": 9.81, - "learning_rate": 3.026560528406524e-05, - "loss": 1.859, - "step": 681500 - }, - { - "epoch": 9.82, - "learning_rate": 3.0213678542312507e-05, - "loss": 1.8608, - "step": 682000 - }, - { - "epoch": 9.82, - "learning_rate": 3.0161751800559772e-05, - "loss": 1.8512, - "step": 682500 - }, - { - "epoch": 9.83, - "learning_rate": 3.0109928912290542e-05, - "loss": 1.8558, - "step": 683000 - }, - { - "epoch": 9.84, - "learning_rate": 3.0058002170537807e-05, - "loss": 1.8516, - "step": 683500 - }, - { - "epoch": 9.85, - "learning_rate": 3.0006075428785068e-05, - "loss": 1.8533, - "step": 684000 - }, - { - "epoch": 9.85, - "learning_rate": 2.9954148687032337e-05, - "loss": 1.856, - "step": 684500 - }, - { - "epoch": 9.86, - "learning_rate": 2.99022219452796e-05, - "loss": 1.8492, - "step": 685000 - }, - { - "epoch": 9.87, - "learning_rate": 2.985039905701037e-05, - "loss": 1.8532, - "step": 685500 - }, - { - "epoch": 9.87, - "learning_rate": 2.9798472315257636e-05, - "loss": 1.856, - "step": 686000 - }, - { - "epoch": 9.88, - "learning_rate": 2.9746545573504904e-05, - "loss": 1.8571, - "step": 686500 - }, - { - "epoch": 9.89, - "learning_rate": 2.9694618831752162e-05, - "loss": 1.8554, - "step": 687000 - }, - { - "epoch": 9.9, - "learning_rate": 2.964269208999943e-05, - "loss": 1.8535, - "step": 687500 - }, - { - "epoch": 9.9, - "learning_rate": 2.9590765348246696e-05, - "loss": 1.853, - "step": 688000 - }, - { - "epoch": 9.91, - "learning_rate": 2.9538838606493957e-05, - "loss": 1.8502, - "step": 688500 - }, - { - "epoch": 9.92, - "learning_rate": 2.948701571822473e-05, - "loss": 1.8495, - "step": 689000 - }, - { - "epoch": 9.92, - "learning_rate": 2.9435088976472e-05, - "loss": 1.8513, - "step": 689500 - }, - { - "epoch": 9.93, - "learning_rate": 2.9383162234719257e-05, - "loss": 1.8521, - "step": 690000 - }, - { - "epoch": 9.94, - "learning_rate": 2.9331235492966525e-05, - "loss": 1.8537, - "step": 690500 - }, - { - "epoch": 9.95, - "learning_rate": 2.927930875121379e-05, - "loss": 1.8503, - "step": 691000 - }, - { - "epoch": 9.95, - "learning_rate": 2.922738200946105e-05, - "loss": 1.8505, - "step": 691500 - }, - { - "epoch": 9.96, - "learning_rate": 2.9175559121191824e-05, - "loss": 1.8586, - "step": 692000 - }, - { - "epoch": 9.97, - "learning_rate": 2.9123632379439086e-05, - "loss": 1.8524, - "step": 692500 - }, - { - "epoch": 9.98, - "learning_rate": 2.9071705637686354e-05, - "loss": 1.8504, - "step": 693000 - }, - { - "epoch": 9.98, - "learning_rate": 2.901977889593362e-05, - "loss": 1.8514, - "step": 693500 - }, - { - "epoch": 9.99, - "learning_rate": 2.8967852154180887e-05, - "loss": 1.8511, - "step": 694000 - }, - { - "epoch": 10.0, - "learning_rate": 2.8915925412428145e-05, - "loss": 1.8517, - "step": 694500 - }, - { - "epoch": 10.0, - "eval_accuracy": 0.6463801918157677, - "eval_loss": 1.7140141725540161, - "eval_runtime": 646.6238, - "eval_samples_per_second": 833.463, - "eval_steps_per_second": 34.728, - "step": 694730 - }, - { - "epoch": 10.0, - "learning_rate": 2.8863998670675414e-05, - "loss": 1.851, - "step": 695000 - }, - { - "epoch": 10.01, - "learning_rate": 2.881217578240618e-05, - "loss": 1.8505, - "step": 695500 - }, - { - "epoch": 10.02, - "learning_rate": 2.8760249040653448e-05, - "loss": 1.8464, - "step": 696000 - }, - { - "epoch": 10.03, - "learning_rate": 2.8708322298900713e-05, - "loss": 1.852, - "step": 696500 - }, - { - "epoch": 10.03, - "learning_rate": 2.8656395557147975e-05, - "loss": 1.8485, - "step": 697000 - }, - { - "epoch": 10.04, - "learning_rate": 2.860446881539524e-05, - "loss": 1.8525, - "step": 697500 - }, - { - "epoch": 10.05, - "learning_rate": 2.855264592712601e-05, - "loss": 1.851, - "step": 698000 - }, - { - "epoch": 10.05, - "learning_rate": 2.8500719185373277e-05, - "loss": 1.8502, - "step": 698500 - }, - { - "epoch": 10.06, - "learning_rate": 2.8448792443620542e-05, - "loss": 1.8492, - "step": 699000 - }, - { - "epoch": 10.07, - "learning_rate": 2.8396865701867807e-05, - "loss": 1.849, - "step": 699500 - }, - { - "epoch": 10.08, - "learning_rate": 2.834493896011507e-05, - "loss": 1.8477, - "step": 700000 - }, - { - "epoch": 10.08, - "learning_rate": 2.8293012218362337e-05, - "loss": 1.8453, - "step": 700500 - }, - { - "epoch": 10.09, - "learning_rate": 2.8241189330093103e-05, - "loss": 1.8462, - "step": 701000 - }, - { - "epoch": 10.1, - "learning_rate": 2.818926258834037e-05, - "loss": 1.8501, - "step": 701500 - }, - { - "epoch": 10.1, - "learning_rate": 2.8137335846587636e-05, - "loss": 1.8469, - "step": 702000 - }, - { - "epoch": 10.11, - "learning_rate": 2.8085409104834898e-05, - "loss": 1.8491, - "step": 702500 - }, - { - "epoch": 10.12, - "learning_rate": 2.8033482363082163e-05, - "loss": 1.8436, - "step": 703000 - }, - { - "epoch": 10.13, - "learning_rate": 2.798155562132943e-05, - "loss": 1.851, - "step": 703500 - }, - { - "epoch": 10.13, - "learning_rate": 2.7929628879576696e-05, - "loss": 1.8514, - "step": 704000 - }, - { - "epoch": 10.14, - "learning_rate": 2.7877702137823958e-05, - "loss": 1.8483, - "step": 704500 - }, - { - "epoch": 10.15, - "learning_rate": 2.782587924955473e-05, - "loss": 1.8516, - "step": 705000 - }, - { - "epoch": 10.16, - "learning_rate": 2.7773952507801992e-05, - "loss": 1.8502, - "step": 705500 - }, - { - "epoch": 10.16, - "learning_rate": 2.772202576604926e-05, - "loss": 1.8506, - "step": 706000 - }, - { - "epoch": 10.17, - "learning_rate": 2.7670099024296525e-05, - "loss": 1.8464, - "step": 706500 - }, - { - "epoch": 10.18, - "learning_rate": 2.76183799895108e-05, - "loss": 1.8482, - "step": 707000 - }, - { - "epoch": 10.18, - "learning_rate": 2.756645324775806e-05, - "loss": 1.8451, - "step": 707500 - }, - { - "epoch": 10.19, - "learning_rate": 2.751452650600533e-05, - "loss": 1.851, - "step": 708000 - }, - { - "epoch": 10.2, - "learning_rate": 2.7462599764252594e-05, - "loss": 1.8453, - "step": 708500 - }, - { - "epoch": 10.21, - "learning_rate": 2.741067302249986e-05, - "loss": 1.8466, - "step": 709000 - }, - { - "epoch": 10.21, - "learning_rate": 2.735885013423063e-05, - "loss": 1.8497, - "step": 709500 - }, - { - "epoch": 10.22, - "learning_rate": 2.7306923392477897e-05, - "loss": 1.8495, - "step": 710000 - }, - { - "epoch": 10.23, - "learning_rate": 2.7254996650725155e-05, - "loss": 1.8506, - "step": 710500 - }, - { - "epoch": 10.23, - "learning_rate": 2.7203069908972424e-05, - "loss": 1.8451, - "step": 711000 - }, - { - "epoch": 10.24, - "learning_rate": 2.715114316721969e-05, - "loss": 1.8486, - "step": 711500 - }, - { - "epoch": 10.25, - "learning_rate": 2.709921642546695e-05, - "loss": 1.8487, - "step": 712000 - }, - { - "epoch": 10.26, - "learning_rate": 2.7047289683714215e-05, - "loss": 1.8479, - "step": 712500 - }, - { - "epoch": 10.26, - "learning_rate": 2.699546679544499e-05, - "loss": 1.8466, - "step": 713000 - }, - { - "epoch": 10.27, - "learning_rate": 2.694354005369225e-05, - "loss": 1.8496, - "step": 713500 - }, - { - "epoch": 10.28, - "learning_rate": 2.6891613311939518e-05, - "loss": 1.8452, - "step": 714000 - }, - { - "epoch": 10.28, - "learning_rate": 2.6839686570186783e-05, - "loss": 1.8497, - "step": 714500 - }, - { - "epoch": 10.29, - "learning_rate": 2.6787759828434044e-05, - "loss": 1.8442, - "step": 715000 - }, - { - "epoch": 10.3, - "learning_rate": 2.6735936940164817e-05, - "loss": 1.8458, - "step": 715500 - }, - { - "epoch": 10.31, - "learning_rate": 2.668401019841208e-05, - "loss": 1.8462, - "step": 716000 - }, - { - "epoch": 10.31, - "learning_rate": 2.6632083456659347e-05, - "loss": 1.846, - "step": 716500 - }, - { - "epoch": 10.32, - "learning_rate": 2.6580156714906612e-05, - "loss": 1.8428, - "step": 717000 - }, - { - "epoch": 10.33, - "learning_rate": 2.652833382663738e-05, - "loss": 1.8453, - "step": 717500 - }, - { - "epoch": 10.33, - "learning_rate": 2.6476407084884646e-05, - "loss": 1.8468, - "step": 718000 - }, - { - "epoch": 10.34, - "learning_rate": 2.6424480343131915e-05, - "loss": 1.8439, - "step": 718500 - }, - { - "epoch": 10.35, - "learning_rate": 2.6372553601379173e-05, - "loss": 1.8415, - "step": 719000 - }, - { - "epoch": 10.36, - "learning_rate": 2.632062685962644e-05, - "loss": 1.8476, - "step": 719500 - }, - { - "epoch": 10.36, - "learning_rate": 2.6268803971357207e-05, - "loss": 1.8418, - "step": 720000 - }, - { - "epoch": 10.37, - "learning_rate": 2.6216877229604476e-05, - "loss": 1.8449, - "step": 720500 - }, - { - "epoch": 10.38, - "learning_rate": 2.616495048785174e-05, - "loss": 1.8443, - "step": 721000 - }, - { - "epoch": 10.39, - "learning_rate": 2.6113023746099002e-05, - "loss": 1.8463, - "step": 721500 - }, - { - "epoch": 10.39, - "learning_rate": 2.606109700434627e-05, - "loss": 1.8501, - "step": 722000 - }, - { - "epoch": 10.4, - "learning_rate": 2.6009170262593535e-05, - "loss": 1.8479, - "step": 722500 - }, - { - "epoch": 10.41, - "learning_rate": 2.5957347374324305e-05, - "loss": 1.8497, - "step": 723000 - }, - { - "epoch": 10.41, - "learning_rate": 2.590542063257157e-05, - "loss": 1.8438, - "step": 723500 - }, - { - "epoch": 10.42, - "learning_rate": 2.5853493890818835e-05, - "loss": 1.8444, - "step": 724000 - }, - { - "epoch": 10.43, - "learning_rate": 2.5801567149066096e-05, - "loss": 1.8485, - "step": 724500 - }, - { - "epoch": 10.44, - "learning_rate": 2.5749640407313365e-05, - "loss": 1.8456, - "step": 725000 - }, - { - "epoch": 10.44, - "learning_rate": 2.569781751904413e-05, - "loss": 1.846, - "step": 725500 - }, - { - "epoch": 10.45, - "learning_rate": 2.56458907772914e-05, - "loss": 1.8492, - "step": 726000 - }, - { - "epoch": 10.46, - "learning_rate": 2.5593964035538664e-05, - "loss": 1.8509, - "step": 726500 - }, - { - "epoch": 10.46, - "learning_rate": 2.5542037293785926e-05, - "loss": 1.8415, - "step": 727000 - }, - { - "epoch": 10.47, - "learning_rate": 2.549011055203319e-05, - "loss": 1.8446, - "step": 727500 - }, - { - "epoch": 10.48, - "learning_rate": 2.5438287663763967e-05, - "loss": 1.8488, - "step": 728000 - }, - { - "epoch": 10.49, - "learning_rate": 2.5386360922011225e-05, - "loss": 1.8439, - "step": 728500 - }, - { - "epoch": 10.49, - "learning_rate": 2.5334434180258493e-05, - "loss": 1.8464, - "step": 729000 - }, - { - "epoch": 10.5, - "learning_rate": 2.5282507438505758e-05, - "loss": 1.8486, - "step": 729500 - }, - { - "epoch": 10.51, - "learning_rate": 2.523058069675302e-05, - "loss": 1.8476, - "step": 730000 - }, - { - "epoch": 10.51, - "learning_rate": 2.5178653955000288e-05, - "loss": 1.8451, - "step": 730500 - }, - { - "epoch": 10.52, - "learning_rate": 2.5126727213247553e-05, - "loss": 1.8463, - "step": 731000 - }, - { - "epoch": 10.53, - "learning_rate": 2.5074904324978322e-05, - "loss": 1.842, - "step": 731500 - }, - { - "epoch": 10.54, - "learning_rate": 2.5022977583225587e-05, - "loss": 1.8431, - "step": 732000 - }, - { - "epoch": 10.54, - "learning_rate": 2.4971050841472852e-05, - "loss": 1.8445, - "step": 732500 - }, - { - "epoch": 10.55, - "learning_rate": 2.4919124099720114e-05, - "loss": 1.843, - "step": 733000 - }, - { - "epoch": 10.56, - "learning_rate": 2.4867197357967382e-05, - "loss": 1.8444, - "step": 733500 - }, - { - "epoch": 10.57, - "learning_rate": 2.4815374469698152e-05, - "loss": 1.8471, - "step": 734000 - }, - { - "epoch": 10.57, - "learning_rate": 2.4763447727945417e-05, - "loss": 1.8458, - "step": 734500 - }, - { - "epoch": 10.58, - "learning_rate": 2.4711520986192678e-05, - "loss": 1.839, - "step": 735000 - }, - { - "epoch": 10.59, - "learning_rate": 2.4659594244439946e-05, - "loss": 1.844, - "step": 735500 - }, - { - "epoch": 10.59, - "learning_rate": 2.4607771356170716e-05, - "loss": 1.8431, - "step": 736000 - }, - { - "epoch": 10.6, - "learning_rate": 2.455584461441798e-05, - "loss": 1.847, - "step": 736500 - }, - { - "epoch": 10.61, - "learning_rate": 2.4503917872665242e-05, - "loss": 1.8469, - "step": 737000 - }, - { - "epoch": 10.62, - "learning_rate": 2.445199113091251e-05, - "loss": 1.8411, - "step": 737500 - }, - { - "epoch": 10.62, - "learning_rate": 2.4400064389159776e-05, - "loss": 1.8458, - "step": 738000 - }, - { - "epoch": 10.63, - "learning_rate": 2.434813764740704e-05, - "loss": 1.8439, - "step": 738500 - }, - { - "epoch": 10.64, - "learning_rate": 2.429631475913781e-05, - "loss": 1.8413, - "step": 739000 - }, - { - "epoch": 10.64, - "learning_rate": 2.4244388017385075e-05, - "loss": 1.8451, - "step": 739500 - }, - { - "epoch": 10.65, - "learning_rate": 2.419246127563234e-05, - "loss": 1.8435, - "step": 740000 - }, - { - "epoch": 10.66, - "learning_rate": 2.4140534533879605e-05, - "loss": 1.8414, - "step": 740500 - }, - { - "epoch": 10.67, - "learning_rate": 2.408860779212687e-05, - "loss": 1.8354, - "step": 741000 - }, - { - "epoch": 10.67, - "learning_rate": 2.403678490385764e-05, - "loss": 1.8443, - "step": 741500 - }, - { - "epoch": 10.68, - "learning_rate": 2.3984858162104904e-05, - "loss": 1.8406, - "step": 742000 - }, - { - "epoch": 10.69, - "learning_rate": 2.3932931420352166e-05, - "loss": 1.8435, - "step": 742500 - }, - { - "epoch": 10.69, - "learning_rate": 2.3881004678599434e-05, - "loss": 1.8416, - "step": 743000 - }, - { - "epoch": 10.7, - "learning_rate": 2.3829181790330204e-05, - "loss": 1.8432, - "step": 743500 - }, - { - "epoch": 10.71, - "learning_rate": 2.377725504857747e-05, - "loss": 1.842, - "step": 744000 - }, - { - "epoch": 10.72, - "learning_rate": 2.372532830682473e-05, - "loss": 1.8392, - "step": 744500 - }, - { - "epoch": 10.72, - "learning_rate": 2.3673401565072e-05, - "loss": 1.8448, - "step": 745000 - }, - { - "epoch": 10.73, - "learning_rate": 2.3621474823319263e-05, - "loss": 1.8396, - "step": 745500 - }, - { - "epoch": 10.74, - "learning_rate": 2.3569548081566528e-05, - "loss": 1.8434, - "step": 746000 - }, - { - "epoch": 10.75, - "learning_rate": 2.3517621339813793e-05, - "loss": 1.8438, - "step": 746500 - }, - { - "epoch": 10.75, - "learning_rate": 2.3465694598061055e-05, - "loss": 1.8442, - "step": 747000 - }, - { - "epoch": 10.76, - "learning_rate": 2.3413975563275332e-05, - "loss": 1.8365, - "step": 747500 - }, - { - "epoch": 10.77, - "learning_rate": 2.3362048821522597e-05, - "loss": 1.8394, - "step": 748000 - }, - { - "epoch": 10.77, - "learning_rate": 2.3310122079769862e-05, - "loss": 1.8396, - "step": 748500 - }, - { - "epoch": 10.78, - "learning_rate": 2.3258195338017127e-05, - "loss": 1.8463, - "step": 749000 - }, - { - "epoch": 10.79, - "learning_rate": 2.3206268596264392e-05, - "loss": 1.8422, - "step": 749500 - }, - { - "epoch": 10.8, - "learning_rate": 2.3154445707995162e-05, - "loss": 1.8358, - "step": 750000 - }, - { - "epoch": 10.8, - "learning_rate": 2.3102518966242427e-05, - "loss": 1.8415, - "step": 750500 - }, - { - "epoch": 10.81, - "learning_rate": 2.305059222448969e-05, - "loss": 1.8384, - "step": 751000 - }, - { - "epoch": 10.82, - "learning_rate": 2.2998665482736956e-05, - "loss": 1.8431, - "step": 751500 - }, - { - "epoch": 10.82, - "learning_rate": 2.2946738740984218e-05, - "loss": 1.8381, - "step": 752000 - }, - { - "epoch": 10.83, - "learning_rate": 2.289491585271499e-05, - "loss": 1.8402, - "step": 752500 - }, - { - "epoch": 10.84, - "learning_rate": 2.2842989110962256e-05, - "loss": 1.8399, - "step": 753000 - }, - { - "epoch": 10.85, - "learning_rate": 2.279106236920952e-05, - "loss": 1.844, - "step": 753500 - }, - { - "epoch": 10.85, - "learning_rate": 2.2739135627456786e-05, - "loss": 1.8441, - "step": 754000 - }, - { - "epoch": 10.86, - "learning_rate": 2.2687312739187555e-05, - "loss": 1.8416, - "step": 754500 - }, - { - "epoch": 10.87, - "learning_rate": 2.263538599743482e-05, - "loss": 1.8406, - "step": 755000 - }, - { - "epoch": 10.87, - "learning_rate": 2.2583459255682085e-05, - "loss": 1.8428, - "step": 755500 - }, - { - "epoch": 10.88, - "learning_rate": 2.253153251392935e-05, - "loss": 1.8416, - "step": 756000 - }, - { - "epoch": 10.89, - "learning_rate": 2.2479605772176615e-05, - "loss": 1.8425, - "step": 756500 - }, - { - "epoch": 10.9, - "learning_rate": 2.242767903042388e-05, - "loss": 1.8403, - "step": 757000 - }, - { - "epoch": 10.9, - "learning_rate": 2.237585614215465e-05, - "loss": 1.8421, - "step": 757500 - }, - { - "epoch": 10.91, - "learning_rate": 2.2323929400401914e-05, - "loss": 1.8433, - "step": 758000 - }, - { - "epoch": 10.92, - "learning_rate": 2.227200265864918e-05, - "loss": 1.8404, - "step": 758500 - }, - { - "epoch": 10.93, - "learning_rate": 2.2220075916896444e-05, - "loss": 1.8417, - "step": 759000 - }, - { - "epoch": 10.93, - "learning_rate": 2.2168149175143706e-05, - "loss": 1.8366, - "step": 759500 - }, - { - "epoch": 10.94, - "learning_rate": 2.2116222433390974e-05, - "loss": 1.8378, - "step": 760000 - }, - { - "epoch": 10.95, - "learning_rate": 2.2064399545121744e-05, - "loss": 1.84, - "step": 760500 - }, - { - "epoch": 10.95, - "learning_rate": 2.201247280336901e-05, - "loss": 1.8389, - "step": 761000 - }, - { - "epoch": 10.96, - "learning_rate": 2.1960546061616273e-05, - "loss": 1.8398, - "step": 761500 - }, - { - "epoch": 10.97, - "learning_rate": 2.1908619319863538e-05, - "loss": 1.8433, - "step": 762000 - }, - { - "epoch": 10.98, - "learning_rate": 2.1856692578110803e-05, - "loss": 1.845, - "step": 762500 - }, - { - "epoch": 10.98, - "learning_rate": 2.1804869689841573e-05, - "loss": 1.8387, - "step": 763000 - }, - { - "epoch": 10.99, - "learning_rate": 2.1752942948088838e-05, - "loss": 1.8402, - "step": 763500 - }, - { - "epoch": 11.0, - "learning_rate": 2.1701016206336103e-05, - "loss": 1.8353, - "step": 764000 - }, - { - "epoch": 11.0, - "eval_accuracy": 0.6480513749434211, - "eval_loss": 1.702171802520752, - "eval_runtime": 647.7205, - "eval_samples_per_second": 832.052, - "eval_steps_per_second": 34.669, - "step": 764203 - }, - { - "epoch": 11.0, - "learning_rate": 2.1649089464583368e-05, - "loss": 1.837, - "step": 764500 - }, - { - "epoch": 11.01, - "learning_rate": 2.1597266576314137e-05, - "loss": 1.8367, - "step": 765000 - }, - { - "epoch": 11.02, - "learning_rate": 2.1545339834561402e-05, - "loss": 1.8372, - "step": 765500 - }, - { - "epoch": 11.03, - "learning_rate": 2.1493413092808667e-05, - "loss": 1.8376, - "step": 766000 - }, - { - "epoch": 11.03, - "learning_rate": 2.1441486351055932e-05, - "loss": 1.8358, - "step": 766500 - }, - { - "epoch": 11.04, - "learning_rate": 2.13896634627867e-05, - "loss": 1.8376, - "step": 767000 - }, - { - "epoch": 11.05, - "learning_rate": 2.1337736721033966e-05, - "loss": 1.8382, - "step": 767500 - }, - { - "epoch": 11.05, - "learning_rate": 2.128580997928123e-05, - "loss": 1.8359, - "step": 768000 - }, - { - "epoch": 11.06, - "learning_rate": 2.1233883237528496e-05, - "loss": 1.8354, - "step": 768500 - }, - { - "epoch": 11.07, - "learning_rate": 2.1181956495775758e-05, - "loss": 1.8305, - "step": 769000 - }, - { - "epoch": 11.08, - "learning_rate": 2.1130029754023026e-05, - "loss": 1.8408, - "step": 769500 - }, - { - "epoch": 11.08, - "learning_rate": 2.107810301227029e-05, - "loss": 1.8375, - "step": 770000 - }, - { - "epoch": 11.09, - "learning_rate": 2.1026176270517556e-05, - "loss": 1.8367, - "step": 770500 - }, - { - "epoch": 11.1, - "learning_rate": 2.0974353382248325e-05, - "loss": 1.8384, - "step": 771000 - }, - { - "epoch": 11.11, - "learning_rate": 2.092242664049559e-05, - "loss": 1.8399, - "step": 771500 - }, - { - "epoch": 11.11, - "learning_rate": 2.0870499898742855e-05, - "loss": 1.8376, - "step": 772000 - }, - { - "epoch": 11.12, - "learning_rate": 2.081857315699012e-05, - "loss": 1.8338, - "step": 772500 - }, - { - "epoch": 11.13, - "learning_rate": 2.0766646415237385e-05, - "loss": 1.839, - "step": 773000 - }, - { - "epoch": 11.13, - "learning_rate": 2.0714823526968155e-05, - "loss": 1.8359, - "step": 773500 - }, - { - "epoch": 11.14, - "learning_rate": 2.066289678521542e-05, - "loss": 1.8366, - "step": 774000 - }, - { - "epoch": 11.15, - "learning_rate": 2.0610970043462685e-05, - "loss": 1.8367, - "step": 774500 - }, - { - "epoch": 11.16, - "learning_rate": 2.055904330170995e-05, - "loss": 1.8327, - "step": 775000 - }, - { - "epoch": 11.16, - "learning_rate": 2.050711655995721e-05, - "loss": 1.8384, - "step": 775500 - }, - { - "epoch": 11.17, - "learning_rate": 2.0455293671687984e-05, - "loss": 1.839, - "step": 776000 - }, - { - "epoch": 11.18, - "learning_rate": 2.0403366929935245e-05, - "loss": 1.8345, - "step": 776500 - }, - { - "epoch": 11.18, - "learning_rate": 2.0351440188182514e-05, - "loss": 1.8379, - "step": 777000 - }, - { - "epoch": 11.19, - "learning_rate": 2.029951344642978e-05, - "loss": 1.834, - "step": 777500 - }, - { - "epoch": 11.2, - "learning_rate": 2.0247586704677044e-05, - "loss": 1.8389, - "step": 778000 - }, - { - "epoch": 11.21, - "learning_rate": 2.0195763816407813e-05, - "loss": 1.8309, - "step": 778500 - }, - { - "epoch": 11.21, - "learning_rate": 2.0143837074655078e-05, - "loss": 1.8367, - "step": 779000 - }, - { - "epoch": 11.22, - "learning_rate": 2.0091910332902343e-05, - "loss": 1.8367, - "step": 779500 - }, - { - "epoch": 11.23, - "learning_rate": 2.0039983591149608e-05, - "loss": 1.834, - "step": 780000 - }, - { - "epoch": 11.23, - "learning_rate": 1.9988056849396873e-05, - "loss": 1.837, - "step": 780500 - }, - { - "epoch": 11.24, - "learning_rate": 1.9936130107644134e-05, - "loss": 1.8344, - "step": 781000 - }, - { - "epoch": 11.25, - "learning_rate": 1.9884203365891403e-05, - "loss": 1.8388, - "step": 781500 - }, - { - "epoch": 11.26, - "learning_rate": 1.9832380477622172e-05, - "loss": 1.8317, - "step": 782000 - }, - { - "epoch": 11.26, - "learning_rate": 1.9780453735869437e-05, - "loss": 1.8334, - "step": 782500 - }, - { - "epoch": 11.27, - "learning_rate": 1.97285269941167e-05, - "loss": 1.8407, - "step": 783000 - }, - { - "epoch": 11.28, - "learning_rate": 1.9676600252363967e-05, - "loss": 1.8296, - "step": 783500 - }, - { - "epoch": 11.28, - "learning_rate": 1.962467351061123e-05, - "loss": 1.8319, - "step": 784000 - }, - { - "epoch": 11.29, - "learning_rate": 1.9572850622342e-05, - "loss": 1.8387, - "step": 784500 - }, - { - "epoch": 11.3, - "learning_rate": 1.9520923880589266e-05, - "loss": 1.8324, - "step": 785000 - }, - { - "epoch": 11.31, - "learning_rate": 1.946899713883653e-05, - "loss": 1.83, - "step": 785500 - }, - { - "epoch": 11.31, - "learning_rate": 1.9417070397083796e-05, - "loss": 1.8346, - "step": 786000 - }, - { - "epoch": 11.32, - "learning_rate": 1.936514365533106e-05, - "loss": 1.832, - "step": 786500 - }, - { - "epoch": 11.33, - "learning_rate": 1.9313216913578326e-05, - "loss": 1.8318, - "step": 787000 - }, - { - "epoch": 11.34, - "learning_rate": 1.9261290171825588e-05, - "loss": 1.8377, - "step": 787500 - }, - { - "epoch": 11.34, - "learning_rate": 1.920946728355636e-05, - "loss": 1.8335, - "step": 788000 - }, - { - "epoch": 11.35, - "learning_rate": 1.9157540541803622e-05, - "loss": 1.8374, - "step": 788500 - }, - { - "epoch": 11.36, - "learning_rate": 1.910561380005089e-05, - "loss": 1.8371, - "step": 789000 - }, - { - "epoch": 11.36, - "learning_rate": 1.9053687058298152e-05, - "loss": 1.8323, - "step": 789500 - }, - { - "epoch": 11.37, - "learning_rate": 1.900176031654542e-05, - "loss": 1.835, - "step": 790000 - }, - { - "epoch": 11.38, - "learning_rate": 1.8949937428276186e-05, - "loss": 1.8342, - "step": 790500 - }, - { - "epoch": 11.39, - "learning_rate": 1.8898010686523455e-05, - "loss": 1.8323, - "step": 791000 - }, - { - "epoch": 11.39, - "learning_rate": 1.8846083944770716e-05, - "loss": 1.8341, - "step": 791500 - }, - { - "epoch": 11.4, - "learning_rate": 1.8794157203017984e-05, - "loss": 1.8349, - "step": 792000 - }, - { - "epoch": 11.41, - "learning_rate": 1.874223046126525e-05, - "loss": 1.8314, - "step": 792500 - }, - { - "epoch": 11.41, - "learning_rate": 1.869040757299602e-05, - "loss": 1.8342, - "step": 793000 - }, - { - "epoch": 11.42, - "learning_rate": 1.8638480831243284e-05, - "loss": 1.8299, - "step": 793500 - }, - { - "epoch": 11.43, - "learning_rate": 1.858655408949055e-05, - "loss": 1.8336, - "step": 794000 - }, - { - "epoch": 11.44, - "learning_rate": 1.8534627347737814e-05, - "loss": 1.8342, - "step": 794500 - }, - { - "epoch": 11.44, - "learning_rate": 1.8482700605985075e-05, - "loss": 1.8341, - "step": 795000 - }, - { - "epoch": 11.45, - "learning_rate": 1.8430877717715848e-05, - "loss": 1.8299, - "step": 795500 - }, - { - "epoch": 11.46, - "learning_rate": 1.8378950975963113e-05, - "loss": 1.8331, - "step": 796000 - }, - { - "epoch": 11.46, - "learning_rate": 1.8327024234210378e-05, - "loss": 1.8351, - "step": 796500 - }, - { - "epoch": 11.47, - "learning_rate": 1.827509749245764e-05, - "loss": 1.8326, - "step": 797000 - }, - { - "epoch": 11.48, - "learning_rate": 1.8223170750704908e-05, - "loss": 1.8316, - "step": 797500 - }, - { - "epoch": 11.49, - "learning_rate": 1.8171347862435674e-05, - "loss": 1.8373, - "step": 798000 - }, - { - "epoch": 11.49, - "learning_rate": 1.8119421120682942e-05, - "loss": 1.8335, - "step": 798500 - }, - { - "epoch": 11.5, - "learning_rate": 1.8067494378930204e-05, - "loss": 1.8332, - "step": 799000 - }, - { - "epoch": 11.51, - "learning_rate": 1.8015567637177472e-05, - "loss": 1.8335, - "step": 799500 - }, - { - "epoch": 11.52, - "learning_rate": 1.7963640895424734e-05, - "loss": 1.834, - "step": 800000 - }, - { - "epoch": 11.52, - "learning_rate": 1.7911818007155507e-05, - "loss": 1.8336, - "step": 800500 - }, - { - "epoch": 11.53, - "learning_rate": 1.785989126540277e-05, - "loss": 1.8333, - "step": 801000 - }, - { - "epoch": 11.54, - "learning_rate": 1.7807964523650037e-05, - "loss": 1.8331, - "step": 801500 - }, - { - "epoch": 11.54, - "learning_rate": 1.77560377818973e-05, - "loss": 1.8332, - "step": 802000 - }, - { - "epoch": 11.55, - "learning_rate": 1.7704111040144563e-05, - "loss": 1.831, - "step": 802500 - }, - { - "epoch": 11.56, - "learning_rate": 1.765218429839183e-05, - "loss": 1.8317, - "step": 803000 - }, - { - "epoch": 11.57, - "learning_rate": 1.7600257556639093e-05, - "loss": 1.8344, - "step": 803500 - }, - { - "epoch": 11.57, - "learning_rate": 1.7548434668369866e-05, - "loss": 1.8334, - "step": 804000 - }, - { - "epoch": 11.58, - "learning_rate": 1.7496507926617127e-05, - "loss": 1.8333, - "step": 804500 - }, - { - "epoch": 11.59, - "learning_rate": 1.7444581184864396e-05, - "loss": 1.8317, - "step": 805000 - }, - { - "epoch": 11.59, - "learning_rate": 1.7392654443111657e-05, - "loss": 1.8368, - "step": 805500 - }, - { - "epoch": 11.6, - "learning_rate": 1.734083155484243e-05, - "loss": 1.8312, - "step": 806000 - }, - { - "epoch": 11.61, - "learning_rate": 1.728890481308969e-05, - "loss": 1.8325, - "step": 806500 - }, - { - "epoch": 11.62, - "learning_rate": 1.723697807133696e-05, - "loss": 1.8306, - "step": 807000 - }, - { - "epoch": 11.62, - "learning_rate": 1.718505132958422e-05, - "loss": 1.8335, - "step": 807500 - }, - { - "epoch": 11.63, - "learning_rate": 1.7133228441314994e-05, - "loss": 1.833, - "step": 808000 - }, - { - "epoch": 11.64, - "learning_rate": 1.7081301699562256e-05, - "loss": 1.8344, - "step": 808500 - }, - { - "epoch": 11.64, - "learning_rate": 1.7029374957809524e-05, - "loss": 1.8356, - "step": 809000 - }, - { - "epoch": 11.65, - "learning_rate": 1.697744821605679e-05, - "loss": 1.8329, - "step": 809500 - }, - { - "epoch": 11.66, - "learning_rate": 1.692552147430405e-05, - "loss": 1.8343, - "step": 810000 - }, - { - "epoch": 11.67, - "learning_rate": 1.6873698586034824e-05, - "loss": 1.8342, - "step": 810500 - }, - { - "epoch": 11.67, - "learning_rate": 1.682177184428209e-05, - "loss": 1.8328, - "step": 811000 - }, - { - "epoch": 11.68, - "learning_rate": 1.6769845102529354e-05, - "loss": 1.8328, - "step": 811500 - }, - { - "epoch": 11.69, - "learning_rate": 1.6717918360776615e-05, - "loss": 1.832, - "step": 812000 - }, - { - "epoch": 11.7, - "learning_rate": 1.6665991619023883e-05, - "loss": 1.8305, - "step": 812500 - }, - { - "epoch": 11.7, - "learning_rate": 1.6614064877271145e-05, - "loss": 1.8287, - "step": 813000 - }, - { - "epoch": 11.71, - "learning_rate": 1.6562241989001918e-05, - "loss": 1.8324, - "step": 813500 - }, - { - "epoch": 11.72, - "learning_rate": 1.651031524724918e-05, - "loss": 1.8325, - "step": 814000 - }, - { - "epoch": 11.72, - "learning_rate": 1.6458388505496448e-05, - "loss": 1.8304, - "step": 814500 - }, - { - "epoch": 11.73, - "learning_rate": 1.640646176374371e-05, - "loss": 1.8321, - "step": 815000 - }, - { - "epoch": 11.74, - "learning_rate": 1.6354638875474482e-05, - "loss": 1.8324, - "step": 815500 - }, - { - "epoch": 11.75, - "learning_rate": 1.6302712133721744e-05, - "loss": 1.8286, - "step": 816000 - }, - { - "epoch": 11.75, - "learning_rate": 1.6250785391969012e-05, - "loss": 1.8319, - "step": 816500 - }, - { - "epoch": 11.76, - "learning_rate": 1.6198858650216277e-05, - "loss": 1.8282, - "step": 817000 - }, - { - "epoch": 11.77, - "learning_rate": 1.6147035761947047e-05, - "loss": 1.8265, - "step": 817500 - }, - { - "epoch": 11.77, - "learning_rate": 1.609510902019431e-05, - "loss": 1.8288, - "step": 818000 - }, - { - "epoch": 11.78, - "learning_rate": 1.6043182278441576e-05, - "loss": 1.8334, - "step": 818500 - }, - { - "epoch": 11.79, - "learning_rate": 1.599125553668884e-05, - "loss": 1.8322, - "step": 819000 - }, - { - "epoch": 11.8, - "learning_rate": 1.5939328794936103e-05, - "loss": 1.8318, - "step": 819500 - }, - { - "epoch": 11.8, - "learning_rate": 1.5887505906666876e-05, - "loss": 1.8281, - "step": 820000 - }, - { - "epoch": 11.81, - "learning_rate": 1.583557916491414e-05, - "loss": 1.8279, - "step": 820500 - }, - { - "epoch": 11.82, - "learning_rate": 1.5783652423161406e-05, - "loss": 1.8319, - "step": 821000 - }, - { - "epoch": 11.82, - "learning_rate": 1.5731725681408667e-05, - "loss": 1.8281, - "step": 821500 - }, - { - "epoch": 11.83, - "learning_rate": 1.5679798939655935e-05, - "loss": 1.83, - "step": 822000 - }, - { - "epoch": 11.84, - "learning_rate": 1.5627976051386705e-05, - "loss": 1.831, - "step": 822500 - }, - { - "epoch": 11.85, - "learning_rate": 1.557604930963397e-05, - "loss": 1.8308, - "step": 823000 - }, - { - "epoch": 11.85, - "learning_rate": 1.552412256788123e-05, - "loss": 1.8302, - "step": 823500 - }, - { - "epoch": 11.86, - "learning_rate": 1.54721958261285e-05, - "loss": 1.8306, - "step": 824000 - }, - { - "epoch": 11.87, - "learning_rate": 1.5420372937859266e-05, - "loss": 1.828, - "step": 824500 - }, - { - "epoch": 11.88, - "learning_rate": 1.5368446196106534e-05, - "loss": 1.8306, - "step": 825000 - }, - { - "epoch": 11.88, - "learning_rate": 1.53165194543538e-05, - "loss": 1.8288, - "step": 825500 - }, - { - "epoch": 11.89, - "learning_rate": 1.5264592712601064e-05, - "loss": 1.8304, - "step": 826000 - }, - { - "epoch": 11.9, - "learning_rate": 1.5212769824331832e-05, - "loss": 1.8285, - "step": 826500 - }, - { - "epoch": 11.9, - "learning_rate": 1.5160843082579099e-05, - "loss": 1.8287, - "step": 827000 - }, - { - "epoch": 11.91, - "learning_rate": 1.5108916340826362e-05, - "loss": 1.8291, - "step": 827500 - }, - { - "epoch": 11.92, - "learning_rate": 1.5056989599073628e-05, - "loss": 1.8314, - "step": 828000 - }, - { - "epoch": 11.93, - "learning_rate": 1.5005062857320892e-05, - "loss": 1.8258, - "step": 828500 - }, - { - "epoch": 11.93, - "learning_rate": 1.4953239969051663e-05, - "loss": 1.8287, - "step": 829000 - }, - { - "epoch": 11.94, - "learning_rate": 1.4901313227298926e-05, - "loss": 1.8294, - "step": 829500 - }, - { - "epoch": 11.95, - "learning_rate": 1.4849386485546193e-05, - "loss": 1.8328, - "step": 830000 - }, - { - "epoch": 11.95, - "learning_rate": 1.4797459743793458e-05, - "loss": 1.8287, - "step": 830500 - }, - { - "epoch": 11.96, - "learning_rate": 1.4745636855524227e-05, - "loss": 1.8279, - "step": 831000 - }, - { - "epoch": 11.97, - "learning_rate": 1.4693710113771492e-05, - "loss": 1.8286, - "step": 831500 - }, - { - "epoch": 11.98, - "learning_rate": 1.4641783372018755e-05, - "loss": 1.8254, - "step": 832000 - }, - { - "epoch": 11.98, - "learning_rate": 1.4589856630266022e-05, - "loss": 1.8274, - "step": 832500 - }, - { - "epoch": 11.99, - "learning_rate": 1.4538033741996793e-05, - "loss": 1.8279, - "step": 833000 - }, - { - "epoch": 12.0, - "learning_rate": 1.4486107000244057e-05, - "loss": 1.8245, - "step": 833500 - }, - { - "epoch": 12.0, - "eval_accuracy": 0.6503763935317559, - "eval_loss": 1.6876965761184692, - "eval_runtime": 647.389, - "eval_samples_per_second": 832.478, - "eval_steps_per_second": 34.687, - "step": 833676 - }, - { - "epoch": 12.0, - "learning_rate": 1.443418025849132e-05, - "loss": 1.8307, - "step": 834000 - }, - { - "epoch": 12.01, - "learning_rate": 1.4382253516738586e-05, - "loss": 1.8256, - "step": 834500 - }, - { - "epoch": 12.02, - "learning_rate": 1.4330430628469358e-05, - "loss": 1.829, - "step": 835000 - }, - { - "epoch": 12.03, - "learning_rate": 1.4278503886716621e-05, - "loss": 1.8268, - "step": 835500 - }, - { - "epoch": 12.03, - "learning_rate": 1.4226577144963884e-05, - "loss": 1.8277, - "step": 836000 - }, - { - "epoch": 12.04, - "learning_rate": 1.417465040321115e-05, - "loss": 1.8258, - "step": 836500 - }, - { - "epoch": 12.05, - "learning_rate": 1.4122723661458414e-05, - "loss": 1.8224, - "step": 837000 - }, - { - "epoch": 12.06, - "learning_rate": 1.4070900773189185e-05, - "loss": 1.827, - "step": 837500 - }, - { - "epoch": 12.06, - "learning_rate": 1.401897403143645e-05, - "loss": 1.8239, - "step": 838000 - }, - { - "epoch": 12.07, - "learning_rate": 1.3967047289683715e-05, - "loss": 1.8255, - "step": 838500 - }, - { - "epoch": 12.08, - "learning_rate": 1.391512054793098e-05, - "loss": 1.8282, - "step": 839000 - }, - { - "epoch": 12.08, - "learning_rate": 1.3863193806178247e-05, - "loss": 1.8262, - "step": 839500 - }, - { - "epoch": 12.09, - "learning_rate": 1.3811370917909014e-05, - "loss": 1.8251, - "step": 840000 - }, - { - "epoch": 12.1, - "learning_rate": 1.3759444176156281e-05, - "loss": 1.8257, - "step": 840500 - }, - { - "epoch": 12.11, - "learning_rate": 1.3707517434403544e-05, - "loss": 1.825, - "step": 841000 - }, - { - "epoch": 12.11, - "learning_rate": 1.3655590692650807e-05, - "loss": 1.8269, - "step": 841500 - }, - { - "epoch": 12.12, - "learning_rate": 1.3603767804381579e-05, - "loss": 1.8266, - "step": 842000 - }, - { - "epoch": 12.13, - "learning_rate": 1.3551841062628845e-05, - "loss": 1.8275, - "step": 842500 - }, - { - "epoch": 12.13, - "learning_rate": 1.3499914320876109e-05, - "loss": 1.8266, - "step": 843000 - }, - { - "epoch": 12.14, - "learning_rate": 1.3447987579123372e-05, - "loss": 1.8256, - "step": 843500 - }, - { - "epoch": 12.15, - "learning_rate": 1.3396164690854143e-05, - "loss": 1.8266, - "step": 844000 - }, - { - "epoch": 12.16, - "learning_rate": 1.334423794910141e-05, - "loss": 1.8231, - "step": 844500 - }, - { - "epoch": 12.16, - "learning_rate": 1.3292311207348673e-05, - "loss": 1.8236, - "step": 845000 - }, - { - "epoch": 12.17, - "learning_rate": 1.3240384465595936e-05, - "loss": 1.8239, - "step": 845500 - }, - { - "epoch": 12.18, - "learning_rate": 1.3188561577326707e-05, - "loss": 1.8254, - "step": 846000 - }, - { - "epoch": 12.18, - "learning_rate": 1.3136634835573972e-05, - "loss": 1.8244, - "step": 846500 - }, - { - "epoch": 12.19, - "learning_rate": 1.3084708093821237e-05, - "loss": 1.8238, - "step": 847000 - }, - { - "epoch": 12.2, - "learning_rate": 1.3032781352068502e-05, - "loss": 1.823, - "step": 847500 - }, - { - "epoch": 12.21, - "learning_rate": 1.2980854610315769e-05, - "loss": 1.8253, - "step": 848000 - }, - { - "epoch": 12.21, - "learning_rate": 1.2929031722046537e-05, - "loss": 1.8226, - "step": 848500 - }, - { - "epoch": 12.22, - "learning_rate": 1.2877104980293803e-05, - "loss": 1.8251, - "step": 849000 - }, - { - "epoch": 12.23, - "learning_rate": 1.2825178238541067e-05, - "loss": 1.8232, - "step": 849500 - }, - { - "epoch": 12.23, - "learning_rate": 1.2773251496788333e-05, - "loss": 1.8279, - "step": 850000 - }, - { - "epoch": 12.24, - "learning_rate": 1.2721324755035596e-05, - "loss": 1.8273, - "step": 850500 - }, - { - "epoch": 12.25, - "learning_rate": 1.2669501866766368e-05, - "loss": 1.8245, - "step": 851000 - }, - { - "epoch": 12.26, - "learning_rate": 1.2617575125013631e-05, - "loss": 1.8226, - "step": 851500 - }, - { - "epoch": 12.26, - "learning_rate": 1.2565648383260897e-05, - "loss": 1.8262, - "step": 852000 - }, - { - "epoch": 12.27, - "learning_rate": 1.251372164150816e-05, - "loss": 1.8248, - "step": 852500 - }, - { - "epoch": 12.28, - "learning_rate": 1.246189875323893e-05, - "loss": 1.8203, - "step": 853000 - }, - { - "epoch": 12.29, - "learning_rate": 1.2409972011486195e-05, - "loss": 1.8257, - "step": 853500 - }, - { - "epoch": 12.29, - "learning_rate": 1.235804526973346e-05, - "loss": 1.8219, - "step": 854000 - }, - { - "epoch": 12.3, - "learning_rate": 1.2306118527980725e-05, - "loss": 1.8203, - "step": 854500 - }, - { - "epoch": 12.31, - "learning_rate": 1.2254295639711495e-05, - "loss": 1.8241, - "step": 855000 - }, - { - "epoch": 12.31, - "learning_rate": 1.220236889795876e-05, - "loss": 1.8251, - "step": 855500 - }, - { - "epoch": 12.32, - "learning_rate": 1.2150442156206024e-05, - "loss": 1.8244, - "step": 856000 - }, - { - "epoch": 12.33, - "learning_rate": 1.2098515414453291e-05, - "loss": 1.8227, - "step": 856500 - }, - { - "epoch": 12.34, - "learning_rate": 1.204669252618406e-05, - "loss": 1.8229, - "step": 857000 - }, - { - "epoch": 12.34, - "learning_rate": 1.1994765784431326e-05, - "loss": 1.8256, - "step": 857500 - }, - { - "epoch": 12.35, - "learning_rate": 1.194283904267859e-05, - "loss": 1.8224, - "step": 858000 - }, - { - "epoch": 12.36, - "learning_rate": 1.1890912300925855e-05, - "loss": 1.8221, - "step": 858500 - }, - { - "epoch": 12.36, - "learning_rate": 1.1838985559173119e-05, - "loss": 1.826, - "step": 859000 - }, - { - "epoch": 12.37, - "learning_rate": 1.178716267090389e-05, - "loss": 1.8251, - "step": 859500 - }, - { - "epoch": 12.38, - "learning_rate": 1.1735235929151155e-05, - "loss": 1.8236, - "step": 860000 - }, - { - "epoch": 12.39, - "learning_rate": 1.1683309187398418e-05, - "loss": 1.82, - "step": 860500 - }, - { - "epoch": 12.39, - "learning_rate": 1.1631382445645683e-05, - "loss": 1.8277, - "step": 861000 - }, - { - "epoch": 12.4, - "learning_rate": 1.1579455703892948e-05, - "loss": 1.8275, - "step": 861500 - }, - { - "epoch": 12.41, - "learning_rate": 1.152763281562372e-05, - "loss": 1.8221, - "step": 862000 - }, - { - "epoch": 12.41, - "learning_rate": 1.1475706073870982e-05, - "loss": 1.8203, - "step": 862500 - }, - { - "epoch": 12.42, - "learning_rate": 1.1423779332118247e-05, - "loss": 1.8198, - "step": 863000 - }, - { - "epoch": 12.43, - "learning_rate": 1.1371852590365512e-05, - "loss": 1.8251, - "step": 863500 - }, - { - "epoch": 12.44, - "learning_rate": 1.1319925848612779e-05, - "loss": 1.8221, - "step": 864000 - }, - { - "epoch": 12.44, - "learning_rate": 1.1268102960343548e-05, - "loss": 1.8245, - "step": 864500 - }, - { - "epoch": 12.45, - "learning_rate": 1.1216176218590813e-05, - "loss": 1.8263, - "step": 865000 - }, - { - "epoch": 12.46, - "learning_rate": 1.1164249476838078e-05, - "loss": 1.8232, - "step": 865500 - }, - { - "epoch": 12.47, - "learning_rate": 1.1112322735085343e-05, - "loss": 1.821, - "step": 866000 - }, - { - "epoch": 12.47, - "learning_rate": 1.1060499846816113e-05, - "loss": 1.8223, - "step": 866500 - }, - { - "epoch": 12.48, - "learning_rate": 1.1008573105063378e-05, - "loss": 1.8272, - "step": 867000 - }, - { - "epoch": 12.49, - "learning_rate": 1.0956646363310643e-05, - "loss": 1.8216, - "step": 867500 - }, - { - "epoch": 12.49, - "learning_rate": 1.0904719621557907e-05, - "loss": 1.823, - "step": 868000 - }, - { - "epoch": 12.5, - "learning_rate": 1.085279287980517e-05, - "loss": 1.8194, - "step": 868500 - }, - { - "epoch": 12.51, - "learning_rate": 1.0800969991535942e-05, - "loss": 1.8213, - "step": 869000 - }, - { - "epoch": 12.52, - "learning_rate": 1.0749043249783207e-05, - "loss": 1.8196, - "step": 869500 - }, - { - "epoch": 12.52, - "learning_rate": 1.069711650803047e-05, - "loss": 1.8231, - "step": 870000 - }, - { - "epoch": 12.53, - "learning_rate": 1.0645189766277735e-05, - "loss": 1.8158, - "step": 870500 - }, - { - "epoch": 12.54, - "learning_rate": 1.0593366878008506e-05, - "loss": 1.8195, - "step": 871000 - }, - { - "epoch": 12.54, - "learning_rate": 1.054144013625577e-05, - "loss": 1.8227, - "step": 871500 - }, - { - "epoch": 12.55, - "learning_rate": 1.0489513394503034e-05, - "loss": 1.8157, - "step": 872000 - }, - { - "epoch": 12.56, - "learning_rate": 1.0437586652750301e-05, - "loss": 1.8217, - "step": 872500 - }, - { - "epoch": 12.57, - "learning_rate": 1.0385659910997566e-05, - "loss": 1.8213, - "step": 873000 - }, - { - "epoch": 12.57, - "learning_rate": 1.0333837022728336e-05, - "loss": 1.8229, - "step": 873500 - }, - { - "epoch": 12.58, - "learning_rate": 1.02819102809756e-05, - "loss": 1.8199, - "step": 874000 - }, - { - "epoch": 12.59, - "learning_rate": 1.0229983539222865e-05, - "loss": 1.8235, - "step": 874500 - }, - { - "epoch": 12.59, - "learning_rate": 1.017805679747013e-05, - "loss": 1.8226, - "step": 875000 - }, - { - "epoch": 12.6, - "learning_rate": 1.0126130055717395e-05, - "loss": 1.8207, - "step": 875500 - }, - { - "epoch": 12.61, - "learning_rate": 1.0074307167448165e-05, - "loss": 1.8171, - "step": 876000 - }, - { - "epoch": 12.62, - "learning_rate": 1.002238042569543e-05, - "loss": 1.8231, - "step": 876500 - }, - { - "epoch": 12.62, - "learning_rate": 9.970453683942695e-06, - "loss": 1.8192, - "step": 877000 - }, - { - "epoch": 12.63, - "learning_rate": 9.918526942189958e-06, - "loss": 1.8194, - "step": 877500 - }, - { - "epoch": 12.64, - "learning_rate": 9.866600200437223e-06, - "loss": 1.8203, - "step": 878000 - }, - { - "epoch": 12.65, - "learning_rate": 9.814673458684488e-06, - "loss": 1.8184, - "step": 878500 - }, - { - "epoch": 12.65, - "learning_rate": 9.762850570415259e-06, - "loss": 1.8189, - "step": 879000 - }, - { - "epoch": 12.66, - "learning_rate": 9.710923828662522e-06, - "loss": 1.8217, - "step": 879500 - }, - { - "epoch": 12.67, - "learning_rate": 9.658997086909787e-06, - "loss": 1.8219, - "step": 880000 - }, - { - "epoch": 12.67, - "learning_rate": 9.607070345157054e-06, - "loss": 1.8226, - "step": 880500 - }, - { - "epoch": 12.68, - "learning_rate": 9.555247456887823e-06, - "loss": 1.8191, - "step": 881000 - }, - { - "epoch": 12.69, - "learning_rate": 9.503320715135088e-06, - "loss": 1.8191, - "step": 881500 - }, - { - "epoch": 12.7, - "learning_rate": 9.451393973382353e-06, - "loss": 1.821, - "step": 882000 - }, - { - "epoch": 12.7, - "learning_rate": 9.399467231629618e-06, - "loss": 1.8177, - "step": 882500 - }, - { - "epoch": 12.71, - "learning_rate": 9.347644343360388e-06, - "loss": 1.8254, - "step": 883000 - }, - { - "epoch": 12.72, - "learning_rate": 9.295717601607653e-06, - "loss": 1.8205, - "step": 883500 - }, - { - "epoch": 12.72, - "learning_rate": 9.243790859854917e-06, - "loss": 1.8196, - "step": 884000 - }, - { - "epoch": 12.73, - "learning_rate": 9.191864118102182e-06, - "loss": 1.8206, - "step": 884500 - }, - { - "epoch": 12.74, - "learning_rate": 9.139937376349447e-06, - "loss": 1.8201, - "step": 885000 - }, - { - "epoch": 12.75, - "learning_rate": 9.088114488080217e-06, - "loss": 1.8182, - "step": 885500 - }, - { - "epoch": 12.75, - "learning_rate": 9.036187746327482e-06, - "loss": 1.8198, - "step": 886000 - }, - { - "epoch": 12.76, - "learning_rate": 8.984261004574747e-06, - "loss": 1.8238, - "step": 886500 - }, - { - "epoch": 12.77, - "learning_rate": 8.93233426282201e-06, - "loss": 1.8186, - "step": 887000 - }, - { - "epoch": 12.77, - "learning_rate": 8.880407521069275e-06, - "loss": 1.8159, - "step": 887500 - }, - { - "epoch": 12.78, - "learning_rate": 8.828584632800046e-06, - "loss": 1.818, - "step": 888000 - }, - { - "epoch": 12.79, - "learning_rate": 8.776657891047311e-06, - "loss": 1.8221, - "step": 888500 - }, - { - "epoch": 12.8, - "learning_rate": 8.724731149294576e-06, - "loss": 1.8165, - "step": 889000 - }, - { - "epoch": 12.8, - "learning_rate": 8.67280440754184e-06, - "loss": 1.8205, - "step": 889500 - }, - { - "epoch": 12.81, - "learning_rate": 8.62098151927261e-06, - "loss": 1.8192, - "step": 890000 - }, - { - "epoch": 12.82, - "learning_rate": 8.569054777519875e-06, - "loss": 1.8155, - "step": 890500 - }, - { - "epoch": 12.83, - "learning_rate": 8.51712803576714e-06, - "loss": 1.8241, - "step": 891000 - }, - { - "epoch": 12.83, - "learning_rate": 8.465201294014405e-06, - "loss": 1.8203, - "step": 891500 - }, - { - "epoch": 12.84, - "learning_rate": 8.413378405745175e-06, - "loss": 1.8153, - "step": 892000 - }, - { - "epoch": 12.85, - "learning_rate": 8.36145166399244e-06, - "loss": 1.8188, - "step": 892500 - }, - { - "epoch": 12.85, - "learning_rate": 8.309524922239705e-06, - "loss": 1.8233, - "step": 893000 - }, - { - "epoch": 12.86, - "learning_rate": 8.25759818048697e-06, - "loss": 1.82, - "step": 893500 - }, - { - "epoch": 12.87, - "learning_rate": 8.205775292217739e-06, - "loss": 1.8259, - "step": 894000 - }, - { - "epoch": 12.88, - "learning_rate": 8.153848550465004e-06, - "loss": 1.8141, - "step": 894500 - }, - { - "epoch": 12.88, - "learning_rate": 8.101921808712269e-06, - "loss": 1.8185, - "step": 895000 - }, - { - "epoch": 12.89, - "learning_rate": 8.049995066959534e-06, - "loss": 1.8158, - "step": 895500 - }, - { - "epoch": 12.9, - "learning_rate": 7.998172178690303e-06, - "loss": 1.8182, - "step": 896000 - }, - { - "epoch": 12.9, - "learning_rate": 7.946245436937568e-06, - "loss": 1.8226, - "step": 896500 - }, - { - "epoch": 12.91, - "learning_rate": 7.894318695184833e-06, - "loss": 1.82, - "step": 897000 - }, - { - "epoch": 12.92, - "learning_rate": 7.842391953432098e-06, - "loss": 1.8245, - "step": 897500 - }, - { - "epoch": 12.93, - "learning_rate": 7.790465211679363e-06, - "loss": 1.8168, - "step": 898000 - }, - { - "epoch": 12.93, - "learning_rate": 7.738538469926628e-06, - "loss": 1.8208, - "step": 898500 - }, - { - "epoch": 12.94, - "learning_rate": 7.686611728173893e-06, - "loss": 1.8174, - "step": 899000 - }, - { - "epoch": 12.95, - "learning_rate": 7.634788839904664e-06, - "loss": 1.8105, - "step": 899500 - }, - { - "epoch": 12.95, - "learning_rate": 7.5828620981519274e-06, - "loss": 1.8165, - "step": 900000 - }, - { - "epoch": 12.96, - "learning_rate": 7.530935356399192e-06, - "loss": 1.8201, - "step": 900500 - }, - { - "epoch": 12.97, - "learning_rate": 7.479008614646457e-06, - "loss": 1.8207, - "step": 901000 - }, - { - "epoch": 12.98, - "learning_rate": 7.427185726377227e-06, - "loss": 1.8191, - "step": 901500 - }, - { - "epoch": 12.98, - "learning_rate": 7.375258984624492e-06, - "loss": 1.8161, - "step": 902000 - }, - { - "epoch": 12.99, - "learning_rate": 7.323332242871757e-06, - "loss": 1.8179, - "step": 902500 - }, - { - "epoch": 13.0, - "learning_rate": 7.271405501119022e-06, - "loss": 1.8191, - "step": 903000 - }, - { - "epoch": 13.0, - "eval_accuracy": 0.651487792294714, - "eval_loss": 1.6829075813293457, - "eval_runtime": 646.4178, - "eval_samples_per_second": 833.729, - "eval_steps_per_second": 34.739, - "step": 903149 - }, - { - "epoch": 13.01, - "learning_rate": 7.2194787593662865e-06, - "loss": 1.8161, - "step": 903500 - }, - { - "epoch": 13.01, - "learning_rate": 7.167655871097056e-06, - "loss": 1.8188, - "step": 904000 - }, - { - "epoch": 13.02, - "learning_rate": 7.115729129344322e-06, - "loss": 1.8164, - "step": 904500 - }, - { - "epoch": 13.03, - "learning_rate": 7.063802387591587e-06, - "loss": 1.8137, - "step": 905000 - }, - { - "epoch": 13.03, - "learning_rate": 7.011875645838852e-06, - "loss": 1.8147, - "step": 905500 - }, - { - "epoch": 13.04, - "learning_rate": 6.960052757569621e-06, - "loss": 1.8158, - "step": 906000 - }, - { - "epoch": 13.05, - "learning_rate": 6.908126015816886e-06, - "loss": 1.8141, - "step": 906500 - }, - { - "epoch": 13.06, - "learning_rate": 6.856199274064151e-06, - "loss": 1.8159, - "step": 907000 - }, - { - "epoch": 13.06, - "learning_rate": 6.804272532311415e-06, - "loss": 1.8189, - "step": 907500 - }, - { - "epoch": 13.07, - "learning_rate": 6.752449644042186e-06, - "loss": 1.818, - "step": 908000 - }, - { - "epoch": 13.08, - "learning_rate": 6.7005229022894505e-06, - "loss": 1.8195, - "step": 908500 - }, - { - "epoch": 13.08, - "learning_rate": 6.648596160536715e-06, - "loss": 1.8131, - "step": 909000 - }, - { - "epoch": 13.09, - "learning_rate": 6.5966694187839795e-06, - "loss": 1.8176, - "step": 909500 - }, - { - "epoch": 13.1, - "learning_rate": 6.544742677031244e-06, - "loss": 1.8162, - "step": 910000 - }, - { - "epoch": 13.11, - "learning_rate": 6.492919788762016e-06, - "loss": 1.8171, - "step": 910500 - }, - { - "epoch": 13.11, - "learning_rate": 6.440993047009279e-06, - "loss": 1.8127, - "step": 911000 - }, - { - "epoch": 13.12, - "learning_rate": 6.389066305256544e-06, - "loss": 1.8153, - "step": 911500 - }, - { - "epoch": 13.13, - "learning_rate": 6.337139563503809e-06, - "loss": 1.816, - "step": 912000 - }, - { - "epoch": 13.13, - "learning_rate": 6.2852128217510745e-06, - "loss": 1.8159, - "step": 912500 - }, - { - "epoch": 13.14, - "learning_rate": 6.233389933481844e-06, - "loss": 1.8191, - "step": 913000 - }, - { - "epoch": 13.15, - "learning_rate": 6.181463191729109e-06, - "loss": 1.8146, - "step": 913500 - }, - { - "epoch": 13.16, - "learning_rate": 6.129536449976374e-06, - "loss": 1.8127, - "step": 914000 - }, - { - "epoch": 13.16, - "learning_rate": 6.077609708223638e-06, - "loss": 1.8159, - "step": 914500 - }, - { - "epoch": 13.17, - "learning_rate": 6.0257868199544085e-06, - "loss": 1.8137, - "step": 915000 - }, - { - "epoch": 13.18, - "learning_rate": 5.973860078201673e-06, - "loss": 1.8139, - "step": 915500 - }, - { - "epoch": 13.18, - "learning_rate": 5.9219333364489374e-06, - "loss": 1.8123, - "step": 916000 - }, - { - "epoch": 13.19, - "learning_rate": 5.870006594696203e-06, - "loss": 1.8142, - "step": 916500 - }, - { - "epoch": 13.2, - "learning_rate": 5.818079852943468e-06, - "loss": 1.8136, - "step": 917000 - }, - { - "epoch": 13.21, - "learning_rate": 5.7662569646742386e-06, - "loss": 1.8203, - "step": 917500 - }, - { - "epoch": 13.21, - "learning_rate": 5.714330222921503e-06, - "loss": 1.8131, - "step": 918000 - }, - { - "epoch": 13.22, - "learning_rate": 5.6624034811687675e-06, - "loss": 1.814, - "step": 918500 - }, - { - "epoch": 13.23, - "learning_rate": 5.6104767394160316e-06, - "loss": 1.8151, - "step": 919000 - }, - { - "epoch": 13.24, - "learning_rate": 5.5585499976632965e-06, - "loss": 1.8176, - "step": 919500 - }, - { - "epoch": 13.24, - "learning_rate": 5.506727109394067e-06, - "loss": 1.8139, - "step": 920000 - }, - { - "epoch": 13.25, - "learning_rate": 5.454800367641332e-06, - "loss": 1.8159, - "step": 920500 - }, - { - "epoch": 13.26, - "learning_rate": 5.402873625888597e-06, - "loss": 1.8192, - "step": 921000 - }, - { - "epoch": 13.26, - "learning_rate": 5.350946884135862e-06, - "loss": 1.8116, - "step": 921500 - }, - { - "epoch": 13.27, - "learning_rate": 5.299123995866632e-06, - "loss": 1.8147, - "step": 922000 - }, - { - "epoch": 13.28, - "learning_rate": 5.247197254113896e-06, - "loss": 1.812, - "step": 922500 - }, - { - "epoch": 13.29, - "learning_rate": 5.195270512361161e-06, - "loss": 1.8176, - "step": 923000 - }, - { - "epoch": 13.29, - "learning_rate": 5.143343770608426e-06, - "loss": 1.814, - "step": 923500 - }, - { - "epoch": 13.3, - "learning_rate": 5.091520882339196e-06, - "loss": 1.8153, - "step": 924000 - }, - { - "epoch": 13.31, - "learning_rate": 5.0395941405864605e-06, - "loss": 1.8171, - "step": 924500 - }, - { - "epoch": 13.31, - "learning_rate": 4.9876673988337254e-06, - "loss": 1.8124, - "step": 925000 - }, - { - "epoch": 13.32, - "learning_rate": 4.93574065708099e-06, - "loss": 1.811, - "step": 925500 - }, - { - "epoch": 13.33, - "learning_rate": 4.883813915328255e-06, - "loss": 1.8125, - "step": 926000 - }, - { - "epoch": 13.34, - "learning_rate": 4.83188717357552e-06, - "loss": 1.8155, - "step": 926500 - }, - { - "epoch": 13.34, - "learning_rate": 4.78006428530629e-06, - "loss": 1.8126, - "step": 927000 - }, - { - "epoch": 13.35, - "learning_rate": 4.728137543553555e-06, - "loss": 1.8134, - "step": 927500 - }, - { - "epoch": 13.36, - "learning_rate": 4.67621080180082e-06, - "loss": 1.8132, - "step": 928000 - }, - { - "epoch": 13.36, - "learning_rate": 4.6242840600480845e-06, - "loss": 1.8159, - "step": 928500 - }, - { - "epoch": 13.37, - "learning_rate": 4.572357318295349e-06, - "loss": 1.8134, - "step": 929000 - }, - { - "epoch": 13.38, - "learning_rate": 4.52053443002612e-06, - "loss": 1.8153, - "step": 929500 - }, - { - "epoch": 13.39, - "learning_rate": 4.468607688273385e-06, - "loss": 1.81, - "step": 930000 - }, - { - "epoch": 13.39, - "learning_rate": 4.416680946520649e-06, - "loss": 1.8081, - "step": 930500 - }, - { - "epoch": 13.4, - "learning_rate": 4.364754204767914e-06, - "loss": 1.8167, - "step": 931000 - }, - { - "epoch": 13.41, - "learning_rate": 4.312931316498684e-06, - "loss": 1.81, - "step": 931500 - }, - { - "epoch": 13.42, - "learning_rate": 4.261004574745948e-06, - "loss": 1.8106, - "step": 932000 - }, - { - "epoch": 13.42, - "learning_rate": 4.209077832993213e-06, - "loss": 1.812, - "step": 932500 - }, - { - "epoch": 13.43, - "learning_rate": 4.157151091240478e-06, - "loss": 1.8154, - "step": 933000 - }, - { - "epoch": 13.44, - "learning_rate": 4.105328202971248e-06, - "loss": 1.8144, - "step": 933500 - }, - { - "epoch": 13.44, - "learning_rate": 4.0534014612185135e-06, - "loss": 1.8145, - "step": 934000 - }, - { - "epoch": 13.45, - "learning_rate": 4.001474719465778e-06, - "loss": 1.8156, - "step": 934500 - }, - { - "epoch": 13.46, - "learning_rate": 3.949547977713042e-06, - "loss": 1.8142, - "step": 935000 - }, - { - "epoch": 13.47, - "learning_rate": 3.897725089443813e-06, - "loss": 1.8167, - "step": 935500 - }, - { - "epoch": 13.47, - "learning_rate": 3.845798347691078e-06, - "loss": 1.8122, - "step": 936000 - }, - { - "epoch": 13.48, - "learning_rate": 3.7938716059383423e-06, - "loss": 1.8148, - "step": 936500 - }, - { - "epoch": 13.49, - "learning_rate": 3.741944864185607e-06, - "loss": 1.8149, - "step": 937000 - }, - { - "epoch": 13.49, - "learning_rate": 3.6901219759163772e-06, - "loss": 1.8154, - "step": 937500 - }, - { - "epoch": 13.5, - "learning_rate": 3.6381952341636426e-06, - "loss": 1.8149, - "step": 938000 - }, - { - "epoch": 13.51, - "learning_rate": 3.5862684924109066e-06, - "loss": 1.8109, - "step": 938500 - }, - { - "epoch": 13.52, - "learning_rate": 3.534341750658172e-06, - "loss": 1.8105, - "step": 939000 - }, - { - "epoch": 13.52, - "learning_rate": 3.482415008905436e-06, - "loss": 1.8113, - "step": 939500 - }, - { - "epoch": 13.53, - "learning_rate": 3.4305921206362065e-06, - "loss": 1.812, - "step": 940000 - }, - { - "epoch": 13.54, - "learning_rate": 3.3786653788834714e-06, - "loss": 1.81, - "step": 940500 - }, - { - "epoch": 13.54, - "learning_rate": 3.3267386371307363e-06, - "loss": 1.8133, - "step": 941000 - }, - { - "epoch": 13.55, - "learning_rate": 3.2748118953780008e-06, - "loss": 1.8131, - "step": 941500 - }, - { - "epoch": 13.56, - "learning_rate": 3.2228851536252657e-06, - "loss": 1.8114, - "step": 942000 - }, - { - "epoch": 13.57, - "learning_rate": 3.171062265356036e-06, - "loss": 1.8116, - "step": 942500 - }, - { - "epoch": 13.57, - "learning_rate": 3.1191355236033006e-06, - "loss": 1.8144, - "step": 943000 - }, - { - "epoch": 13.58, - "learning_rate": 3.0672087818505655e-06, - "loss": 1.8101, - "step": 943500 - }, - { - "epoch": 13.59, - "learning_rate": 3.01528204009783e-06, - "loss": 1.8127, - "step": 944000 - }, - { - "epoch": 13.6, - "learning_rate": 2.9634591518286005e-06, - "loss": 1.8111, - "step": 944500 - }, - { - "epoch": 13.6, - "learning_rate": 2.911532410075865e-06, - "loss": 1.8082, - "step": 945000 - }, - { - "epoch": 13.61, - "learning_rate": 2.85960566832313e-06, - "loss": 1.8093, - "step": 945500 - }, - { - "epoch": 13.62, - "learning_rate": 2.8076789265703948e-06, - "loss": 1.8126, - "step": 946000 - }, - { - "epoch": 13.62, - "learning_rate": 2.755856038301165e-06, - "loss": 1.8086, - "step": 946500 - }, - { - "epoch": 13.63, - "learning_rate": 2.7039292965484297e-06, - "loss": 1.8113, - "step": 947000 - }, - { - "epoch": 13.64, - "learning_rate": 2.652002554795694e-06, - "loss": 1.8125, - "step": 947500 - }, - { - "epoch": 13.65, - "learning_rate": 2.600075813042959e-06, - "loss": 1.8107, - "step": 948000 - }, - { - "epoch": 13.65, - "learning_rate": 2.5481490712902236e-06, - "loss": 1.8119, - "step": 948500 - }, - { - "epoch": 13.66, - "learning_rate": 2.496326183020994e-06, - "loss": 1.8121, - "step": 949000 - }, - { - "epoch": 13.67, - "learning_rate": 2.4443994412682585e-06, - "loss": 1.8153, - "step": 949500 - }, - { - "epoch": 13.67, - "learning_rate": 2.392472699515524e-06, - "loss": 1.811, - "step": 950000 - }, - { - "epoch": 13.68, - "learning_rate": 2.3405459577627883e-06, - "loss": 1.814, - "step": 950500 - }, - { - "epoch": 13.69, - "learning_rate": 2.288723069493559e-06, - "loss": 1.8147, - "step": 951000 - }, - { - "epoch": 13.7, - "learning_rate": 2.2367963277408233e-06, - "loss": 1.8152, - "step": 951500 - }, - { - "epoch": 13.7, - "learning_rate": 2.184869585988088e-06, - "loss": 1.8114, - "step": 952000 - }, - { - "epoch": 13.71, - "learning_rate": 2.1329428442353527e-06, - "loss": 1.8092, - "step": 952500 - }, - { - "epoch": 13.72, - "learning_rate": 2.0810161024826176e-06, - "loss": 1.809, - "step": 953000 - }, - { - "epoch": 13.72, - "learning_rate": 2.0290893607298825e-06, - "loss": 1.8162, - "step": 953500 - }, - { - "epoch": 13.73, - "learning_rate": 1.9772664724606526e-06, - "loss": 1.8088, - "step": 954000 - }, - { - "epoch": 13.74, - "learning_rate": 1.9253397307079175e-06, - "loss": 1.8148, - "step": 954500 - }, - { - "epoch": 13.75, - "learning_rate": 1.8734129889551821e-06, - "loss": 1.8101, - "step": 955000 - }, - { - "epoch": 13.75, - "learning_rate": 1.8214862472024468e-06, - "loss": 1.813, - "step": 955500 - }, - { - "epoch": 13.76, - "learning_rate": 1.769663358933217e-06, - "loss": 1.8143, - "step": 956000 - }, - { - "epoch": 13.77, - "learning_rate": 1.7177366171804818e-06, - "loss": 1.8085, - "step": 956500 - }, - { - "epoch": 13.78, - "learning_rate": 1.6658098754277467e-06, - "loss": 1.8148, - "step": 957000 - }, - { - "epoch": 13.78, - "learning_rate": 1.6138831336750114e-06, - "loss": 1.8081, - "step": 957500 - }, - { - "epoch": 13.79, - "learning_rate": 1.561956391922276e-06, - "loss": 1.8114, - "step": 958000 - }, - { - "epoch": 13.8, - "learning_rate": 1.5101335036530463e-06, - "loss": 1.8083, - "step": 958500 - }, - { - "epoch": 13.8, - "learning_rate": 1.458206761900311e-06, - "loss": 1.8137, - "step": 959000 - }, - { - "epoch": 13.81, - "learning_rate": 1.406280020147576e-06, - "loss": 1.815, - "step": 959500 - }, - { - "epoch": 13.82, - "learning_rate": 1.3543532783948406e-06, - "loss": 1.813, - "step": 960000 - }, - { - "epoch": 13.83, - "learning_rate": 1.302530390125611e-06, - "loss": 1.812, - "step": 960500 - }, - { - "epoch": 13.83, - "learning_rate": 1.2506036483728756e-06, - "loss": 1.8105, - "step": 961000 - }, - { - "epoch": 13.84, - "learning_rate": 1.1986769066201403e-06, - "loss": 1.8142, - "step": 961500 - }, - { - "epoch": 13.85, - "learning_rate": 1.146750164867405e-06, - "loss": 1.8101, - "step": 962000 - }, - { - "epoch": 13.85, - "learning_rate": 1.0948234231146699e-06, - "loss": 1.8141, - "step": 962500 - }, - { - "epoch": 13.86, - "learning_rate": 1.0430005348454401e-06, - "loss": 1.8125, - "step": 963000 - }, - { - "epoch": 13.87, - "learning_rate": 9.910737930927048e-07, - "loss": 1.8128, - "step": 963500 - }, - { - "epoch": 13.88, - "learning_rate": 9.391470513399695e-07, - "loss": 1.8126, - "step": 964000 - }, - { - "epoch": 13.88, - "learning_rate": 8.872203095872344e-07, - "loss": 1.8131, - "step": 964500 - }, - { - "epoch": 13.89, - "learning_rate": 8.353974213180045e-07, - "loss": 1.8105, - "step": 965000 - }, - { - "epoch": 13.9, - "learning_rate": 7.834706795652694e-07, - "loss": 1.8119, - "step": 965500 - }, - { - "epoch": 13.9, - "learning_rate": 7.315439378125341e-07, - "loss": 1.8076, - "step": 966000 - }, - { - "epoch": 13.91, - "learning_rate": 6.796171960597989e-07, - "loss": 1.809, - "step": 966500 - }, - { - "epoch": 13.92, - "learning_rate": 6.277943077905691e-07, - "loss": 1.8124, - "step": 967000 - }, - { - "epoch": 13.93, - "learning_rate": 5.758675660378338e-07, - "loss": 1.811, - "step": 967500 - }, - { - "epoch": 13.93, - "learning_rate": 5.239408242850986e-07, - "loss": 1.8105, - "step": 968000 - }, - { - "epoch": 13.94, - "learning_rate": 4.7201408253236337e-07, - "loss": 1.8122, - "step": 968500 - }, - { - "epoch": 13.95, - "learning_rate": 4.200873407796281e-07, - "loss": 1.8113, - "step": 969000 - }, - { - "epoch": 13.96, - "learning_rate": 3.682644525103984e-07, - "loss": 1.8112, - "step": 969500 - }, - { - "epoch": 13.96, - "learning_rate": 3.1633771075766313e-07, - "loss": 1.8096, - "step": 970000 - }, - { - "epoch": 13.97, - "learning_rate": 2.6441096900492787e-07, - "loss": 1.8133, - "step": 970500 - }, - { - "epoch": 13.98, - "learning_rate": 2.1248422725219261e-07, - "loss": 1.8126, - "step": 971000 - }, - { - "epoch": 13.98, - "learning_rate": 1.6066133898296286e-07, - "loss": 1.8129, - "step": 971500 - }, - { - "epoch": 13.99, - "learning_rate": 1.087345972302276e-07, - "loss": 1.8086, - "step": 972000 - }, - { - "epoch": 14.0, - "learning_rate": 5.6807855477492356e-08, - "loss": 1.8122, - "step": 972500 - }, - { - "epoch": 14.0, - "eval_accuracy": 0.6519198053032371, - "eval_loss": 1.6776695251464844, - "eval_runtime": 646.1912, - "eval_samples_per_second": 834.021, - "eval_steps_per_second": 34.751, - "step": 972622 - } - ], - "max_steps": 972622, - "num_train_epochs": 14, - "total_flos": 6.804216664343708e+18, - "trial_name": null, - "trial_params": null -}