diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,38482 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 38448, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.8199, + "epoch": 0.0005201831044527674, + "grad_norm": 2.730478238932107, + "learning_rate": 8.665511265164645e-08, + "loss": 0.5007, + "step": 10, + "vit_learning_rate": 1.733102253032929e-08 + }, + { + "avg_batch_load_time": 0.1224, + "avg_batch_processing_time": 0.6942, + "epoch": 0.0010403662089055348, + "grad_norm": 2.1302492646568107, + "learning_rate": 1.733102253032929e-07, + "loss": 0.4935, + "step": 20, + "vit_learning_rate": 3.466204506065858e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6954, + "epoch": 0.001560549313358302, + "grad_norm": 2.30263173816543, + "learning_rate": 2.599653379549394e-07, + "loss": 0.4785, + "step": 30, + "vit_learning_rate": 5.1993067590987863e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6608, + "epoch": 0.0020807324178110697, + "grad_norm": 2.8152487842375455, + "learning_rate": 3.466204506065858e-07, + "loss": 0.507, + "step": 40, + "vit_learning_rate": 6.932409012131716e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.696, + "epoch": 0.002600915522263837, + "grad_norm": 2.384550863577903, + "learning_rate": 4.332755632582323e-07, + "loss": 0.4844, + "step": 50, + "vit_learning_rate": 8.665511265164644e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7349, + "epoch": 0.003121098626716604, + "grad_norm": 2.0718560694963166, + "learning_rate": 5.199306759098788e-07, + "loss": 0.4977, + "step": 60, + "vit_learning_rate": 1.0398613518197573e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7285, + "epoch": 0.0036412817311693717, + "grad_norm": 1.912372860700478, + "learning_rate": 6.065857885615252e-07, + "loss": 0.4734, + "step": 70, + "vit_learning_rate": 1.21317157712305e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6629, + "epoch": 0.004161464835622139, + "grad_norm": 2.0355831247008935, + "learning_rate": 6.932409012131716e-07, + "loss": 0.4598, + "step": 80, + "vit_learning_rate": 1.386481802426343e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6698, + "epoch": 0.0046816479400749065, + "grad_norm": 1.846382791904691, + "learning_rate": 7.798960138648181e-07, + "loss": 0.4476, + "step": 90, + "vit_learning_rate": 1.5597920277296358e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.733, + "epoch": 0.005201831044527674, + "grad_norm": 1.7265210721172277, + "learning_rate": 8.665511265164646e-07, + "loss": 0.4293, + "step": 100, + "vit_learning_rate": 1.7331022530329288e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8396, + "epoch": 0.005722014148980441, + "grad_norm": 1.6715524203292553, + "learning_rate": 9.53206239168111e-07, + "loss": 0.4049, + "step": 110, + "vit_learning_rate": 1.9064124783362218e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7128, + "epoch": 0.006242197253433208, + "grad_norm": 1.6397196992150198, + "learning_rate": 1.0398613518197575e-06, + "loss": 0.4384, + "step": 120, + "vit_learning_rate": 2.0797227036395145e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.68, + "epoch": 0.006762380357885976, + "grad_norm": 1.8646454810251762, + "learning_rate": 1.1265164644714038e-06, + "loss": 0.42, + "step": 130, + "vit_learning_rate": 2.2530329289428073e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6919, + "epoch": 0.007282563462338743, + "grad_norm": 1.7512809452669578, + "learning_rate": 1.2131715771230504e-06, + "loss": 0.4171, + "step": 140, + "vit_learning_rate": 2.4263431542461e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6529, + "epoch": 0.007802746566791511, + "grad_norm": 1.766464304771403, + "learning_rate": 1.299826689774697e-06, + "loss": 0.4165, + "step": 150, + "vit_learning_rate": 2.599653379549394e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7466, + "epoch": 0.008322929671244279, + "grad_norm": 1.8920777262330426, + "learning_rate": 1.3864818024263433e-06, + "loss": 0.3968, + "step": 160, + "vit_learning_rate": 2.772963604852686e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6971, + "epoch": 0.008843112775697045, + "grad_norm": 1.6519914864816905, + "learning_rate": 1.4731369150779898e-06, + "loss": 0.3914, + "step": 170, + "vit_learning_rate": 2.946273830155979e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.67, + "epoch": 0.009363295880149813, + "grad_norm": 1.9041983904191897, + "learning_rate": 1.5597920277296362e-06, + "loss": 0.4432, + "step": 180, + "vit_learning_rate": 3.1195840554592717e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6509, + "epoch": 0.00988347898460258, + "grad_norm": 1.9242397947434482, + "learning_rate": 1.6464471403812827e-06, + "loss": 0.3967, + "step": 190, + "vit_learning_rate": 3.2928942807625647e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6729, + "epoch": 0.010403662089055347, + "grad_norm": 1.931333197264559, + "learning_rate": 1.7331022530329292e-06, + "loss": 0.4041, + "step": 200, + "vit_learning_rate": 3.4662045060658576e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6327, + "epoch": 0.010923845193508116, + "grad_norm": 1.7790968464920032, + "learning_rate": 1.8197573656845754e-06, + "loss": 0.3848, + "step": 210, + "vit_learning_rate": 3.6395147313691506e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6605, + "epoch": 0.011444028297960882, + "grad_norm": 1.3474169059527816, + "learning_rate": 1.906412478336222e-06, + "loss": 0.3577, + "step": 220, + "vit_learning_rate": 3.8128249566724436e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6746, + "epoch": 0.01196421140241365, + "grad_norm": 1.6403388529183909, + "learning_rate": 1.9930675909878687e-06, + "loss": 0.4146, + "step": 230, + "vit_learning_rate": 3.9861351819757366e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6426, + "epoch": 0.012484394506866416, + "grad_norm": 1.871275489408102, + "learning_rate": 2.079722703639515e-06, + "loss": 0.4051, + "step": 240, + "vit_learning_rate": 4.159445407279029e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6902, + "epoch": 0.013004577611319184, + "grad_norm": 1.8814533430371332, + "learning_rate": 2.1663778162911613e-06, + "loss": 0.4195, + "step": 250, + "vit_learning_rate": 4.332755632582322e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6755, + "epoch": 0.013524760715771952, + "grad_norm": 1.2630524928147646, + "learning_rate": 2.2530329289428077e-06, + "loss": 0.3843, + "step": 260, + "vit_learning_rate": 4.5060658578856145e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6593, + "epoch": 0.014044943820224719, + "grad_norm": 1.7493514447329273, + "learning_rate": 2.339688041594454e-06, + "loss": 0.3912, + "step": 270, + "vit_learning_rate": 4.679376083188908e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6229, + "epoch": 0.014565126924677487, + "grad_norm": 1.995275059465161, + "learning_rate": 2.4263431542461008e-06, + "loss": 0.3874, + "step": 280, + "vit_learning_rate": 4.8526863084922e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.732, + "epoch": 0.015085310029130253, + "grad_norm": 1.297001887189111, + "learning_rate": 2.512998266897747e-06, + "loss": 0.4069, + "step": 290, + "vit_learning_rate": 5.025996533795494e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6694, + "epoch": 0.015605493133583021, + "grad_norm": 1.7648106144098374, + "learning_rate": 2.599653379549394e-06, + "loss": 0.3933, + "step": 300, + "vit_learning_rate": 5.199306759098788e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6543, + "epoch": 0.016125676238035788, + "grad_norm": 1.7207974920750946, + "learning_rate": 2.68630849220104e-06, + "loss": 0.4129, + "step": 310, + "vit_learning_rate": 5.37261698440208e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6664, + "epoch": 0.016645859342488557, + "grad_norm": 1.5590042636363057, + "learning_rate": 2.7729636048526865e-06, + "loss": 0.3862, + "step": 320, + "vit_learning_rate": 5.545927209705372e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6584, + "epoch": 0.017166042446941324, + "grad_norm": 1.6889595707899012, + "learning_rate": 2.859618717504333e-06, + "loss": 0.3908, + "step": 330, + "vit_learning_rate": 5.719237435008665e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6487, + "epoch": 0.01768622555139409, + "grad_norm": 2.21568622462906, + "learning_rate": 2.9462738301559796e-06, + "loss": 0.4106, + "step": 340, + "vit_learning_rate": 5.892547660311958e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6492, + "epoch": 0.01820640865584686, + "grad_norm": 1.4058313148855046, + "learning_rate": 3.032928942807626e-06, + "loss": 0.4051, + "step": 350, + "vit_learning_rate": 6.065857885615251e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6551, + "epoch": 0.018726591760299626, + "grad_norm": 1.5358872549304845, + "learning_rate": 3.1195840554592723e-06, + "loss": 0.3556, + "step": 360, + "vit_learning_rate": 6.239168110918543e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6479, + "epoch": 0.019246774864752392, + "grad_norm": 1.4273262010213201, + "learning_rate": 3.206239168110919e-06, + "loss": 0.3936, + "step": 370, + "vit_learning_rate": 6.412478336221837e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6385, + "epoch": 0.01976695796920516, + "grad_norm": 1.8507707492219296, + "learning_rate": 3.2928942807625654e-06, + "loss": 0.3901, + "step": 380, + "vit_learning_rate": 6.585788561525129e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6518, + "epoch": 0.02028714107365793, + "grad_norm": 1.6533442182257478, + "learning_rate": 3.3795493934142113e-06, + "loss": 0.4257, + "step": 390, + "vit_learning_rate": 6.759098786828422e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6859, + "epoch": 0.020807324178110695, + "grad_norm": 2.074337827626123, + "learning_rate": 3.4662045060658585e-06, + "loss": 0.3965, + "step": 400, + "vit_learning_rate": 6.932409012131715e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6559, + "epoch": 0.02132750728256346, + "grad_norm": 2.355984136124082, + "learning_rate": 3.552859618717505e-06, + "loss": 0.3726, + "step": 410, + "vit_learning_rate": 7.105719237435009e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7257, + "epoch": 0.02184769038701623, + "grad_norm": 1.2443266070480896, + "learning_rate": 3.6395147313691507e-06, + "loss": 0.4072, + "step": 420, + "vit_learning_rate": 7.279029462738301e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7412, + "epoch": 0.022367873491468997, + "grad_norm": 1.2060801593121602, + "learning_rate": 3.726169844020798e-06, + "loss": 0.4015, + "step": 430, + "vit_learning_rate": 7.452339688041595e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6458, + "epoch": 0.022888056595921764, + "grad_norm": 1.9565427734990375, + "learning_rate": 3.812824956672444e-06, + "loss": 0.3802, + "step": 440, + "vit_learning_rate": 7.625649913344887e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6641, + "epoch": 0.023408239700374533, + "grad_norm": 2.094433524068676, + "learning_rate": 3.89948006932409e-06, + "loss": 0.3617, + "step": 450, + "vit_learning_rate": 7.79896013864818e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6676, + "epoch": 0.0239284228048273, + "grad_norm": 1.7797397804297415, + "learning_rate": 3.986135181975737e-06, + "loss": 0.3843, + "step": 460, + "vit_learning_rate": 7.972270363951473e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6389, + "epoch": 0.024448605909280066, + "grad_norm": 1.537525497276048, + "learning_rate": 4.072790294627384e-06, + "loss": 0.3831, + "step": 470, + "vit_learning_rate": 8.145580589254766e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7074, + "epoch": 0.024968789013732832, + "grad_norm": 1.805016806340598, + "learning_rate": 4.15944540727903e-06, + "loss": 0.3847, + "step": 480, + "vit_learning_rate": 8.318890814558058e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6641, + "epoch": 0.025488972118185602, + "grad_norm": 1.9584790183119816, + "learning_rate": 4.246100519930676e-06, + "loss": 0.3874, + "step": 490, + "vit_learning_rate": 8.492201039861352e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6494, + "epoch": 0.02600915522263837, + "grad_norm": 1.5986015133831877, + "learning_rate": 4.332755632582323e-06, + "loss": 0.3726, + "step": 500, + "vit_learning_rate": 8.665511265164644e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6461, + "epoch": 0.026529338327091135, + "grad_norm": 3.076243501086672, + "learning_rate": 4.419410745233969e-06, + "loss": 0.4076, + "step": 510, + "vit_learning_rate": 8.838821490467937e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6802, + "epoch": 0.027049521431543905, + "grad_norm": 1.9963788549310044, + "learning_rate": 4.506065857885615e-06, + "loss": 0.3942, + "step": 520, + "vit_learning_rate": 9.012131715771229e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7452, + "epoch": 0.02756970453599667, + "grad_norm": 1.3784982198083575, + "learning_rate": 4.5927209705372625e-06, + "loss": 0.3864, + "step": 530, + "vit_learning_rate": 9.185441941074524e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6683, + "epoch": 0.028089887640449437, + "grad_norm": 1.9162522990674458, + "learning_rate": 4.679376083188908e-06, + "loss": 0.3803, + "step": 540, + "vit_learning_rate": 9.358752166377816e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.666, + "epoch": 0.028610070744902207, + "grad_norm": 1.818083869271155, + "learning_rate": 4.766031195840554e-06, + "loss": 0.3841, + "step": 550, + "vit_learning_rate": 9.532062391681109e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6161, + "epoch": 0.029130253849354974, + "grad_norm": 1.7637989191355048, + "learning_rate": 4.8526863084922016e-06, + "loss": 0.3703, + "step": 560, + "vit_learning_rate": 9.7053726169844e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6708, + "epoch": 0.02965043695380774, + "grad_norm": 2.035716503508404, + "learning_rate": 4.939341421143848e-06, + "loss": 0.4072, + "step": 570, + "vit_learning_rate": 9.878682842287695e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7052, + "epoch": 0.030170620058260506, + "grad_norm": 1.9434566477426978, + "learning_rate": 5.025996533795494e-06, + "loss": 0.352, + "step": 580, + "vit_learning_rate": 1.0051993067590988e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7281, + "epoch": 0.030690803162713276, + "grad_norm": 1.455865381295848, + "learning_rate": 5.1126516464471406e-06, + "loss": 0.3864, + "step": 590, + "vit_learning_rate": 1.0225303292894282e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6586, + "epoch": 0.031210986267166042, + "grad_norm": 1.3831411380494034, + "learning_rate": 5.199306759098788e-06, + "loss": 0.4011, + "step": 600, + "vit_learning_rate": 1.0398613518197575e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6562, + "epoch": 0.03173116937161881, + "grad_norm": 2.3259716769665517, + "learning_rate": 5.285961871750433e-06, + "loss": 0.3649, + "step": 610, + "vit_learning_rate": 1.0571923743500866e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6248, + "epoch": 0.032251352476071575, + "grad_norm": 1.3866693791395424, + "learning_rate": 5.37261698440208e-06, + "loss": 0.3952, + "step": 620, + "vit_learning_rate": 1.074523396880416e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6126, + "epoch": 0.03277153558052434, + "grad_norm": 1.839859705847448, + "learning_rate": 5.459272097053726e-06, + "loss": 0.4015, + "step": 630, + "vit_learning_rate": 1.0918544194107451e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6562, + "epoch": 0.033291718684977115, + "grad_norm": 1.4291825205955109, + "learning_rate": 5.545927209705373e-06, + "loss": 0.3844, + "step": 640, + "vit_learning_rate": 1.1091854419410745e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6198, + "epoch": 0.03381190178942988, + "grad_norm": 1.4456299417485505, + "learning_rate": 5.632582322357019e-06, + "loss": 0.3681, + "step": 650, + "vit_learning_rate": 1.1265164644714038e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6602, + "epoch": 0.03433208489388265, + "grad_norm": 2.481032479553148, + "learning_rate": 5.719237435008666e-06, + "loss": 0.3769, + "step": 660, + "vit_learning_rate": 1.143847487001733e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6853, + "epoch": 0.034852267998335414, + "grad_norm": 1.2024480419994146, + "learning_rate": 5.805892547660312e-06, + "loss": 0.3975, + "step": 670, + "vit_learning_rate": 1.1611785095320623e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.663, + "epoch": 0.03537245110278818, + "grad_norm": 2.354636640772094, + "learning_rate": 5.892547660311959e-06, + "loss": 0.3924, + "step": 680, + "vit_learning_rate": 1.1785095320623917e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6377, + "epoch": 0.035892634207240946, + "grad_norm": 1.6800769565794846, + "learning_rate": 5.979202772963605e-06, + "loss": 0.3996, + "step": 690, + "vit_learning_rate": 1.1958405545927208e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6311, + "epoch": 0.03641281731169372, + "grad_norm": 1.52763006755445, + "learning_rate": 6.065857885615252e-06, + "loss": 0.4043, + "step": 700, + "vit_learning_rate": 1.2131715771230502e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.614, + "epoch": 0.036933000416146486, + "grad_norm": 1.869254861580584, + "learning_rate": 6.152512998266898e-06, + "loss": 0.3664, + "step": 710, + "vit_learning_rate": 1.2305025996533795e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6407, + "epoch": 0.03745318352059925, + "grad_norm": 2.052316973297446, + "learning_rate": 6.239168110918545e-06, + "loss": 0.4037, + "step": 720, + "vit_learning_rate": 1.2478336221837087e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6461, + "epoch": 0.03797336662505202, + "grad_norm": 1.9011542033690372, + "learning_rate": 6.325823223570191e-06, + "loss": 0.3743, + "step": 730, + "vit_learning_rate": 1.265164644714038e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6547, + "epoch": 0.038493549729504785, + "grad_norm": 1.8836604964095027, + "learning_rate": 6.412478336221838e-06, + "loss": 0.3686, + "step": 740, + "vit_learning_rate": 1.2824956672443674e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6578, + "epoch": 0.03901373283395755, + "grad_norm": 1.8477276411551662, + "learning_rate": 6.499133448873484e-06, + "loss": 0.377, + "step": 750, + "vit_learning_rate": 1.2998266897746965e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6561, + "epoch": 0.03953391593841032, + "grad_norm": 1.6885052244636107, + "learning_rate": 6.585788561525131e-06, + "loss": 0.3952, + "step": 760, + "vit_learning_rate": 1.3171577123050259e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.679, + "epoch": 0.04005409904286309, + "grad_norm": 1.7426462769181834, + "learning_rate": 6.672443674176777e-06, + "loss": 0.3989, + "step": 770, + "vit_learning_rate": 1.3344887348353552e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6959, + "epoch": 0.04057428214731586, + "grad_norm": 1.5327540303408091, + "learning_rate": 6.759098786828423e-06, + "loss": 0.3732, + "step": 780, + "vit_learning_rate": 1.3518197573656844e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6484, + "epoch": 0.041094465251768623, + "grad_norm": 2.2036324742846682, + "learning_rate": 6.84575389948007e-06, + "loss": 0.4024, + "step": 790, + "vit_learning_rate": 1.3691507798960137e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.648, + "epoch": 0.04161464835622139, + "grad_norm": 1.8995822985673583, + "learning_rate": 6.932409012131717e-06, + "loss": 0.3757, + "step": 800, + "vit_learning_rate": 1.386481802426343e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6601, + "epoch": 0.042134831460674156, + "grad_norm": 1.4902776513277947, + "learning_rate": 7.0190641247833625e-06, + "loss": 0.3883, + "step": 810, + "vit_learning_rate": 1.4038128249566722e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6821, + "epoch": 0.04265501456512692, + "grad_norm": 1.8007697510992173, + "learning_rate": 7.10571923743501e-06, + "loss": 0.3694, + "step": 820, + "vit_learning_rate": 1.4211438474870018e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6358, + "epoch": 0.04317519766957969, + "grad_norm": 2.030498442342381, + "learning_rate": 7.192374350086656e-06, + "loss": 0.3903, + "step": 830, + "vit_learning_rate": 1.4384748700173311e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6434, + "epoch": 0.04369538077403246, + "grad_norm": 1.489017138366281, + "learning_rate": 7.2790294627383015e-06, + "loss": 0.3935, + "step": 840, + "vit_learning_rate": 1.4558058925476603e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6326, + "epoch": 0.04421556387848523, + "grad_norm": 2.4983795869182224, + "learning_rate": 7.365684575389949e-06, + "loss": 0.422, + "step": 850, + "vit_learning_rate": 1.4731369150779896e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6204, + "epoch": 0.044735746982937995, + "grad_norm": 1.8105841346486857, + "learning_rate": 7.452339688041596e-06, + "loss": 0.3739, + "step": 860, + "vit_learning_rate": 1.490467937608319e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6864, + "epoch": 0.04525593008739076, + "grad_norm": 1.189586209857341, + "learning_rate": 7.538994800693241e-06, + "loss": 0.3926, + "step": 870, + "vit_learning_rate": 1.507798960138648e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7257, + "epoch": 0.04577611319184353, + "grad_norm": 1.8019416645176292, + "learning_rate": 7.625649913344888e-06, + "loss": 0.4089, + "step": 880, + "vit_learning_rate": 1.5251299826689774e-06 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6426, + "epoch": 0.046296296296296294, + "grad_norm": 4.266329482571006, + "learning_rate": 7.712305025996535e-06, + "loss": 0.3985, + "step": 890, + "vit_learning_rate": 1.5424610051993068e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6457, + "epoch": 0.04681647940074907, + "grad_norm": 1.6319315750762906, + "learning_rate": 7.79896013864818e-06, + "loss": 0.3726, + "step": 900, + "vit_learning_rate": 1.559792027729636e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7511, + "epoch": 0.04733666250520183, + "grad_norm": 1.7102921452165745, + "learning_rate": 7.885615251299828e-06, + "loss": 0.3764, + "step": 910, + "vit_learning_rate": 1.5771230502599653e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6321, + "epoch": 0.0478568456096546, + "grad_norm": 1.5055851114258443, + "learning_rate": 7.972270363951475e-06, + "loss": 0.4114, + "step": 920, + "vit_learning_rate": 1.5944540727902946e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7438, + "epoch": 0.048377028714107366, + "grad_norm": 1.6145514542714898, + "learning_rate": 8.05892547660312e-06, + "loss": 0.4049, + "step": 930, + "vit_learning_rate": 1.6117850953206238e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7055, + "epoch": 0.04889721181856013, + "grad_norm": 1.507568551639784, + "learning_rate": 8.145580589254767e-06, + "loss": 0.38, + "step": 940, + "vit_learning_rate": 1.6291161178509531e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8033, + "epoch": 0.0494173949230129, + "grad_norm": 1.627146681779528, + "learning_rate": 8.232235701906413e-06, + "loss": 0.3639, + "step": 950, + "vit_learning_rate": 1.6464471403812825e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6665, + "epoch": 0.049937578027465665, + "grad_norm": 1.4162128597123207, + "learning_rate": 8.31889081455806e-06, + "loss": 0.4009, + "step": 960, + "vit_learning_rate": 1.6637781629116116e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6631, + "epoch": 0.05045776113191844, + "grad_norm": 1.6127961028992108, + "learning_rate": 8.405545927209706e-06, + "loss": 0.3602, + "step": 970, + "vit_learning_rate": 1.681109185441941e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6909, + "epoch": 0.050977944236371205, + "grad_norm": 1.935537692351902, + "learning_rate": 8.492201039861353e-06, + "loss": 0.3883, + "step": 980, + "vit_learning_rate": 1.6984402079722703e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6613, + "epoch": 0.05149812734082397, + "grad_norm": 1.7055200604792629, + "learning_rate": 8.578856152512998e-06, + "loss": 0.3827, + "step": 990, + "vit_learning_rate": 1.7157712305025995e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6535, + "epoch": 0.05201831044527674, + "grad_norm": 1.5870254670616488, + "learning_rate": 8.665511265164645e-06, + "loss": 0.3874, + "step": 1000, + "vit_learning_rate": 1.7331022530329288e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6756, + "epoch": 0.052538493549729504, + "grad_norm": 3.093892468509193, + "learning_rate": 8.752166377816293e-06, + "loss": 0.383, + "step": 1010, + "vit_learning_rate": 1.7504332755632582e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6335, + "epoch": 0.05305867665418227, + "grad_norm": 2.02989900087187, + "learning_rate": 8.838821490467938e-06, + "loss": 0.385, + "step": 1020, + "vit_learning_rate": 1.7677642980935873e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6244, + "epoch": 0.053578859758635036, + "grad_norm": 2.1727005734247906, + "learning_rate": 8.925476603119585e-06, + "loss": 0.3769, + "step": 1030, + "vit_learning_rate": 1.7850953206239167e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6381, + "epoch": 0.05409904286308781, + "grad_norm": 1.6246224493971249, + "learning_rate": 9.01213171577123e-06, + "loss": 0.3644, + "step": 1040, + "vit_learning_rate": 1.8024263431542458e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6698, + "epoch": 0.054619225967540576, + "grad_norm": 1.9429532126621687, + "learning_rate": 9.098786828422878e-06, + "loss": 0.3922, + "step": 1050, + "vit_learning_rate": 1.8197573656845754e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6553, + "epoch": 0.05513940907199334, + "grad_norm": 1.7045737616887566, + "learning_rate": 9.185441941074525e-06, + "loss": 0.4022, + "step": 1060, + "vit_learning_rate": 1.8370883882149047e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6767, + "epoch": 0.05565959217644611, + "grad_norm": 2.2636772808879737, + "learning_rate": 9.27209705372617e-06, + "loss": 0.3994, + "step": 1070, + "vit_learning_rate": 1.8544194107452339e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6463, + "epoch": 0.056179775280898875, + "grad_norm": 1.9478884196432347, + "learning_rate": 9.358752166377816e-06, + "loss": 0.3791, + "step": 1080, + "vit_learning_rate": 1.8717504332755632e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6445, + "epoch": 0.05669995838535164, + "grad_norm": 2.092181965031471, + "learning_rate": 9.445407279029463e-06, + "loss": 0.3936, + "step": 1090, + "vit_learning_rate": 1.8890814558058926e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6252, + "epoch": 0.057220141489804414, + "grad_norm": 1.8543919245668292, + "learning_rate": 9.532062391681109e-06, + "loss": 0.4004, + "step": 1100, + "vit_learning_rate": 1.9064124783362217e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.687, + "epoch": 0.05774032459425718, + "grad_norm": 1.924036012681194, + "learning_rate": 9.618717504332756e-06, + "loss": 0.3897, + "step": 1110, + "vit_learning_rate": 1.923743500866551e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6601, + "epoch": 0.05826050769870995, + "grad_norm": 1.4740275574127806, + "learning_rate": 9.705372616984403e-06, + "loss": 0.3621, + "step": 1120, + "vit_learning_rate": 1.94107452339688e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7134, + "epoch": 0.05878069080316271, + "grad_norm": 1.6496623605371186, + "learning_rate": 9.792027729636049e-06, + "loss": 0.3964, + "step": 1130, + "vit_learning_rate": 1.9584055459272095e-06 + }, + { + "avg_batch_load_time": 0.0035, + "avg_batch_processing_time": 0.7717, + "epoch": 0.05930087390761548, + "grad_norm": 1.8149996279012772, + "learning_rate": 9.878682842287696e-06, + "loss": 0.401, + "step": 1140, + "vit_learning_rate": 1.975736568457539e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6618, + "epoch": 0.059821057012068246, + "grad_norm": 1.7003386314339102, + "learning_rate": 9.965337954939343e-06, + "loss": 0.3982, + "step": 1150, + "vit_learning_rate": 1.9930675909878683e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6385, + "epoch": 0.06034124011652101, + "grad_norm": 1.4987287459242709, + "learning_rate": 9.999999361347948e-06, + "loss": 0.3954, + "step": 1160, + "vit_learning_rate": 1.9999998722695893e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7285, + "epoch": 0.060861423220973786, + "grad_norm": 1.6723349861680261, + "learning_rate": 9.99999545847488e-06, + "loss": 0.3939, + "step": 1170, + "vit_learning_rate": 1.9999990916949762e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.699, + "epoch": 0.06138160632542655, + "grad_norm": 1.5325596373594221, + "learning_rate": 9.99998800753821e-06, + "loss": 0.4005, + "step": 1180, + "vit_learning_rate": 1.9999976015076417e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6808, + "epoch": 0.06190178942987932, + "grad_norm": 2.378026104980551, + "learning_rate": 9.999977008543223e-06, + "loss": 0.3631, + "step": 1190, + "vit_learning_rate": 1.9999954017086443e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6233, + "epoch": 0.062421972534332085, + "grad_norm": 1.6920324529610273, + "learning_rate": 9.999962461497723e-06, + "loss": 0.4162, + "step": 1200, + "vit_learning_rate": 1.9999924922995445e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6316, + "epoch": 0.06294215563878486, + "grad_norm": 1.8326248656309552, + "learning_rate": 9.999944366412034e-06, + "loss": 0.3879, + "step": 1210, + "vit_learning_rate": 1.9999888732824064e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6585, + "epoch": 0.06346233874323762, + "grad_norm": 1.6066899914777568, + "learning_rate": 9.999922723298995e-06, + "loss": 0.401, + "step": 1220, + "vit_learning_rate": 1.999984544659799e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6056, + "epoch": 0.06398252184769039, + "grad_norm": 1.6776199623423889, + "learning_rate": 9.99989753217397e-06, + "loss": 0.3812, + "step": 1230, + "vit_learning_rate": 1.9999795064347933e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.628, + "epoch": 0.06450270495214315, + "grad_norm": 1.5314124174264965, + "learning_rate": 9.999868793054825e-06, + "loss": 0.4001, + "step": 1240, + "vit_learning_rate": 1.9999737586109647e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6352, + "epoch": 0.06502288805659592, + "grad_norm": 1.7069754044445737, + "learning_rate": 9.999836505961964e-06, + "loss": 0.393, + "step": 1250, + "vit_learning_rate": 1.9999673011923927e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6143, + "epoch": 0.06554307116104868, + "grad_norm": 1.6706780092339613, + "learning_rate": 9.999800670918291e-06, + "loss": 0.4077, + "step": 1260, + "vit_learning_rate": 1.999960134183658e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6794, + "epoch": 0.06606325426550146, + "grad_norm": 1.7323844790320027, + "learning_rate": 9.999761287949237e-06, + "loss": 0.4209, + "step": 1270, + "vit_learning_rate": 1.9999522575898473e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6411, + "epoch": 0.06658343736995423, + "grad_norm": 1.8072650232835257, + "learning_rate": 9.99971835708275e-06, + "loss": 0.3929, + "step": 1280, + "vit_learning_rate": 1.99994367141655e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.5967, + "epoch": 0.06710362047440699, + "grad_norm": 2.0955769150094143, + "learning_rate": 9.999671878349294e-06, + "loss": 0.4105, + "step": 1290, + "vit_learning_rate": 1.9999343756698586e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6387, + "epoch": 0.06762380357885976, + "grad_norm": 1.175095794928172, + "learning_rate": 9.999621851781852e-06, + "loss": 0.3981, + "step": 1300, + "vit_learning_rate": 1.99992437035637e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6958, + "epoch": 0.06814398668331252, + "grad_norm": 1.91098472894056, + "learning_rate": 9.99956827741592e-06, + "loss": 0.3777, + "step": 1310, + "vit_learning_rate": 1.999913655483184e-06 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6903, + "epoch": 0.0686641697877653, + "grad_norm": 1.4996855072980535, + "learning_rate": 9.999511155289518e-06, + "loss": 0.3958, + "step": 1320, + "vit_learning_rate": 1.9999022310579034e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7879, + "epoch": 0.06918435289221805, + "grad_norm": 1.9644450062720709, + "learning_rate": 9.999450485443179e-06, + "loss": 0.3737, + "step": 1330, + "vit_learning_rate": 1.9998900970886353e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6749, + "epoch": 0.06970453599667083, + "grad_norm": 2.395592235196475, + "learning_rate": 9.999386267919956e-06, + "loss": 0.3864, + "step": 1340, + "vit_learning_rate": 1.9998772535839908e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6623, + "epoch": 0.0702247191011236, + "grad_norm": 1.2644611405722836, + "learning_rate": 9.999318502765418e-06, + "loss": 0.4043, + "step": 1350, + "vit_learning_rate": 1.9998637005530834e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6576, + "epoch": 0.07074490220557636, + "grad_norm": 1.833032741782624, + "learning_rate": 9.999247190027655e-06, + "loss": 0.4072, + "step": 1360, + "vit_learning_rate": 1.9998494380055306e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6154, + "epoch": 0.07126508531002913, + "grad_norm": 1.7261332431245466, + "learning_rate": 9.999172329757268e-06, + "loss": 0.3897, + "step": 1370, + "vit_learning_rate": 1.9998344659514533e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.649, + "epoch": 0.07178526841448189, + "grad_norm": 1.998160604999835, + "learning_rate": 9.999093922007377e-06, + "loss": 0.402, + "step": 1380, + "vit_learning_rate": 1.9998187844014753e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6601, + "epoch": 0.07230545151893467, + "grad_norm": 1.4017398429549466, + "learning_rate": 9.999011966833627e-06, + "loss": 0.4022, + "step": 1390, + "vit_learning_rate": 1.999802393366725e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6571, + "epoch": 0.07282563462338744, + "grad_norm": 1.356426829173266, + "learning_rate": 9.998926464294172e-06, + "loss": 0.4067, + "step": 1400, + "vit_learning_rate": 1.999785292858834e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6411, + "epoch": 0.0733458177278402, + "grad_norm": 1.4464049783041786, + "learning_rate": 9.998837414449683e-06, + "loss": 0.3642, + "step": 1410, + "vit_learning_rate": 1.9997674828899363e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6858, + "epoch": 0.07386600083229297, + "grad_norm": 1.6214241630642474, + "learning_rate": 9.998744817363355e-06, + "loss": 0.3935, + "step": 1420, + "vit_learning_rate": 1.9997489634726706e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6587, + "epoch": 0.07438618393674573, + "grad_norm": 1.9153689839808696, + "learning_rate": 9.998648673100895e-06, + "loss": 0.3954, + "step": 1430, + "vit_learning_rate": 1.9997297346201786e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6334, + "epoch": 0.0749063670411985, + "grad_norm": 1.4383249557618683, + "learning_rate": 9.998548981730524e-06, + "loss": 0.3824, + "step": 1440, + "vit_learning_rate": 1.9997097963461045e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6494, + "epoch": 0.07542655014565126, + "grad_norm": 1.6342576433033449, + "learning_rate": 9.998445743322993e-06, + "loss": 0.4163, + "step": 1450, + "vit_learning_rate": 1.9996891486645983e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6635, + "epoch": 0.07594673325010404, + "grad_norm": 2.249251760511845, + "learning_rate": 9.998338957951552e-06, + "loss": 0.3756, + "step": 1460, + "vit_learning_rate": 1.9996677915903102e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6755, + "epoch": 0.07646691635455681, + "grad_norm": 1.5170965672618093, + "learning_rate": 9.998228625691983e-06, + "loss": 0.3951, + "step": 1470, + "vit_learning_rate": 1.9996457251383963e-06 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6793, + "epoch": 0.07698709945900957, + "grad_norm": 2.018198148030159, + "learning_rate": 9.99811474662258e-06, + "loss": 0.3954, + "step": 1480, + "vit_learning_rate": 1.9996229493245157e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6864, + "epoch": 0.07750728256346234, + "grad_norm": 1.5266892247661845, + "learning_rate": 9.997997320824149e-06, + "loss": 0.3887, + "step": 1490, + "vit_learning_rate": 1.9995994641648297e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6295, + "epoch": 0.0780274656679151, + "grad_norm": 1.6270075584874, + "learning_rate": 9.99787634838002e-06, + "loss": 0.4008, + "step": 1500, + "vit_learning_rate": 1.9995752696760037e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6754, + "epoch": 0.07854764877236788, + "grad_norm": 1.5622491396005207, + "learning_rate": 9.997751829376034e-06, + "loss": 0.3918, + "step": 1510, + "vit_learning_rate": 1.9995503658752067e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7086, + "epoch": 0.07906783187682064, + "grad_norm": 2.062475797719363, + "learning_rate": 9.997623763900555e-06, + "loss": 0.3987, + "step": 1520, + "vit_learning_rate": 1.9995247527801107e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.621, + "epoch": 0.07958801498127341, + "grad_norm": 1.5154769613704362, + "learning_rate": 9.997492152044457e-06, + "loss": 0.3925, + "step": 1530, + "vit_learning_rate": 1.9994984304088913e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6535, + "epoch": 0.08010819808572618, + "grad_norm": 1.8365933148635227, + "learning_rate": 9.997356993901133e-06, + "loss": 0.3826, + "step": 1540, + "vit_learning_rate": 1.9994713987802265e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6676, + "epoch": 0.08062838119017894, + "grad_norm": 1.896050801152247, + "learning_rate": 9.997218289566496e-06, + "loss": 0.3795, + "step": 1550, + "vit_learning_rate": 1.999443657913299e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6474, + "epoch": 0.08114856429463171, + "grad_norm": 1.46782305860866, + "learning_rate": 9.99707603913897e-06, + "loss": 0.3939, + "step": 1560, + "vit_learning_rate": 1.999415207827794e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6183, + "epoch": 0.08166874739908447, + "grad_norm": 1.4753478457033173, + "learning_rate": 9.9969302427195e-06, + "loss": 0.3749, + "step": 1570, + "vit_learning_rate": 1.9993860485439e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6572, + "epoch": 0.08218893050353725, + "grad_norm": 1.701237737772785, + "learning_rate": 9.996780900411543e-06, + "loss": 0.3975, + "step": 1580, + "vit_learning_rate": 1.9993561800823085e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6841, + "epoch": 0.08270911360799, + "grad_norm": 1.7575182612509872, + "learning_rate": 9.996628012321075e-06, + "loss": 0.3938, + "step": 1590, + "vit_learning_rate": 1.9993256024642147e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6723, + "epoch": 0.08322929671244278, + "grad_norm": 1.6183793451921802, + "learning_rate": 9.996471578556589e-06, + "loss": 0.3979, + "step": 1600, + "vit_learning_rate": 1.9992943157113174e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6759, + "epoch": 0.08374947981689555, + "grad_norm": 1.7884515741726315, + "learning_rate": 9.996311599229089e-06, + "loss": 0.3926, + "step": 1610, + "vit_learning_rate": 1.9992623198458177e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6354, + "epoch": 0.08426966292134831, + "grad_norm": 1.7672539943007268, + "learning_rate": 9.996148074452101e-06, + "loss": 0.3874, + "step": 1620, + "vit_learning_rate": 1.99922961489042e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6332, + "epoch": 0.08478984602580109, + "grad_norm": 1.7449346816760982, + "learning_rate": 9.995981004341666e-06, + "loss": 0.3994, + "step": 1630, + "vit_learning_rate": 1.999196200868333e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6289, + "epoch": 0.08531002913025384, + "grad_norm": 1.7028650211807725, + "learning_rate": 9.995810389016335e-06, + "loss": 0.4126, + "step": 1640, + "vit_learning_rate": 1.9991620778032666e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6697, + "epoch": 0.08583021223470662, + "grad_norm": 2.1637806054649826, + "learning_rate": 9.99563622859718e-06, + "loss": 0.3993, + "step": 1650, + "vit_learning_rate": 1.999127245719436e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.644, + "epoch": 0.08635039533915938, + "grad_norm": 1.7189998036410359, + "learning_rate": 9.99545852320779e-06, + "loss": 0.3732, + "step": 1660, + "vit_learning_rate": 1.999091704641558e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6701, + "epoch": 0.08687057844361215, + "grad_norm": 1.8631034328782419, + "learning_rate": 9.995277272974266e-06, + "loss": 0.3817, + "step": 1670, + "vit_learning_rate": 1.999055454594853e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6584, + "epoch": 0.08739076154806492, + "grad_norm": 1.720018857809092, + "learning_rate": 9.995092478025224e-06, + "loss": 0.3885, + "step": 1680, + "vit_learning_rate": 1.9990184956050446e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7145, + "epoch": 0.08791094465251768, + "grad_norm": 1.910388847901205, + "learning_rate": 9.994904138491799e-06, + "loss": 0.3935, + "step": 1690, + "vit_learning_rate": 1.9989808276983595e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6595, + "epoch": 0.08843112775697046, + "grad_norm": 2.504369734061023, + "learning_rate": 9.99471225450764e-06, + "loss": 0.4074, + "step": 1700, + "vit_learning_rate": 1.9989424509015277e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6367, + "epoch": 0.08895131086142322, + "grad_norm": 2.1269908804239237, + "learning_rate": 9.994516826208906e-06, + "loss": 0.3806, + "step": 1710, + "vit_learning_rate": 1.998903365241781e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6636, + "epoch": 0.08947149396587599, + "grad_norm": 2.1614877342674284, + "learning_rate": 9.994317853734279e-06, + "loss": 0.3644, + "step": 1720, + "vit_learning_rate": 1.9988635707468554e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6516, + "epoch": 0.08999167707032875, + "grad_norm": 1.584973085508048, + "learning_rate": 9.994115337224954e-06, + "loss": 0.3724, + "step": 1730, + "vit_learning_rate": 1.99882306744499e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6079, + "epoch": 0.09051186017478152, + "grad_norm": 1.6297110354505289, + "learning_rate": 9.993909276824635e-06, + "loss": 0.3592, + "step": 1740, + "vit_learning_rate": 1.998781855364927e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6366, + "epoch": 0.0910320432792343, + "grad_norm": 2.1712357195742844, + "learning_rate": 9.99369967267955e-06, + "loss": 0.3976, + "step": 1750, + "vit_learning_rate": 1.9987399345359095e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6509, + "epoch": 0.09155222638368705, + "grad_norm": 1.240205011480137, + "learning_rate": 9.993486524938432e-06, + "loss": 0.3813, + "step": 1760, + "vit_learning_rate": 1.998697304987686e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6344, + "epoch": 0.09207240948813983, + "grad_norm": 1.3581942138608056, + "learning_rate": 9.993269833752536e-06, + "loss": 0.3902, + "step": 1770, + "vit_learning_rate": 1.9986539667505073e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6793, + "epoch": 0.09259259259259259, + "grad_norm": 1.393459200736033, + "learning_rate": 9.99304959927563e-06, + "loss": 0.3757, + "step": 1780, + "vit_learning_rate": 1.998609919855126e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7208, + "epoch": 0.09311277569704536, + "grad_norm": 1.7230477673762956, + "learning_rate": 9.992825821663996e-06, + "loss": 0.376, + "step": 1790, + "vit_learning_rate": 1.998565164332799e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6528, + "epoch": 0.09363295880149813, + "grad_norm": 1.7204022992632924, + "learning_rate": 9.992598501076425e-06, + "loss": 0.385, + "step": 1800, + "vit_learning_rate": 1.9985197002152848e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8005, + "epoch": 0.0941531419059509, + "grad_norm": 1.6462648396037127, + "learning_rate": 9.992367637674232e-06, + "loss": 0.3784, + "step": 1810, + "vit_learning_rate": 1.998473527534846e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6255, + "epoch": 0.09467332501040367, + "grad_norm": 1.9866706108033165, + "learning_rate": 9.992133231621236e-06, + "loss": 0.4165, + "step": 1820, + "vit_learning_rate": 1.998426646324247e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6553, + "epoch": 0.09519350811485643, + "grad_norm": 2.640311659195126, + "learning_rate": 9.991895283083779e-06, + "loss": 0.3874, + "step": 1830, + "vit_learning_rate": 1.9983790566167557e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6859, + "epoch": 0.0957136912193092, + "grad_norm": 2.4255504255560414, + "learning_rate": 9.99165379223071e-06, + "loss": 0.395, + "step": 1840, + "vit_learning_rate": 1.9983307584461415e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6824, + "epoch": 0.09623387432376196, + "grad_norm": 1.4199189654376878, + "learning_rate": 9.991408759233394e-06, + "loss": 0.3766, + "step": 1850, + "vit_learning_rate": 1.9982817518466784e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6758, + "epoch": 0.09675405742821473, + "grad_norm": 1.708950444851119, + "learning_rate": 9.99116018426571e-06, + "loss": 0.3682, + "step": 1860, + "vit_learning_rate": 1.9982320368531417e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.792, + "epoch": 0.0972742405326675, + "grad_norm": 1.270887188644699, + "learning_rate": 9.99090806750405e-06, + "loss": 0.3905, + "step": 1870, + "vit_learning_rate": 1.99818161350081e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7572, + "epoch": 0.09779442363712026, + "grad_norm": 2.279032489980922, + "learning_rate": 9.990652409127321e-06, + "loss": 0.3935, + "step": 1880, + "vit_learning_rate": 1.998130481825464e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6639, + "epoch": 0.09831460674157304, + "grad_norm": 1.6751594420928537, + "learning_rate": 9.99039320931694e-06, + "loss": 0.3965, + "step": 1890, + "vit_learning_rate": 1.9980786418633878e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6149, + "epoch": 0.0988347898460258, + "grad_norm": 1.785685283117897, + "learning_rate": 9.990130468256838e-06, + "loss": 0.39, + "step": 1900, + "vit_learning_rate": 1.9980260936513674e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6651, + "epoch": 0.09935497295047857, + "grad_norm": 1.546313996013923, + "learning_rate": 9.989864186133463e-06, + "loss": 0.3931, + "step": 1910, + "vit_learning_rate": 1.997972837226692e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6717, + "epoch": 0.09987515605493133, + "grad_norm": 1.7847229560007798, + "learning_rate": 9.989594363135768e-06, + "loss": 0.3654, + "step": 1920, + "vit_learning_rate": 1.9979188726271536e-06 + }, + { + "avg_batch_load_time": 2.0205, + "avg_batch_processing_time": 0.6594, + "epoch": 0.1003953391593841, + "grad_norm": 1.329108885508235, + "learning_rate": 9.989320999455226e-06, + "loss": 0.376, + "step": 1930, + "vit_learning_rate": 1.997864199891045e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7074, + "epoch": 0.10091552226383688, + "grad_norm": 6.400667520759115, + "learning_rate": 9.989044095285819e-06, + "loss": 0.3821, + "step": 1940, + "vit_learning_rate": 1.997808819057164e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6409, + "epoch": 0.10143570536828964, + "grad_norm": 1.7974277089758033, + "learning_rate": 9.98876365082404e-06, + "loss": 0.3781, + "step": 1950, + "vit_learning_rate": 1.9977527301648076e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6859, + "epoch": 0.10195588847274241, + "grad_norm": 1.459950332858251, + "learning_rate": 9.9884796662689e-06, + "loss": 0.3717, + "step": 1960, + "vit_learning_rate": 1.99769593325378e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6607, + "epoch": 0.10247607157719517, + "grad_norm": 1.8986696627782496, + "learning_rate": 9.988192141821913e-06, + "loss": 0.3903, + "step": 1970, + "vit_learning_rate": 1.9976384283643824e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6656, + "epoch": 0.10299625468164794, + "grad_norm": 1.4907632392028956, + "learning_rate": 9.987901077687115e-06, + "loss": 0.392, + "step": 1980, + "vit_learning_rate": 1.9975802155374226e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.62, + "epoch": 0.1035164377861007, + "grad_norm": 2.0271318159717304, + "learning_rate": 9.987606474071046e-06, + "loss": 0.388, + "step": 1990, + "vit_learning_rate": 1.997521294814209e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6567, + "epoch": 0.10403662089055347, + "grad_norm": 1.530703328127797, + "learning_rate": 9.98730833118276e-06, + "loss": 0.3908, + "step": 2000, + "vit_learning_rate": 1.997461666236552e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.625, + "epoch": 0.10455680399500625, + "grad_norm": 1.900341303974434, + "learning_rate": 9.987006649233828e-06, + "loss": 0.4021, + "step": 2010, + "vit_learning_rate": 1.9974013298467655e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6815, + "epoch": 0.10507698709945901, + "grad_norm": 1.5181527375332693, + "learning_rate": 9.986701428438323e-06, + "loss": 0.3917, + "step": 2020, + "vit_learning_rate": 1.9973402856876644e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.726, + "epoch": 0.10559717020391178, + "grad_norm": 1.4797662276801857, + "learning_rate": 9.986392669012837e-06, + "loss": 0.387, + "step": 2030, + "vit_learning_rate": 1.997278533802567e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6158, + "epoch": 0.10611735330836454, + "grad_norm": 1.532859266101753, + "learning_rate": 9.986080371176466e-06, + "loss": 0.3903, + "step": 2040, + "vit_learning_rate": 1.997216074235293e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6468, + "epoch": 0.10663753641281731, + "grad_norm": 2.1780347729739153, + "learning_rate": 9.985764535150822e-06, + "loss": 0.4034, + "step": 2050, + "vit_learning_rate": 1.997152907030164e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6141, + "epoch": 0.10715771951727007, + "grad_norm": 1.8419242083528473, + "learning_rate": 9.985445161160028e-06, + "loss": 0.4243, + "step": 2060, + "vit_learning_rate": 1.9970890322320052e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6309, + "epoch": 0.10767790262172285, + "grad_norm": 1.5110122563901058, + "learning_rate": 9.985122249430714e-06, + "loss": 0.4154, + "step": 2070, + "vit_learning_rate": 1.9970244498861428e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6514, + "epoch": 0.10819808572617562, + "grad_norm": 1.747719674482028, + "learning_rate": 9.984795800192026e-06, + "loss": 0.384, + "step": 2080, + "vit_learning_rate": 1.996959160038405e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6576, + "epoch": 0.10871826883062838, + "grad_norm": 1.4914987614942925, + "learning_rate": 9.984465813675614e-06, + "loss": 0.3627, + "step": 2090, + "vit_learning_rate": 1.9968931627351224e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6705, + "epoch": 0.10923845193508115, + "grad_norm": 2.1456480798228013, + "learning_rate": 9.98413229011564e-06, + "loss": 0.3931, + "step": 2100, + "vit_learning_rate": 1.996826458023128e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6584, + "epoch": 0.10975863503953391, + "grad_norm": 1.6502808572272643, + "learning_rate": 9.983795229748778e-06, + "loss": 0.4062, + "step": 2110, + "vit_learning_rate": 1.9967590459497553e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6264, + "epoch": 0.11027881814398668, + "grad_norm": 1.645791061358044, + "learning_rate": 9.983454632814211e-06, + "loss": 0.4166, + "step": 2120, + "vit_learning_rate": 1.996690926562842e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7236, + "epoch": 0.11079900124843944, + "grad_norm": 2.0058613196658857, + "learning_rate": 9.983110499553632e-06, + "loss": 0.3979, + "step": 2130, + "vit_learning_rate": 1.9966220999107263e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6699, + "epoch": 0.11131918435289222, + "grad_norm": 1.5663048629619243, + "learning_rate": 9.982762830211239e-06, + "loss": 0.3958, + "step": 2140, + "vit_learning_rate": 1.9965525660422476e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6368, + "epoch": 0.11183936745734499, + "grad_norm": 1.8091464356812885, + "learning_rate": 9.982411625033746e-06, + "loss": 0.3835, + "step": 2150, + "vit_learning_rate": 1.996482325006749e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6749, + "epoch": 0.11235955056179775, + "grad_norm": 1.9099497047775305, + "learning_rate": 9.982056884270372e-06, + "loss": 0.3865, + "step": 2160, + "vit_learning_rate": 1.996411376854074e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6811, + "epoch": 0.11287973366625052, + "grad_norm": 1.5873762498284434, + "learning_rate": 9.981698608172847e-06, + "loss": 0.3995, + "step": 2170, + "vit_learning_rate": 1.9963397216345694e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6773, + "epoch": 0.11339991677070328, + "grad_norm": 1.5676604437826327, + "learning_rate": 9.981336796995407e-06, + "loss": 0.3917, + "step": 2180, + "vit_learning_rate": 1.996267359399081e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6495, + "epoch": 0.11392009987515606, + "grad_norm": 1.638825122307596, + "learning_rate": 9.980971450994796e-06, + "loss": 0.4061, + "step": 2190, + "vit_learning_rate": 1.996194290198959e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6211, + "epoch": 0.11444028297960883, + "grad_norm": 1.9230124869939365, + "learning_rate": 9.980602570430274e-06, + "loss": 0.3949, + "step": 2200, + "vit_learning_rate": 1.9961205140860542e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6208, + "epoch": 0.11496046608406159, + "grad_norm": 1.7666226388023683, + "learning_rate": 9.980230155563598e-06, + "loss": 0.3935, + "step": 2210, + "vit_learning_rate": 1.996046031112719e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.644, + "epoch": 0.11548064918851436, + "grad_norm": 1.5034118386156339, + "learning_rate": 9.97985420665904e-06, + "loss": 0.3923, + "step": 2220, + "vit_learning_rate": 1.995970841331808e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6638, + "epoch": 0.11600083229296712, + "grad_norm": 1.5270174689113087, + "learning_rate": 9.97947472398338e-06, + "loss": 0.3888, + "step": 2230, + "vit_learning_rate": 1.995894944796676e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6437, + "epoch": 0.1165210153974199, + "grad_norm": 1.5516833523962303, + "learning_rate": 9.979091707805906e-06, + "loss": 0.4028, + "step": 2240, + "vit_learning_rate": 1.995818341561181e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6564, + "epoch": 0.11704119850187265, + "grad_norm": 1.4198552087143228, + "learning_rate": 9.978705158398405e-06, + "loss": 0.3824, + "step": 2250, + "vit_learning_rate": 1.995741031679681e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6502, + "epoch": 0.11756138160632543, + "grad_norm": 1.4645532419603327, + "learning_rate": 9.978315076035183e-06, + "loss": 0.3936, + "step": 2260, + "vit_learning_rate": 1.9956630152070364e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6624, + "epoch": 0.1180815647107782, + "grad_norm": 2.27891014022858, + "learning_rate": 9.977921460993045e-06, + "loss": 0.4188, + "step": 2270, + "vit_learning_rate": 1.9955842921986087e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.657, + "epoch": 0.11860174781523096, + "grad_norm": 1.1082454479184765, + "learning_rate": 9.977524313551309e-06, + "loss": 0.3813, + "step": 2280, + "vit_learning_rate": 1.995504862710261e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6606, + "epoch": 0.11912193091968373, + "grad_norm": 1.6247455388091743, + "learning_rate": 9.97712363399179e-06, + "loss": 0.4041, + "step": 2290, + "vit_learning_rate": 1.995424726798358e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6181, + "epoch": 0.11964211402413649, + "grad_norm": 1.7436594670079308, + "learning_rate": 9.976719422598821e-06, + "loss": 0.3684, + "step": 2300, + "vit_learning_rate": 1.995343884519764e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.658, + "epoch": 0.12016229712858927, + "grad_norm": 1.677831249786819, + "learning_rate": 9.976311679659233e-06, + "loss": 0.4108, + "step": 2310, + "vit_learning_rate": 1.9952623359318464e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7298, + "epoch": 0.12068248023304202, + "grad_norm": 1.6728542307539023, + "learning_rate": 9.975900405462368e-06, + "loss": 0.3771, + "step": 2320, + "vit_learning_rate": 1.995180081092473e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6761, + "epoch": 0.1212026633374948, + "grad_norm": 1.6075484925851085, + "learning_rate": 9.97548560030007e-06, + "loss": 0.3739, + "step": 2330, + "vit_learning_rate": 1.995097120060014e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6747, + "epoch": 0.12172284644194757, + "grad_norm": 2.123340119155076, + "learning_rate": 9.975067264466688e-06, + "loss": 0.4063, + "step": 2340, + "vit_learning_rate": 1.9950134528933377e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6577, + "epoch": 0.12224302954640033, + "grad_norm": 1.758030319708814, + "learning_rate": 9.974645398259085e-06, + "loss": 0.4179, + "step": 2350, + "vit_learning_rate": 1.9949290796518167e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6788, + "epoch": 0.1227632126508531, + "grad_norm": 1.5018810600735832, + "learning_rate": 9.974220001976617e-06, + "loss": 0.3915, + "step": 2360, + "vit_learning_rate": 1.9948440003953233e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6572, + "epoch": 0.12328339575530586, + "grad_norm": 1.99806404020456, + "learning_rate": 9.973791075921155e-06, + "loss": 0.389, + "step": 2370, + "vit_learning_rate": 1.994758215184231e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7051, + "epoch": 0.12380357885975864, + "grad_norm": 1.7595655475514331, + "learning_rate": 9.973358620397068e-06, + "loss": 0.41, + "step": 2380, + "vit_learning_rate": 1.9946717240794136e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6654, + "epoch": 0.1243237619642114, + "grad_norm": 1.5721375021964992, + "learning_rate": 9.972922635711233e-06, + "loss": 0.3843, + "step": 2390, + "vit_learning_rate": 1.9945845271422463e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6509, + "epoch": 0.12484394506866417, + "grad_norm": 1.6778113753564492, + "learning_rate": 9.97248312217303e-06, + "loss": 0.3637, + "step": 2400, + "vit_learning_rate": 1.994496624434606e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6098, + "epoch": 0.12536412817311693, + "grad_norm": 1.7027248997522015, + "learning_rate": 9.972040080094346e-06, + "loss": 0.3888, + "step": 2410, + "vit_learning_rate": 1.994408016018869e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6272, + "epoch": 0.12588431127756972, + "grad_norm": 1.5609312419045387, + "learning_rate": 9.971593509789568e-06, + "loss": 0.3791, + "step": 2420, + "vit_learning_rate": 1.9943187019579135e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8108, + "epoch": 0.12640449438202248, + "grad_norm": 1.4542135819626787, + "learning_rate": 9.971143411575585e-06, + "loss": 0.3764, + "step": 2430, + "vit_learning_rate": 1.994228682315117e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6477, + "epoch": 0.12692467748647523, + "grad_norm": 1.500518695438568, + "learning_rate": 9.970689785771798e-06, + "loss": 0.3898, + "step": 2440, + "vit_learning_rate": 1.9941379571543595e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.643, + "epoch": 0.127444860590928, + "grad_norm": 1.9742367544502375, + "learning_rate": 9.970232632700104e-06, + "loss": 0.3801, + "step": 2450, + "vit_learning_rate": 1.9940465265400207e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7055, + "epoch": 0.12796504369538078, + "grad_norm": 1.5349889921984714, + "learning_rate": 9.969771952684905e-06, + "loss": 0.4041, + "step": 2460, + "vit_learning_rate": 1.9939543905369806e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7046, + "epoch": 0.12848522679983354, + "grad_norm": 1.313057835102265, + "learning_rate": 9.969307746053105e-06, + "loss": 0.4116, + "step": 2470, + "vit_learning_rate": 1.993861549210621e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6987, + "epoch": 0.1290054099042863, + "grad_norm": 1.3261100295912336, + "learning_rate": 9.968840013134111e-06, + "loss": 0.3777, + "step": 2480, + "vit_learning_rate": 1.993768002626822e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6145, + "epoch": 0.1295255930087391, + "grad_norm": 1.9568479719694656, + "learning_rate": 9.968368754259834e-06, + "loss": 0.3781, + "step": 2490, + "vit_learning_rate": 1.9936737508519664e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.643, + "epoch": 0.13004577611319185, + "grad_norm": 1.7867393043063555, + "learning_rate": 9.967893969764683e-06, + "loss": 0.3657, + "step": 2500, + "vit_learning_rate": 1.9935787939529365e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6809, + "epoch": 0.1305659592176446, + "grad_norm": 1.255100332232703, + "learning_rate": 9.967415659985575e-06, + "loss": 0.3983, + "step": 2510, + "vit_learning_rate": 1.993483131997115e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6556, + "epoch": 0.13108614232209737, + "grad_norm": 1.4213947078384652, + "learning_rate": 9.966933825261923e-06, + "loss": 0.3845, + "step": 2520, + "vit_learning_rate": 1.9933867650523843e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7257, + "epoch": 0.13160632542655015, + "grad_norm": 2.1958954478205928, + "learning_rate": 9.966448465935643e-06, + "loss": 0.3711, + "step": 2530, + "vit_learning_rate": 1.9932896931871285e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6356, + "epoch": 0.1321265085310029, + "grad_norm": 2.206194582250128, + "learning_rate": 9.965959582351154e-06, + "loss": 0.3938, + "step": 2540, + "vit_learning_rate": 1.9931919164702305e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7927, + "epoch": 0.13264669163545567, + "grad_norm": 1.8018114649433155, + "learning_rate": 9.965467174855372e-06, + "loss": 0.3948, + "step": 2550, + "vit_learning_rate": 1.9930934349710743e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6687, + "epoch": 0.13316687473990846, + "grad_norm": 3.07551836017383, + "learning_rate": 9.964971243797718e-06, + "loss": 0.4168, + "step": 2560, + "vit_learning_rate": 1.9929942487595435e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6141, + "epoch": 0.13368705784436122, + "grad_norm": 2.0293709171104584, + "learning_rate": 9.96447178953011e-06, + "loss": 0.3984, + "step": 2570, + "vit_learning_rate": 1.9928943579060218e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6697, + "epoch": 0.13420724094881398, + "grad_norm": 1.5487474182927907, + "learning_rate": 9.96396881240697e-06, + "loss": 0.3671, + "step": 2580, + "vit_learning_rate": 1.9927937624813936e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6224, + "epoch": 0.13472742405326674, + "grad_norm": 1.355515126397657, + "learning_rate": 9.963462312785213e-06, + "loss": 0.3681, + "step": 2590, + "vit_learning_rate": 1.9926924625570423e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7964, + "epoch": 0.13524760715771952, + "grad_norm": 1.8586685264223708, + "learning_rate": 9.96295229102426e-06, + "loss": 0.4013, + "step": 2600, + "vit_learning_rate": 1.992590458204852e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6432, + "epoch": 0.13576779026217228, + "grad_norm": 1.6539724947938057, + "learning_rate": 9.962438747486032e-06, + "loss": 0.3837, + "step": 2610, + "vit_learning_rate": 1.992487749497206e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6281, + "epoch": 0.13628797336662504, + "grad_norm": 1.8653500984387352, + "learning_rate": 9.96192168253494e-06, + "loss": 0.3813, + "step": 2620, + "vit_learning_rate": 1.992384336506988e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6944, + "epoch": 0.13680815647107783, + "grad_norm": 1.765582124182042, + "learning_rate": 9.961401096537909e-06, + "loss": 0.3896, + "step": 2630, + "vit_learning_rate": 1.9922802193075813e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6753, + "epoch": 0.1373283395755306, + "grad_norm": 1.5094009635703698, + "learning_rate": 9.960876989864345e-06, + "loss": 0.3916, + "step": 2640, + "vit_learning_rate": 1.9921753979728688e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6449, + "epoch": 0.13784852267998335, + "grad_norm": 1.864749784524792, + "learning_rate": 9.960349362886166e-06, + "loss": 0.3888, + "step": 2650, + "vit_learning_rate": 1.992069872577233e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6815, + "epoch": 0.1383687057844361, + "grad_norm": 1.5614408542737408, + "learning_rate": 9.959818215977782e-06, + "loss": 0.3782, + "step": 2660, + "vit_learning_rate": 1.9919636431955563e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6985, + "epoch": 0.1388888888888889, + "grad_norm": 1.5318365062701684, + "learning_rate": 9.959283549516102e-06, + "loss": 0.3789, + "step": 2670, + "vit_learning_rate": 1.9918567099032204e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.614, + "epoch": 0.13940907199334165, + "grad_norm": 1.3975851411875644, + "learning_rate": 9.958745363880533e-06, + "loss": 0.3877, + "step": 2680, + "vit_learning_rate": 1.9917490727761063e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6408, + "epoch": 0.1399292550977944, + "grad_norm": 1.9222022674942505, + "learning_rate": 9.958203659452977e-06, + "loss": 0.3748, + "step": 2690, + "vit_learning_rate": 1.991640731890595e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6821, + "epoch": 0.1404494382022472, + "grad_norm": 1.4619179085971252, + "learning_rate": 9.957658436617837e-06, + "loss": 0.3846, + "step": 2700, + "vit_learning_rate": 1.991531687323567e-06 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6657, + "epoch": 0.14096962130669996, + "grad_norm": 2.0259410175038717, + "learning_rate": 9.957109695762009e-06, + "loss": 0.3845, + "step": 2710, + "vit_learning_rate": 1.9914219391524017e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6308, + "epoch": 0.14148980441115272, + "grad_norm": 1.4787220473995417, + "learning_rate": 9.956557437274887e-06, + "loss": 0.3803, + "step": 2720, + "vit_learning_rate": 1.991311487454977e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.626, + "epoch": 0.1420099875156055, + "grad_norm": 1.7849965632942628, + "learning_rate": 9.956001661548362e-06, + "loss": 0.4043, + "step": 2730, + "vit_learning_rate": 1.9912003323096723e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7409, + "epoch": 0.14253017062005827, + "grad_norm": 1.9521048132109213, + "learning_rate": 9.955442368976818e-06, + "loss": 0.3859, + "step": 2740, + "vit_learning_rate": 1.9910884737953634e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7031, + "epoch": 0.14305035372451103, + "grad_norm": 1.963837451596469, + "learning_rate": 9.954879559957138e-06, + "loss": 0.4078, + "step": 2750, + "vit_learning_rate": 1.9909759119914275e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6624, + "epoch": 0.14357053682896379, + "grad_norm": 1.281598812564794, + "learning_rate": 9.954313234888699e-06, + "loss": 0.384, + "step": 2760, + "vit_learning_rate": 1.9908626469777395e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6266, + "epoch": 0.14409071993341657, + "grad_norm": 2.948203938394577, + "learning_rate": 9.95374339417337e-06, + "loss": 0.3912, + "step": 2770, + "vit_learning_rate": 1.990748678834674e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6562, + "epoch": 0.14461090303786933, + "grad_norm": 1.8007969403372046, + "learning_rate": 9.953170038215522e-06, + "loss": 0.4091, + "step": 2780, + "vit_learning_rate": 1.990634007643104e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6511, + "epoch": 0.1451310861423221, + "grad_norm": 1.867006884039199, + "learning_rate": 9.952593167422013e-06, + "loss": 0.4026, + "step": 2790, + "vit_learning_rate": 1.9905186334844024e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6847, + "epoch": 0.14565126924677488, + "grad_norm": 2.0927466230884684, + "learning_rate": 9.952012782202198e-06, + "loss": 0.4144, + "step": 2800, + "vit_learning_rate": 1.9904025564404393e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6594, + "epoch": 0.14617145235122764, + "grad_norm": 1.1855265147328142, + "learning_rate": 9.951428882967928e-06, + "loss": 0.3769, + "step": 2810, + "vit_learning_rate": 1.9902857765935853e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6039, + "epoch": 0.1466916354556804, + "grad_norm": 1.8562706403020512, + "learning_rate": 9.950841470133545e-06, + "loss": 0.3819, + "step": 2820, + "vit_learning_rate": 1.9901682940267086e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.706, + "epoch": 0.14721181856013316, + "grad_norm": 1.5130168014139982, + "learning_rate": 9.950250544115885e-06, + "loss": 0.4065, + "step": 2830, + "vit_learning_rate": 1.9900501088231768e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6248, + "epoch": 0.14773200166458594, + "grad_norm": 1.4338307313869836, + "learning_rate": 9.949656105334277e-06, + "loss": 0.3945, + "step": 2840, + "vit_learning_rate": 1.989931221066855e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6676, + "epoch": 0.1482521847690387, + "grad_norm": 1.3815697354652001, + "learning_rate": 9.949058154210542e-06, + "loss": 0.399, + "step": 2850, + "vit_learning_rate": 1.9898116308421083e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.646, + "epoch": 0.14877236787349146, + "grad_norm": 1.4300126664527184, + "learning_rate": 9.948456691168994e-06, + "loss": 0.3686, + "step": 2860, + "vit_learning_rate": 1.9896913382337988e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6526, + "epoch": 0.14929255097794425, + "grad_norm": 1.7740328430387957, + "learning_rate": 9.947851716636441e-06, + "loss": 0.3873, + "step": 2870, + "vit_learning_rate": 1.989570343327288e-06 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6599, + "epoch": 0.149812734082397, + "grad_norm": 3.2364782916236376, + "learning_rate": 9.947243231042181e-06, + "loss": 0.3905, + "step": 2880, + "vit_learning_rate": 1.9894486462084358e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6149, + "epoch": 0.15033291718684977, + "grad_norm": 1.6848264987004262, + "learning_rate": 9.946631234818001e-06, + "loss": 0.3818, + "step": 2890, + "vit_learning_rate": 1.9893262469636e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6669, + "epoch": 0.15085310029130253, + "grad_norm": 1.4955687349078843, + "learning_rate": 9.946015728398182e-06, + "loss": 0.3855, + "step": 2900, + "vit_learning_rate": 1.989203145679636e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6387, + "epoch": 0.15137328339575531, + "grad_norm": 1.6272287984570026, + "learning_rate": 9.945396712219497e-06, + "loss": 0.3804, + "step": 2910, + "vit_learning_rate": 1.989079342443899e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6894, + "epoch": 0.15189346650020807, + "grad_norm": 1.3149856469039565, + "learning_rate": 9.944774186721209e-06, + "loss": 0.3815, + "step": 2920, + "vit_learning_rate": 1.9889548373442417e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6803, + "epoch": 0.15241364960466083, + "grad_norm": 1.5391707258530647, + "learning_rate": 9.944148152345069e-06, + "loss": 0.3788, + "step": 2930, + "vit_learning_rate": 1.9888296304690136e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.665, + "epoch": 0.15293383270911362, + "grad_norm": 4.800848553005253, + "learning_rate": 9.943518609535319e-06, + "loss": 0.3738, + "step": 2940, + "vit_learning_rate": 1.9887037219070634e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6623, + "epoch": 0.15345401581356638, + "grad_norm": 2.0361962494356165, + "learning_rate": 9.942885558738692e-06, + "loss": 0.3788, + "step": 2950, + "vit_learning_rate": 1.9885771117477382e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6836, + "epoch": 0.15397419891801914, + "grad_norm": 1.726041666786871, + "learning_rate": 9.94224900040441e-06, + "loss": 0.3805, + "step": 2960, + "vit_learning_rate": 1.9884498000808816e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6896, + "epoch": 0.1544943820224719, + "grad_norm": 1.9973846658855723, + "learning_rate": 9.94160893498418e-06, + "loss": 0.403, + "step": 2970, + "vit_learning_rate": 1.9883217869968355e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6434, + "epoch": 0.15501456512692469, + "grad_norm": 1.3684905766734194, + "learning_rate": 9.940965362932204e-06, + "loss": 0.3864, + "step": 2980, + "vit_learning_rate": 1.9881930725864404e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6666, + "epoch": 0.15553474823137745, + "grad_norm": 5.83628470812151, + "learning_rate": 9.940318284705168e-06, + "loss": 0.3673, + "step": 2990, + "vit_learning_rate": 1.9880636569410332e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6343, + "epoch": 0.1560549313358302, + "grad_norm": 1.442133596399894, + "learning_rate": 9.93966770076225e-06, + "loss": 0.3856, + "step": 3000, + "vit_learning_rate": 1.9879335401524496e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.661, + "epoch": 0.156575114440283, + "grad_norm": 2.396697609048269, + "learning_rate": 9.93901361156511e-06, + "loss": 0.3879, + "step": 3010, + "vit_learning_rate": 1.9878027223130216e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6995, + "epoch": 0.15709529754473575, + "grad_norm": 1.7734662001591317, + "learning_rate": 9.938356017577896e-06, + "loss": 0.3917, + "step": 3020, + "vit_learning_rate": 1.987671203515579e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6975, + "epoch": 0.1576154806491885, + "grad_norm": 1.4355547298164602, + "learning_rate": 9.937694919267253e-06, + "loss": 0.3946, + "step": 3030, + "vit_learning_rate": 1.9875389838534506e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6313, + "epoch": 0.15813566375364127, + "grad_norm": 1.692385176529309, + "learning_rate": 9.9370303171023e-06, + "loss": 0.3858, + "step": 3040, + "vit_learning_rate": 1.98740606342046e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6272, + "epoch": 0.15865584685809406, + "grad_norm": 1.999251035638639, + "learning_rate": 9.936362211554649e-06, + "loss": 0.4006, + "step": 3050, + "vit_learning_rate": 1.9872724423109295e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7186, + "epoch": 0.15917602996254682, + "grad_norm": 2.1567917613098944, + "learning_rate": 9.935690603098395e-06, + "loss": 0.3852, + "step": 3060, + "vit_learning_rate": 1.9871381206196785e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6655, + "epoch": 0.15969621306699958, + "grad_norm": 1.6744452553745564, + "learning_rate": 9.935015492210122e-06, + "loss": 0.4109, + "step": 3070, + "vit_learning_rate": 1.9870030984420244e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6642, + "epoch": 0.16021639617145236, + "grad_norm": 2.1837565843778983, + "learning_rate": 9.934336879368898e-06, + "loss": 0.4189, + "step": 3080, + "vit_learning_rate": 1.986867375873779e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7061, + "epoch": 0.16073657927590512, + "grad_norm": 1.3953812408850208, + "learning_rate": 9.933654765056274e-06, + "loss": 0.3969, + "step": 3090, + "vit_learning_rate": 1.9867309530112544e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7194, + "epoch": 0.16125676238035788, + "grad_norm": 3.4699344817389175, + "learning_rate": 9.932969149756289e-06, + "loss": 0.3795, + "step": 3100, + "vit_learning_rate": 1.9865938299512575e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7085, + "epoch": 0.16177694548481064, + "grad_norm": 1.4254825908762652, + "learning_rate": 9.932280033955464e-06, + "loss": 0.3665, + "step": 3110, + "vit_learning_rate": 1.9864560067910926e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6585, + "epoch": 0.16229712858926343, + "grad_norm": 1.9270526010928508, + "learning_rate": 9.931587418142804e-06, + "loss": 0.3897, + "step": 3120, + "vit_learning_rate": 1.9863174836285606e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.624, + "epoch": 0.1628173116937162, + "grad_norm": 1.6309720393235745, + "learning_rate": 9.930891302809802e-06, + "loss": 0.3868, + "step": 3130, + "vit_learning_rate": 1.9861782605619603e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6527, + "epoch": 0.16333749479816895, + "grad_norm": 1.2095004389384036, + "learning_rate": 9.930191688450426e-06, + "loss": 0.4045, + "step": 3140, + "vit_learning_rate": 1.986038337690085e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6293, + "epoch": 0.16385767790262173, + "grad_norm": 1.5352545165626172, + "learning_rate": 9.929488575561137e-06, + "loss": 0.3563, + "step": 3150, + "vit_learning_rate": 1.985897715112227e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6363, + "epoch": 0.1643778610070745, + "grad_norm": 1.4535595254757843, + "learning_rate": 9.928781964640867e-06, + "loss": 0.3921, + "step": 3160, + "vit_learning_rate": 1.985756392928173e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6554, + "epoch": 0.16489804411152725, + "grad_norm": 1.4420986279913726, + "learning_rate": 9.92807185619104e-06, + "loss": 0.3628, + "step": 3170, + "vit_learning_rate": 1.985614371238208e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.766, + "epoch": 0.16541822721598, + "grad_norm": 2.3277495393872347, + "learning_rate": 9.92735825071556e-06, + "loss": 0.3636, + "step": 3180, + "vit_learning_rate": 1.985471650143112e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6213, + "epoch": 0.1659384103204328, + "grad_norm": 1.9001500985129456, + "learning_rate": 9.926641148720808e-06, + "loss": 0.3675, + "step": 3190, + "vit_learning_rate": 1.9853282297441613e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7381, + "epoch": 0.16645859342488556, + "grad_norm": 1.522577229328823, + "learning_rate": 9.925920550715651e-06, + "loss": 0.3895, + "step": 3200, + "vit_learning_rate": 1.9851841101431297e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6267, + "epoch": 0.16697877652933832, + "grad_norm": 1.8193168165222209, + "learning_rate": 9.925196457211434e-06, + "loss": 0.3865, + "step": 3210, + "vit_learning_rate": 1.9850392914422863e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6336, + "epoch": 0.1674989596337911, + "grad_norm": 2.0904313731385002, + "learning_rate": 9.924468868721984e-06, + "loss": 0.3879, + "step": 3220, + "vit_learning_rate": 1.9848937737443963e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.634, + "epoch": 0.16801914273824387, + "grad_norm": 1.7350769473719891, + "learning_rate": 9.923737785763606e-06, + "loss": 0.3745, + "step": 3230, + "vit_learning_rate": 1.9847475571527207e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6341, + "epoch": 0.16853932584269662, + "grad_norm": 1.5310640851908035, + "learning_rate": 9.923003208855089e-06, + "loss": 0.3814, + "step": 3240, + "vit_learning_rate": 1.9846006417710174e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6396, + "epoch": 0.16905950894714938, + "grad_norm": 1.2198141836686285, + "learning_rate": 9.922265138517696e-06, + "loss": 0.3917, + "step": 3250, + "vit_learning_rate": 1.984453027703539e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6375, + "epoch": 0.16957969205160217, + "grad_norm": 1.484152651786983, + "learning_rate": 9.921523575275173e-06, + "loss": 0.387, + "step": 3260, + "vit_learning_rate": 1.9843047150550343e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7122, + "epoch": 0.17009987515605493, + "grad_norm": 2.3444089618197794, + "learning_rate": 9.920778519653744e-06, + "loss": 0.4009, + "step": 3270, + "vit_learning_rate": 1.9841557039307485e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.637, + "epoch": 0.1706200582605077, + "grad_norm": 1.8821170559297629, + "learning_rate": 9.920029972182108e-06, + "loss": 0.399, + "step": 3280, + "vit_learning_rate": 1.9840059944364214e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6977, + "epoch": 0.17114024136496048, + "grad_norm": 1.3983585344882241, + "learning_rate": 9.919277933391446e-06, + "loss": 0.3694, + "step": 3290, + "vit_learning_rate": 1.983855586678289e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6513, + "epoch": 0.17166042446941324, + "grad_norm": 1.844697678475153, + "learning_rate": 9.918522403815414e-06, + "loss": 0.4152, + "step": 3300, + "vit_learning_rate": 1.9837044807630826e-06 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6807, + "epoch": 0.172180607573866, + "grad_norm": 1.3611179568879201, + "learning_rate": 9.917763383990146e-06, + "loss": 0.3926, + "step": 3310, + "vit_learning_rate": 1.983552676798029e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6274, + "epoch": 0.17270079067831876, + "grad_norm": 3.5977544452735897, + "learning_rate": 9.917000874454255e-06, + "loss": 0.3709, + "step": 3320, + "vit_learning_rate": 1.9834001748908507e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6344, + "epoch": 0.17322097378277154, + "grad_norm": 2.0035676651037635, + "learning_rate": 9.916234875748824e-06, + "loss": 0.3958, + "step": 3330, + "vit_learning_rate": 1.9832469751497645e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7062, + "epoch": 0.1737411568872243, + "grad_norm": 1.418679956381084, + "learning_rate": 9.915465388417418e-06, + "loss": 0.3614, + "step": 3340, + "vit_learning_rate": 1.9830930776834835e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7421, + "epoch": 0.17426133999167706, + "grad_norm": 1.3849167396079198, + "learning_rate": 9.914692413006075e-06, + "loss": 0.4062, + "step": 3350, + "vit_learning_rate": 1.982938482601215e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6604, + "epoch": 0.17478152309612985, + "grad_norm": 1.9489644682861815, + "learning_rate": 9.913915950063309e-06, + "loss": 0.3696, + "step": 3360, + "vit_learning_rate": 1.9827831900126616e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6914, + "epoch": 0.1753017062005826, + "grad_norm": 2.16366962255017, + "learning_rate": 9.91313600014011e-06, + "loss": 0.3909, + "step": 3370, + "vit_learning_rate": 1.9826272000280214e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6458, + "epoch": 0.17582188930503537, + "grad_norm": 1.8648243406848843, + "learning_rate": 9.912352563789936e-06, + "loss": 0.3894, + "step": 3380, + "vit_learning_rate": 1.982470512757987e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6231, + "epoch": 0.17634207240948813, + "grad_norm": 1.2420893721299746, + "learning_rate": 9.91156564156873e-06, + "loss": 0.3787, + "step": 3390, + "vit_learning_rate": 1.982313128313746e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6873, + "epoch": 0.1768622555139409, + "grad_norm": 2.313334907712331, + "learning_rate": 9.910775234034898e-06, + "loss": 0.368, + "step": 3400, + "vit_learning_rate": 1.9821550468069793e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6925, + "epoch": 0.17738243861839367, + "grad_norm": 1.6905595267817173, + "learning_rate": 9.909981341749327e-06, + "loss": 0.3761, + "step": 3410, + "vit_learning_rate": 1.981996268349865e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7108, + "epoch": 0.17790262172284643, + "grad_norm": 2.3274840680371867, + "learning_rate": 9.90918396527537e-06, + "loss": 0.3917, + "step": 3420, + "vit_learning_rate": 1.981836793055074e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6551, + "epoch": 0.17842280482729922, + "grad_norm": 1.5335020603888019, + "learning_rate": 9.908383105178861e-06, + "loss": 0.4419, + "step": 3430, + "vit_learning_rate": 1.9816766210357717e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6096, + "epoch": 0.17894298793175198, + "grad_norm": 1.647236549450501, + "learning_rate": 9.907578762028097e-06, + "loss": 0.3982, + "step": 3440, + "vit_learning_rate": 1.981515752405619e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6501, + "epoch": 0.17946317103620474, + "grad_norm": 2.03819258470158, + "learning_rate": 9.90677093639385e-06, + "loss": 0.3772, + "step": 3450, + "vit_learning_rate": 1.9813541872787697e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6817, + "epoch": 0.1799833541406575, + "grad_norm": 2.2365826523462293, + "learning_rate": 9.905959628849368e-06, + "loss": 0.3876, + "step": 3460, + "vit_learning_rate": 1.9811919257698733e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6857, + "epoch": 0.18050353724511028, + "grad_norm": 2.034281009438932, + "learning_rate": 9.905144839970363e-06, + "loss": 0.3543, + "step": 3470, + "vit_learning_rate": 1.9810289679940723e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7182, + "epoch": 0.18102372034956304, + "grad_norm": 1.6601155277650255, + "learning_rate": 9.904326570335017e-06, + "loss": 0.3791, + "step": 3480, + "vit_learning_rate": 1.9808653140670034e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6678, + "epoch": 0.1815439034540158, + "grad_norm": 1.421513403432145, + "learning_rate": 9.903504820523993e-06, + "loss": 0.3855, + "step": 3490, + "vit_learning_rate": 1.9807009641047984e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7938, + "epoch": 0.1820640865584686, + "grad_norm": 1.5482025466058273, + "learning_rate": 9.902679591120408e-06, + "loss": 0.37, + "step": 3500, + "vit_learning_rate": 1.9805359182240813e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6699, + "epoch": 0.18258426966292135, + "grad_norm": 1.4649921149975809, + "learning_rate": 9.90185088270986e-06, + "loss": 0.3984, + "step": 3510, + "vit_learning_rate": 1.980370176541972e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6679, + "epoch": 0.1831044527673741, + "grad_norm": 1.5555286795315832, + "learning_rate": 9.90101869588041e-06, + "loss": 0.3745, + "step": 3520, + "vit_learning_rate": 1.980203739176082e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6661, + "epoch": 0.1836246358718269, + "grad_norm": 1.5174511527429626, + "learning_rate": 9.90018303122259e-06, + "loss": 0.3907, + "step": 3530, + "vit_learning_rate": 1.9800366062445178e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6874, + "epoch": 0.18414481897627966, + "grad_norm": 1.706738376787952, + "learning_rate": 9.899343889329398e-06, + "loss": 0.4001, + "step": 3540, + "vit_learning_rate": 1.9798687778658794e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.664, + "epoch": 0.18466500208073242, + "grad_norm": 1.7030971706579083, + "learning_rate": 9.898501270796299e-06, + "loss": 0.4267, + "step": 3550, + "vit_learning_rate": 1.9797002541592597e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.613, + "epoch": 0.18518518518518517, + "grad_norm": 1.6456646299108872, + "learning_rate": 9.89765517622123e-06, + "loss": 0.3773, + "step": 3560, + "vit_learning_rate": 1.9795310352442456e-06 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6882, + "epoch": 0.18570536828963796, + "grad_norm": 1.382726584096793, + "learning_rate": 9.896805606204587e-06, + "loss": 0.3761, + "step": 3570, + "vit_learning_rate": 1.9793611212409173e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6898, + "epoch": 0.18622555139409072, + "grad_norm": 1.9764973836211777, + "learning_rate": 9.895952561349241e-06, + "loss": 0.3468, + "step": 3580, + "vit_learning_rate": 1.9791905122698477e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7172, + "epoch": 0.18674573449854348, + "grad_norm": 2.2872008744435415, + "learning_rate": 9.895096042260517e-06, + "loss": 0.3893, + "step": 3590, + "vit_learning_rate": 1.9790192084521032e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6286, + "epoch": 0.18726591760299627, + "grad_norm": 1.5747825516382399, + "learning_rate": 9.89423604954622e-06, + "loss": 0.3668, + "step": 3600, + "vit_learning_rate": 1.9788472099092438e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6904, + "epoch": 0.18778610070744903, + "grad_norm": 1.591221122526492, + "learning_rate": 9.893372583816608e-06, + "loss": 0.3631, + "step": 3610, + "vit_learning_rate": 1.978674516763321e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7682, + "epoch": 0.1883062838119018, + "grad_norm": 1.5789707364828272, + "learning_rate": 9.892505645684407e-06, + "loss": 0.3729, + "step": 3620, + "vit_learning_rate": 1.9785011291368813e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7148, + "epoch": 0.18882646691635455, + "grad_norm": 1.902175437472526, + "learning_rate": 9.891635235764812e-06, + "loss": 0.3832, + "step": 3630, + "vit_learning_rate": 1.978327047152962e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6122, + "epoch": 0.18934665002080733, + "grad_norm": 2.0920262857900855, + "learning_rate": 9.890761354675474e-06, + "loss": 0.3955, + "step": 3640, + "vit_learning_rate": 1.9781522709350946e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7211, + "epoch": 0.1898668331252601, + "grad_norm": 1.9072671885573282, + "learning_rate": 9.88988400303651e-06, + "loss": 0.4121, + "step": 3650, + "vit_learning_rate": 1.9779768006073015e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6823, + "epoch": 0.19038701622971285, + "grad_norm": 1.4631527625465286, + "learning_rate": 9.889003181470505e-06, + "loss": 0.3809, + "step": 3660, + "vit_learning_rate": 1.9778006362941006e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6343, + "epoch": 0.19090719933416564, + "grad_norm": 1.6570295824722825, + "learning_rate": 9.888118890602496e-06, + "loss": 0.3718, + "step": 3670, + "vit_learning_rate": 1.977623778120499e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.699, + "epoch": 0.1914273824386184, + "grad_norm": 2.5386912120096166, + "learning_rate": 9.887231131059994e-06, + "loss": 0.3834, + "step": 3680, + "vit_learning_rate": 1.9774462262119985e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6208, + "epoch": 0.19194756554307116, + "grad_norm": 1.6513475987455775, + "learning_rate": 9.886339903472959e-06, + "loss": 0.3898, + "step": 3690, + "vit_learning_rate": 1.9772679806945916e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6414, + "epoch": 0.19246774864752392, + "grad_norm": 1.4356333556834802, + "learning_rate": 9.88544520847382e-06, + "loss": 0.3798, + "step": 3700, + "vit_learning_rate": 1.977089041694764e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6196, + "epoch": 0.1929879317519767, + "grad_norm": 1.5894338801502665, + "learning_rate": 9.884547046697469e-06, + "loss": 0.3964, + "step": 3710, + "vit_learning_rate": 1.976909409339493e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8745, + "epoch": 0.19350811485642946, + "grad_norm": 1.9928568835080616, + "learning_rate": 9.883645418781247e-06, + "loss": 0.3903, + "step": 3720, + "vit_learning_rate": 1.976729083756249e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.698, + "epoch": 0.19402829796088222, + "grad_norm": 1.7472595326071496, + "learning_rate": 9.882740325364963e-06, + "loss": 0.3958, + "step": 3730, + "vit_learning_rate": 1.9765480650729925e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.722, + "epoch": 0.194548481065335, + "grad_norm": 1.8044702890783635, + "learning_rate": 9.881831767090886e-06, + "loss": 0.4019, + "step": 3740, + "vit_learning_rate": 1.976366353418177e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.696, + "epoch": 0.19506866416978777, + "grad_norm": 1.7129539382351262, + "learning_rate": 9.880919744603739e-06, + "loss": 0.399, + "step": 3750, + "vit_learning_rate": 1.9761839489207477e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6394, + "epoch": 0.19558884727424053, + "grad_norm": 1.3142177753829458, + "learning_rate": 9.880004258550708e-06, + "loss": 0.3617, + "step": 3760, + "vit_learning_rate": 1.976000851710141e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.654, + "epoch": 0.1961090303786933, + "grad_norm": 1.4820924556418327, + "learning_rate": 9.879085309581429e-06, + "loss": 0.3689, + "step": 3770, + "vit_learning_rate": 1.9758170619162857e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6362, + "epoch": 0.19662921348314608, + "grad_norm": 1.5270231243231582, + "learning_rate": 9.878162898348005e-06, + "loss": 0.3806, + "step": 3780, + "vit_learning_rate": 1.9756325796696006e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6753, + "epoch": 0.19714939658759884, + "grad_norm": 1.6646859977680302, + "learning_rate": 9.87723702550499e-06, + "loss": 0.3791, + "step": 3790, + "vit_learning_rate": 1.9754474051009973e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6353, + "epoch": 0.1976695796920516, + "grad_norm": 1.6667551283317843, + "learning_rate": 9.876307691709393e-06, + "loss": 0.3916, + "step": 3800, + "vit_learning_rate": 1.975261538341878e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6417, + "epoch": 0.19818976279650438, + "grad_norm": 1.4359844669757025, + "learning_rate": 9.875374897620687e-06, + "loss": 0.3817, + "step": 3810, + "vit_learning_rate": 1.975074979524137e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6503, + "epoch": 0.19870994590095714, + "grad_norm": 1.632834153195407, + "learning_rate": 9.87443864390079e-06, + "loss": 0.3949, + "step": 3820, + "vit_learning_rate": 1.974887728780158e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.681, + "epoch": 0.1992301290054099, + "grad_norm": 2.0950524238404284, + "learning_rate": 9.873498931214084e-06, + "loss": 0.3821, + "step": 3830, + "vit_learning_rate": 1.9746997862428166e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6492, + "epoch": 0.19975031210986266, + "grad_norm": 1.768758920830796, + "learning_rate": 9.872555760227401e-06, + "loss": 0.3861, + "step": 3840, + "vit_learning_rate": 1.97451115204548e-06 + }, + { + "avg_batch_load_time": 2.0573, + "avg_batch_processing_time": 0.6641, + "epoch": 0.20027049521431545, + "grad_norm": 1.6184698139671145, + "learning_rate": 9.871609131610024e-06, + "loss": 0.3941, + "step": 3850, + "vit_learning_rate": 1.9743218263220047e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6631, + "epoch": 0.2007906783187682, + "grad_norm": 1.658051764904703, + "learning_rate": 9.870659046033698e-06, + "loss": 0.395, + "step": 3860, + "vit_learning_rate": 1.9741318092067395e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6692, + "epoch": 0.20131086142322097, + "grad_norm": 1.5838142103912176, + "learning_rate": 9.869705504172614e-06, + "loss": 0.3781, + "step": 3870, + "vit_learning_rate": 1.973941100834523e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7232, + "epoch": 0.20183104452767375, + "grad_norm": 1.107200601312967, + "learning_rate": 9.86874850670342e-06, + "loss": 0.3929, + "step": 3880, + "vit_learning_rate": 1.973749701340684e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6434, + "epoch": 0.2023512276321265, + "grad_norm": 2.40795495845649, + "learning_rate": 9.867788054305211e-06, + "loss": 0.3846, + "step": 3890, + "vit_learning_rate": 1.9735576108610422e-06 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6833, + "epoch": 0.20287141073657927, + "grad_norm": 1.2232974367068747, + "learning_rate": 9.866824147659541e-06, + "loss": 0.3706, + "step": 3900, + "vit_learning_rate": 1.973364829531908e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6218, + "epoch": 0.20339159384103203, + "grad_norm": 1.9628188644406055, + "learning_rate": 9.865856787450408e-06, + "loss": 0.371, + "step": 3910, + "vit_learning_rate": 1.973171357490081e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6133, + "epoch": 0.20391177694548482, + "grad_norm": 2.4362061308628404, + "learning_rate": 9.864885974364264e-06, + "loss": 0.4077, + "step": 3920, + "vit_learning_rate": 1.9729771948728527e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 4.0484, + "epoch": 0.20443196004993758, + "grad_norm": 1.477009590142045, + "learning_rate": 9.863911709090012e-06, + "loss": 0.3952, + "step": 3930, + "vit_learning_rate": 1.9727823418180024e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7128, + "epoch": 0.20495214315439034, + "grad_norm": 1.9816324546899862, + "learning_rate": 9.862933992319003e-06, + "loss": 0.3962, + "step": 3940, + "vit_learning_rate": 1.9725867984638007e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.637, + "epoch": 0.20547232625884312, + "grad_norm": 1.6661455875968327, + "learning_rate": 9.861952824745037e-06, + "loss": 0.4077, + "step": 3950, + "vit_learning_rate": 1.9723905649490073e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6622, + "epoch": 0.20599250936329588, + "grad_norm": 2.182293824856373, + "learning_rate": 9.860968207064367e-06, + "loss": 0.3865, + "step": 3960, + "vit_learning_rate": 1.972193641412873e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.5364, + "epoch": 0.20651269246774864, + "grad_norm": 1.45401161417741, + "learning_rate": 9.859980139975687e-06, + "loss": 0.3798, + "step": 3970, + "vit_learning_rate": 1.971996027995137e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6594, + "epoch": 0.2070328755722014, + "grad_norm": 1.393670658459038, + "learning_rate": 9.858988624180143e-06, + "loss": 0.3894, + "step": 3980, + "vit_learning_rate": 1.9717977248360284e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.8445, + "epoch": 0.2075530586766542, + "grad_norm": 1.4957172518482018, + "learning_rate": 9.857993660381331e-06, + "loss": 0.3728, + "step": 3990, + "vit_learning_rate": 1.971598732076266e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.8307, + "epoch": 0.20807324178110695, + "grad_norm": 1.3852887410921912, + "learning_rate": 9.856995249285286e-06, + "loss": 0.3998, + "step": 4000, + "vit_learning_rate": 1.9713990498570573e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 2.8093, + "epoch": 0.2085934248855597, + "grad_norm": 1.1895310745307008, + "learning_rate": 9.855993391600499e-06, + "loss": 0.3899, + "step": 4010, + "vit_learning_rate": 1.9711986783200996e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5547, + "epoch": 0.2091136079900125, + "grad_norm": 1.8463053063535013, + "learning_rate": 9.854988088037898e-06, + "loss": 0.3994, + "step": 4020, + "vit_learning_rate": 1.9709976176075793e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.7836, + "epoch": 0.20963379109446525, + "grad_norm": 1.3908317424396814, + "learning_rate": 9.853979339310862e-06, + "loss": 0.3882, + "step": 4030, + "vit_learning_rate": 1.970795867862172e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6183, + "epoch": 0.21015397419891801, + "grad_norm": 1.662796588320816, + "learning_rate": 9.85296714613521e-06, + "loss": 0.3837, + "step": 4040, + "vit_learning_rate": 1.9705934292270417e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.5511, + "epoch": 0.21067415730337077, + "grad_norm": 1.3513909594357687, + "learning_rate": 9.851951509229209e-06, + "loss": 0.3831, + "step": 4050, + "vit_learning_rate": 1.9703903018458417e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6215, + "epoch": 0.21119434040782356, + "grad_norm": 1.51098751409228, + "learning_rate": 9.85093242931357e-06, + "loss": 0.3825, + "step": 4060, + "vit_learning_rate": 1.970186485862714e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6266, + "epoch": 0.21171452351227632, + "grad_norm": 1.5985195443423377, + "learning_rate": 9.849909907111444e-06, + "loss": 0.3882, + "step": 4070, + "vit_learning_rate": 1.9699819814222886e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.6988, + "epoch": 0.21223470661672908, + "grad_norm": 1.8450223258933487, + "learning_rate": 9.848883943348427e-06, + "loss": 0.377, + "step": 4080, + "vit_learning_rate": 1.9697767886696853e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.633, + "epoch": 0.21275488972118187, + "grad_norm": 1.900166806103517, + "learning_rate": 9.847854538752557e-06, + "loss": 0.3897, + "step": 4090, + "vit_learning_rate": 1.969570907750511e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 2.6946, + "epoch": 0.21327507282563463, + "grad_norm": 1.6679691555069602, + "learning_rate": 9.846821694054312e-06, + "loss": 0.4, + "step": 4100, + "vit_learning_rate": 1.9693643388108623e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5808, + "epoch": 0.21379525593008739, + "grad_norm": 1.7120880531149185, + "learning_rate": 9.845785409986615e-06, + "loss": 0.3614, + "step": 4110, + "vit_learning_rate": 1.9691570819973226e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.6733, + "epoch": 0.21431543903454014, + "grad_norm": 1.9892491162598802, + "learning_rate": 9.844745687284823e-06, + "loss": 0.3982, + "step": 4120, + "vit_learning_rate": 1.9689491374569643e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.6684, + "epoch": 0.21483562213899293, + "grad_norm": 1.7303760258307073, + "learning_rate": 9.84370252668674e-06, + "loss": 0.3806, + "step": 4130, + "vit_learning_rate": 1.968740505337348e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.7557, + "epoch": 0.2153558052434457, + "grad_norm": 2.1633817965936086, + "learning_rate": 9.842655928932607e-06, + "loss": 0.3862, + "step": 4140, + "vit_learning_rate": 1.968531185786521e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.8417, + "epoch": 0.21587598834789845, + "grad_norm": 1.9931531588794889, + "learning_rate": 9.841605894765101e-06, + "loss": 0.3793, + "step": 4150, + "vit_learning_rate": 1.96832117895302e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6569, + "epoch": 0.21639617145235124, + "grad_norm": 2.786483820067717, + "learning_rate": 9.840552424929344e-06, + "loss": 0.3834, + "step": 4160, + "vit_learning_rate": 1.9681104849858686e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 7.1061, + "epoch": 0.216916354556804, + "grad_norm": 1.4759709869886135, + "learning_rate": 9.83949552017289e-06, + "loss": 0.3876, + "step": 4170, + "vit_learning_rate": 1.9678991040345775e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.7224, + "epoch": 0.21743653766125676, + "grad_norm": 2.7961378396844343, + "learning_rate": 9.838435181245731e-06, + "loss": 0.3882, + "step": 4180, + "vit_learning_rate": 1.967687036249146e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6659, + "epoch": 0.21795672076570952, + "grad_norm": 1.4258420382770098, + "learning_rate": 9.837371408900303e-06, + "loss": 0.3733, + "step": 4190, + "vit_learning_rate": 1.96747428178006e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6367, + "epoch": 0.2184769038701623, + "grad_norm": 1.5497449567203456, + "learning_rate": 9.836304203891468e-06, + "loss": 0.3991, + "step": 4200, + "vit_learning_rate": 1.9672608407782933e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 4.8106, + "epoch": 0.21899708697461506, + "grad_norm": 1.4212149482143566, + "learning_rate": 9.83523356697653e-06, + "loss": 0.3776, + "step": 4210, + "vit_learning_rate": 1.967046713395306e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.69, + "epoch": 0.21951727007906782, + "grad_norm": 1.4682742657417118, + "learning_rate": 9.83415949891523e-06, + "loss": 0.4017, + "step": 4220, + "vit_learning_rate": 1.9668318997830457e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.6955, + "epoch": 0.2200374531835206, + "grad_norm": 1.7227820678707477, + "learning_rate": 9.833082000469737e-06, + "loss": 0.3955, + "step": 4230, + "vit_learning_rate": 1.9666164000939475e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.7642, + "epoch": 0.22055763628797337, + "grad_norm": 1.417532062150043, + "learning_rate": 9.832001072404662e-06, + "loss": 0.4141, + "step": 4240, + "vit_learning_rate": 1.9664002144809322e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6701, + "epoch": 0.22107781939242613, + "grad_norm": 1.7355773817183777, + "learning_rate": 9.830916715487045e-06, + "loss": 0.3858, + "step": 4250, + "vit_learning_rate": 1.9661833430974085e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 2.6447, + "epoch": 0.2215980024968789, + "grad_norm": 1.3317421292459164, + "learning_rate": 9.829828930486359e-06, + "loss": 0.3927, + "step": 4260, + "vit_learning_rate": 1.9659657860972712e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6625, + "epoch": 0.22211818560133167, + "grad_norm": 1.5998186628005218, + "learning_rate": 9.82873771817451e-06, + "loss": 0.4095, + "step": 4270, + "vit_learning_rate": 1.965747543634902e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6827, + "epoch": 0.22263836870578443, + "grad_norm": 1.6763806015983749, + "learning_rate": 9.82764307932584e-06, + "loss": 0.3713, + "step": 4280, + "vit_learning_rate": 1.9655286158651677e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6687, + "epoch": 0.2231585518102372, + "grad_norm": 1.7099312821619745, + "learning_rate": 9.826545014717117e-06, + "loss": 0.3745, + "step": 4290, + "vit_learning_rate": 1.965309002943423e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.7125, + "epoch": 0.22367873491468998, + "grad_norm": 1.8425796026348171, + "learning_rate": 9.825443525127542e-06, + "loss": 0.3934, + "step": 4300, + "vit_learning_rate": 1.9650887050255084e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6529, + "epoch": 0.22419891801914274, + "grad_norm": 3.402836069960489, + "learning_rate": 9.824338611338748e-06, + "loss": 0.3814, + "step": 4310, + "vit_learning_rate": 1.9648677222677493e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6541, + "epoch": 0.2247191011235955, + "grad_norm": 1.7986201050500197, + "learning_rate": 9.823230274134797e-06, + "loss": 0.3974, + "step": 4320, + "vit_learning_rate": 1.964646054826959e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6073, + "epoch": 0.2252392842280483, + "grad_norm": 1.5321861648339794, + "learning_rate": 9.82211851430218e-06, + "loss": 0.3772, + "step": 4330, + "vit_learning_rate": 1.9644237028604355e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6515, + "epoch": 0.22575946733250105, + "grad_norm": 1.6637527057805832, + "learning_rate": 9.821003332629812e-06, + "loss": 0.3743, + "step": 4340, + "vit_learning_rate": 1.9642006665259624e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6407, + "epoch": 0.2262796504369538, + "grad_norm": 1.310495691415425, + "learning_rate": 9.819884729909047e-06, + "loss": 0.4086, + "step": 4350, + "vit_learning_rate": 1.9639769459818093e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6385, + "epoch": 0.22679983354140656, + "grad_norm": 1.4955224552539332, + "learning_rate": 9.818762706933658e-06, + "loss": 0.3893, + "step": 4360, + "vit_learning_rate": 1.9637525413867314e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6684, + "epoch": 0.22732001664585935, + "grad_norm": 3.1996121059978275, + "learning_rate": 9.817637264499848e-06, + "loss": 0.3924, + "step": 4370, + "vit_learning_rate": 1.963527452899969e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6499, + "epoch": 0.2278401997503121, + "grad_norm": 2.552293584845462, + "learning_rate": 9.816508403406245e-06, + "loss": 0.3783, + "step": 4380, + "vit_learning_rate": 1.9633016806812488e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6486, + "epoch": 0.22836038285476487, + "grad_norm": 1.6604906240839654, + "learning_rate": 9.815376124453904e-06, + "loss": 0.3781, + "step": 4390, + "vit_learning_rate": 1.9630752248907803e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.62, + "epoch": 0.22888056595921766, + "grad_norm": 1.3093819509762679, + "learning_rate": 9.814240428446305e-06, + "loss": 0.4088, + "step": 4400, + "vit_learning_rate": 1.9628480856892607e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7034, + "epoch": 0.22940074906367042, + "grad_norm": 1.8235991541123031, + "learning_rate": 9.813101316189354e-06, + "loss": 0.3785, + "step": 4410, + "vit_learning_rate": 1.962620263237871e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6479, + "epoch": 0.22992093216812318, + "grad_norm": 1.3810895045744613, + "learning_rate": 9.811958788491382e-06, + "loss": 0.3997, + "step": 4420, + "vit_learning_rate": 1.962391757698276e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6515, + "epoch": 0.23044111527257594, + "grad_norm": 1.4431895277616638, + "learning_rate": 9.810812846163138e-06, + "loss": 0.3967, + "step": 4430, + "vit_learning_rate": 1.9621625692326275e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6547, + "epoch": 0.23096129837702872, + "grad_norm": 1.6830618904207943, + "learning_rate": 9.8096634900178e-06, + "loss": 0.3793, + "step": 4440, + "vit_learning_rate": 1.9619326980035597e-06 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7156, + "epoch": 0.23148148148148148, + "grad_norm": 1.3599197327716481, + "learning_rate": 9.808510720870968e-06, + "loss": 0.3692, + "step": 4450, + "vit_learning_rate": 1.961702144174193e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7094, + "epoch": 0.23200166458593424, + "grad_norm": 2.2371927368995483, + "learning_rate": 9.80735453954066e-06, + "loss": 0.3633, + "step": 4460, + "vit_learning_rate": 1.9614709079081313e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6509, + "epoch": 0.23252184769038703, + "grad_norm": 2.191196221973349, + "learning_rate": 9.806194946847318e-06, + "loss": 0.3748, + "step": 4470, + "vit_learning_rate": 1.9612389893694633e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.7606, + "epoch": 0.2330420307948398, + "grad_norm": 1.558156814582986, + "learning_rate": 9.805031943613805e-06, + "loss": 0.3654, + "step": 4480, + "vit_learning_rate": 1.9610063887227608e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.804, + "epoch": 0.23356221389929255, + "grad_norm": 1.7099869437261428, + "learning_rate": 9.803865530665403e-06, + "loss": 0.3879, + "step": 4490, + "vit_learning_rate": 1.9607731061330807e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7361, + "epoch": 0.2340823970037453, + "grad_norm": 1.353841383373491, + "learning_rate": 9.802695708829816e-06, + "loss": 0.3766, + "step": 4500, + "vit_learning_rate": 1.960539141765963e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.654, + "epoch": 0.2346025801081981, + "grad_norm": 1.637491011003399, + "learning_rate": 9.801522478937165e-06, + "loss": 0.3857, + "step": 4510, + "vit_learning_rate": 1.9603044957874327e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6694, + "epoch": 0.23512276321265085, + "grad_norm": 2.001422729709869, + "learning_rate": 9.800345841819988e-06, + "loss": 0.392, + "step": 4520, + "vit_learning_rate": 1.9600691683639972e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6641, + "epoch": 0.2356429463171036, + "grad_norm": 1.5371207937344504, + "learning_rate": 9.799165798313242e-06, + "loss": 0.4038, + "step": 4530, + "vit_learning_rate": 1.9598331596626482e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6944, + "epoch": 0.2361631294215564, + "grad_norm": 1.3406238259021739, + "learning_rate": 9.797982349254304e-06, + "loss": 0.3979, + "step": 4540, + "vit_learning_rate": 1.9595964698508603e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.679, + "epoch": 0.23668331252600916, + "grad_norm": 1.8539883124979424, + "learning_rate": 9.796795495482963e-06, + "loss": 0.3643, + "step": 4550, + "vit_learning_rate": 1.959359099096592e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6264, + "epoch": 0.23720349563046192, + "grad_norm": 2.9368584921794483, + "learning_rate": 9.795605237841427e-06, + "loss": 0.3921, + "step": 4560, + "vit_learning_rate": 1.9591210475682852e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6379, + "epoch": 0.23772367873491468, + "grad_norm": 1.79328707786782, + "learning_rate": 9.79441157717432e-06, + "loss": 0.3742, + "step": 4570, + "vit_learning_rate": 1.958882315434864e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6515, + "epoch": 0.23824386183936747, + "grad_norm": 1.5841960168074982, + "learning_rate": 9.79321451432868e-06, + "loss": 0.385, + "step": 4580, + "vit_learning_rate": 1.9586429028657356e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6396, + "epoch": 0.23876404494382023, + "grad_norm": 1.7710264216219769, + "learning_rate": 9.792014050153955e-06, + "loss": 0.3815, + "step": 4590, + "vit_learning_rate": 1.958402810030791e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6295, + "epoch": 0.23928422804827298, + "grad_norm": 1.6956758398886191, + "learning_rate": 9.790810185502015e-06, + "loss": 0.4085, + "step": 4600, + "vit_learning_rate": 1.9581620371004026e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7757, + "epoch": 0.23980441115272577, + "grad_norm": 1.3373346258466072, + "learning_rate": 9.789602921227135e-06, + "loss": 0.4092, + "step": 4610, + "vit_learning_rate": 1.9579205842454266e-06 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7103, + "epoch": 0.24032459425717853, + "grad_norm": 2.085556213767069, + "learning_rate": 9.788392258186007e-06, + "loss": 0.3759, + "step": 4620, + "vit_learning_rate": 1.957678451637201e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6919, + "epoch": 0.2408447773616313, + "grad_norm": 1.6553777655318862, + "learning_rate": 9.787178197237734e-06, + "loss": 0.3737, + "step": 4630, + "vit_learning_rate": 1.9574356394475467e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7008, + "epoch": 0.24136496046608405, + "grad_norm": 2.1487906147111837, + "learning_rate": 9.785960739243829e-06, + "loss": 0.3861, + "step": 4640, + "vit_learning_rate": 1.9571921478487656e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6446, + "epoch": 0.24188514357053684, + "grad_norm": 1.646925783730869, + "learning_rate": 9.784739885068218e-06, + "loss": 0.3979, + "step": 4650, + "vit_learning_rate": 1.9569479770136433e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6676, + "epoch": 0.2424053266749896, + "grad_norm": 2.1126001419829215, + "learning_rate": 9.783515635577235e-06, + "loss": 0.3985, + "step": 4660, + "vit_learning_rate": 1.9567031271154466e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6913, + "epoch": 0.24292550977944236, + "grad_norm": 1.5018173924249225, + "learning_rate": 9.782287991639622e-06, + "loss": 0.3688, + "step": 4670, + "vit_learning_rate": 1.9564575983279245e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.625, + "epoch": 0.24344569288389514, + "grad_norm": 1.7459295985678787, + "learning_rate": 9.781056954126535e-06, + "loss": 0.4189, + "step": 4680, + "vit_learning_rate": 1.956211390825307e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6835, + "epoch": 0.2439658759883479, + "grad_norm": 1.7267175797698169, + "learning_rate": 9.779822523911532e-06, + "loss": 0.3546, + "step": 4690, + "vit_learning_rate": 1.955964504782306e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6435, + "epoch": 0.24448605909280066, + "grad_norm": 1.950481492352406, + "learning_rate": 9.778584701870581e-06, + "loss": 0.3664, + "step": 4700, + "vit_learning_rate": 1.955716940374116e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7059, + "epoch": 0.24500624219725342, + "grad_norm": 2.0095159744680275, + "learning_rate": 9.777343488882058e-06, + "loss": 0.394, + "step": 4710, + "vit_learning_rate": 1.9554686977764116e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.717, + "epoch": 0.2455264253017062, + "grad_norm": 1.5604383657087455, + "learning_rate": 9.776098885826745e-06, + "loss": 0.3989, + "step": 4720, + "vit_learning_rate": 1.955219777165349e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6492, + "epoch": 0.24604660840615897, + "grad_norm": 1.432188048443782, + "learning_rate": 9.774850893587828e-06, + "loss": 0.4006, + "step": 4730, + "vit_learning_rate": 1.9549701787175656e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6447, + "epoch": 0.24656679151061173, + "grad_norm": 1.302400963735209, + "learning_rate": 9.773599513050898e-06, + "loss": 0.3965, + "step": 4740, + "vit_learning_rate": 1.9547199026101793e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6776, + "epoch": 0.24708697461506451, + "grad_norm": 1.831188077677403, + "learning_rate": 9.772344745103955e-06, + "loss": 0.3694, + "step": 4750, + "vit_learning_rate": 1.9544689490207904e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.661, + "epoch": 0.24760715771951727, + "grad_norm": 1.725418310454588, + "learning_rate": 9.771086590637392e-06, + "loss": 0.3916, + "step": 4760, + "vit_learning_rate": 1.9542173181274782e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6448, + "epoch": 0.24812734082397003, + "grad_norm": 1.7530909553078537, + "learning_rate": 9.769825050544022e-06, + "loss": 0.382, + "step": 4770, + "vit_learning_rate": 1.953965010108804e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6444, + "epoch": 0.2486475239284228, + "grad_norm": 1.516276262417959, + "learning_rate": 9.768560125719043e-06, + "loss": 0.3935, + "step": 4780, + "vit_learning_rate": 1.9537120251438083e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7062, + "epoch": 0.24916770703287558, + "grad_norm": 1.7837088788752538, + "learning_rate": 9.767291817060066e-06, + "loss": 0.3908, + "step": 4790, + "vit_learning_rate": 1.9534583634120128e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6307, + "epoch": 0.24968789013732834, + "grad_norm": 1.1837992839416889, + "learning_rate": 9.766020125467097e-06, + "loss": 0.3778, + "step": 4800, + "vit_learning_rate": 1.953204025093419e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7343, + "epoch": 0.2502080732417811, + "grad_norm": 1.464135742001841, + "learning_rate": 9.764745051842548e-06, + "loss": 0.379, + "step": 4810, + "vit_learning_rate": 1.9529490103685094e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6175, + "epoch": 0.25072825634623386, + "grad_norm": 1.5909369273294518, + "learning_rate": 9.763466597091228e-06, + "loss": 0.4176, + "step": 4820, + "vit_learning_rate": 1.9526933194182454e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6347, + "epoch": 0.25124843945068664, + "grad_norm": 1.6409921912591405, + "learning_rate": 9.762184762120346e-06, + "loss": 0.4028, + "step": 4830, + "vit_learning_rate": 1.952436952424069e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6407, + "epoch": 0.25176862255513943, + "grad_norm": 1.418541573333437, + "learning_rate": 9.760899547839506e-06, + "loss": 0.4024, + "step": 4840, + "vit_learning_rate": 1.952179909567901e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6133, + "epoch": 0.25228880565959216, + "grad_norm": 1.8035171965092758, + "learning_rate": 9.759610955160719e-06, + "loss": 0.3897, + "step": 4850, + "vit_learning_rate": 1.9519221910321437e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.626, + "epoch": 0.25280898876404495, + "grad_norm": 1.56647524494443, + "learning_rate": 9.75831898499838e-06, + "loss": 0.3882, + "step": 4860, + "vit_learning_rate": 1.9516637969996757e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6991, + "epoch": 0.25332917186849774, + "grad_norm": 1.5870502240655184, + "learning_rate": 9.757023638269297e-06, + "loss": 0.3793, + "step": 4870, + "vit_learning_rate": 1.951404727653859e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.638, + "epoch": 0.25384935497295047, + "grad_norm": 1.6899159807942226, + "learning_rate": 9.755724915892658e-06, + "loss": 0.3967, + "step": 4880, + "vit_learning_rate": 1.9511449831785313e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6727, + "epoch": 0.25436953807740326, + "grad_norm": 1.1869494150412128, + "learning_rate": 9.754422818790054e-06, + "loss": 0.3907, + "step": 4890, + "vit_learning_rate": 1.950884563758011e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6086, + "epoch": 0.254889721181856, + "grad_norm": 1.7990760714693486, + "learning_rate": 9.753117347885477e-06, + "loss": 0.3869, + "step": 4900, + "vit_learning_rate": 1.9506234695770953e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6654, + "epoch": 0.2554099042863088, + "grad_norm": 1.3670242975101572, + "learning_rate": 9.751808504105303e-06, + "loss": 0.4092, + "step": 4910, + "vit_learning_rate": 1.9503617008210604e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6884, + "epoch": 0.25593008739076156, + "grad_norm": 1.5938183255045866, + "learning_rate": 9.750496288378301e-06, + "loss": 0.3846, + "step": 4920, + "vit_learning_rate": 1.95009925767566e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6315, + "epoch": 0.2564502704952143, + "grad_norm": 2.1132099063465297, + "learning_rate": 9.749180701635644e-06, + "loss": 0.3938, + "step": 4930, + "vit_learning_rate": 1.9498361403271285e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.682, + "epoch": 0.2569704535996671, + "grad_norm": 2.5110703510537173, + "learning_rate": 9.747861744810885e-06, + "loss": 0.3668, + "step": 4940, + "vit_learning_rate": 1.949572348962177e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6463, + "epoch": 0.25749063670411987, + "grad_norm": 1.2485946603610771, + "learning_rate": 9.746539418839975e-06, + "loss": 0.3885, + "step": 4950, + "vit_learning_rate": 1.9493078837679946e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6516, + "epoch": 0.2580108198085726, + "grad_norm": 1.6279236704947426, + "learning_rate": 9.745213724661254e-06, + "loss": 0.4089, + "step": 4960, + "vit_learning_rate": 1.9490427449322503e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.661, + "epoch": 0.2585310029130254, + "grad_norm": 1.8000409334227694, + "learning_rate": 9.743884663215452e-06, + "loss": 0.3913, + "step": 4970, + "vit_learning_rate": 1.94877693264309e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6364, + "epoch": 0.2590511860174782, + "grad_norm": 1.5553646601744293, + "learning_rate": 9.742552235445689e-06, + "loss": 0.3575, + "step": 4980, + "vit_learning_rate": 1.9485104470891375e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6323, + "epoch": 0.2595713691219309, + "grad_norm": 1.5560367226120977, + "learning_rate": 9.741216442297474e-06, + "loss": 0.406, + "step": 4990, + "vit_learning_rate": 1.9482432884594948e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6735, + "epoch": 0.2600915522263837, + "grad_norm": 1.5705441088026753, + "learning_rate": 9.739877284718704e-06, + "loss": 0.3833, + "step": 5000, + "vit_learning_rate": 1.9479754569437405e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6625, + "epoch": 0.2606117353308365, + "grad_norm": 1.3555238259024887, + "learning_rate": 9.738534763659662e-06, + "loss": 0.3877, + "step": 5010, + "vit_learning_rate": 1.947706952731932e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6576, + "epoch": 0.2611319184352892, + "grad_norm": 1.5422850981585867, + "learning_rate": 9.737188880073019e-06, + "loss": 0.3821, + "step": 5020, + "vit_learning_rate": 1.9474377760146033e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6143, + "epoch": 0.261652101539742, + "grad_norm": 1.2527257168806, + "learning_rate": 9.735839634913831e-06, + "loss": 0.3677, + "step": 5030, + "vit_learning_rate": 1.947167926982766e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6572, + "epoch": 0.26217228464419473, + "grad_norm": 2.302034830835032, + "learning_rate": 9.734487029139544e-06, + "loss": 0.3853, + "step": 5040, + "vit_learning_rate": 1.9468974058279084e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6307, + "epoch": 0.2626924677486475, + "grad_norm": 1.3478605220096411, + "learning_rate": 9.733131063709981e-06, + "loss": 0.36, + "step": 5050, + "vit_learning_rate": 1.946626212741996e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6787, + "epoch": 0.2632126508531003, + "grad_norm": 1.3939511988671585, + "learning_rate": 9.731771739587356e-06, + "loss": 0.3674, + "step": 5060, + "vit_learning_rate": 1.946354347917471e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8182, + "epoch": 0.26373283395755304, + "grad_norm": 1.7831764191119335, + "learning_rate": 9.730409057736263e-06, + "loss": 0.3951, + "step": 5070, + "vit_learning_rate": 1.9460818115472524e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6523, + "epoch": 0.2642530170620058, + "grad_norm": 2.1703042628037634, + "learning_rate": 9.729043019123678e-06, + "loss": 0.3887, + "step": 5080, + "vit_learning_rate": 1.9458086038247353e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6371, + "epoch": 0.2647732001664586, + "grad_norm": 2.6989691349510596, + "learning_rate": 9.72767362471896e-06, + "loss": 0.3934, + "step": 5090, + "vit_learning_rate": 1.945534724943792e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.629, + "epoch": 0.26529338327091134, + "grad_norm": 2.0297104412197724, + "learning_rate": 9.726300875493853e-06, + "loss": 0.3914, + "step": 5100, + "vit_learning_rate": 1.9452601750987703e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6495, + "epoch": 0.26581356637536413, + "grad_norm": 2.178327500648506, + "learning_rate": 9.724924772422475e-06, + "loss": 0.4113, + "step": 5110, + "vit_learning_rate": 1.9449849544844947e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6294, + "epoch": 0.2663337494798169, + "grad_norm": 1.7035274404720875, + "learning_rate": 9.723545316481329e-06, + "loss": 0.4028, + "step": 5120, + "vit_learning_rate": 1.9447090632962656e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.75, + "epoch": 0.26685393258426965, + "grad_norm": 2.217901377095658, + "learning_rate": 9.722162508649293e-06, + "loss": 0.3786, + "step": 5130, + "vit_learning_rate": 1.9444325017298582e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6626, + "epoch": 0.26737411568872244, + "grad_norm": 1.6295894673282474, + "learning_rate": 9.720776349907626e-06, + "loss": 0.3691, + "step": 5140, + "vit_learning_rate": 1.944155269981525e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.652, + "epoch": 0.2678942987931752, + "grad_norm": 1.6802371654817068, + "learning_rate": 9.71938684123997e-06, + "loss": 0.3725, + "step": 5150, + "vit_learning_rate": 1.9438773682479932e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7845, + "epoch": 0.26841448189762795, + "grad_norm": 2.0589229765646335, + "learning_rate": 9.717993983632331e-06, + "loss": 0.3727, + "step": 5160, + "vit_learning_rate": 1.943598796726466e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7501, + "epoch": 0.26893466500208074, + "grad_norm": 2.343940464466448, + "learning_rate": 9.716597778073105e-06, + "loss": 0.3896, + "step": 5170, + "vit_learning_rate": 1.9433195556146206e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6234, + "epoch": 0.2694548481065335, + "grad_norm": 1.6418131555287914, + "learning_rate": 9.715198225553057e-06, + "loss": 0.3907, + "step": 5180, + "vit_learning_rate": 1.943039645110611e-06 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6627, + "epoch": 0.26997503121098626, + "grad_norm": 1.7870108515877112, + "learning_rate": 9.713795327065328e-06, + "loss": 0.3986, + "step": 5190, + "vit_learning_rate": 1.9427590654130653e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6529, + "epoch": 0.27049521431543905, + "grad_norm": 1.6396575625498975, + "learning_rate": 9.712389083605432e-06, + "loss": 0.3765, + "step": 5200, + "vit_learning_rate": 1.942477816721086e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6954, + "epoch": 0.2710153974198918, + "grad_norm": 1.3934413814498805, + "learning_rate": 9.710979496171258e-06, + "loss": 0.3688, + "step": 5210, + "vit_learning_rate": 1.9421958992342515e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6763, + "epoch": 0.27153558052434457, + "grad_norm": 1.4795475029302723, + "learning_rate": 9.709566565763072e-06, + "loss": 0.3927, + "step": 5220, + "vit_learning_rate": 1.941913313152614e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6564, + "epoch": 0.27205576362879735, + "grad_norm": 1.5900998805670414, + "learning_rate": 9.708150293383505e-06, + "loss": 0.4158, + "step": 5230, + "vit_learning_rate": 1.9416300586767007e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7133, + "epoch": 0.2725759467332501, + "grad_norm": 1.8207752830707664, + "learning_rate": 9.706730680037563e-06, + "loss": 0.3912, + "step": 5240, + "vit_learning_rate": 1.9413461360075124e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.653, + "epoch": 0.2730961298377029, + "grad_norm": 1.662165318270706, + "learning_rate": 9.705307726732623e-06, + "loss": 0.3817, + "step": 5250, + "vit_learning_rate": 1.9410615453465242e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6692, + "epoch": 0.27361631294215566, + "grad_norm": 1.5702846599174598, + "learning_rate": 9.703881434478432e-06, + "loss": 0.3668, + "step": 5260, + "vit_learning_rate": 1.9407762868956864e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.658, + "epoch": 0.2741364960466084, + "grad_norm": 1.6792934104904866, + "learning_rate": 9.702451804287106e-06, + "loss": 0.3826, + "step": 5270, + "vit_learning_rate": 1.940490360857421e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6268, + "epoch": 0.2746566791510612, + "grad_norm": 1.8584935756510894, + "learning_rate": 9.701018837173131e-06, + "loss": 0.3736, + "step": 5280, + "vit_learning_rate": 1.940203767434626e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6443, + "epoch": 0.27517686225551397, + "grad_norm": 2.229968089395708, + "learning_rate": 9.699582534153356e-06, + "loss": 0.3543, + "step": 5290, + "vit_learning_rate": 1.939916506830671e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6534, + "epoch": 0.2756970453599667, + "grad_norm": 1.4533777724893546, + "learning_rate": 9.698142896247003e-06, + "loss": 0.3686, + "step": 5300, + "vit_learning_rate": 1.9396285792494004e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6725, + "epoch": 0.2762172284644195, + "grad_norm": 1.428209563352205, + "learning_rate": 9.696699924475659e-06, + "loss": 0.38, + "step": 5310, + "vit_learning_rate": 1.9393399848951317e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6535, + "epoch": 0.2767374115688722, + "grad_norm": 2.286619288325488, + "learning_rate": 9.695253619863276e-06, + "loss": 0.3674, + "step": 5320, + "vit_learning_rate": 1.939050723972655e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6339, + "epoch": 0.277257594673325, + "grad_norm": 1.3814373018977728, + "learning_rate": 9.69380398343617e-06, + "loss": 0.4018, + "step": 5330, + "vit_learning_rate": 1.938760796687234e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6446, + "epoch": 0.2777777777777778, + "grad_norm": 1.4527449572632882, + "learning_rate": 9.692351016223022e-06, + "loss": 0.3963, + "step": 5340, + "vit_learning_rate": 1.9384702032446042e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6314, + "epoch": 0.2782979608822305, + "grad_norm": 1.8231383418860287, + "learning_rate": 9.690894719254879e-06, + "loss": 0.3766, + "step": 5350, + "vit_learning_rate": 1.9381789438509753e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6423, + "epoch": 0.2788181439866833, + "grad_norm": 1.1567063159085391, + "learning_rate": 9.689435093565146e-06, + "loss": 0.3675, + "step": 5360, + "vit_learning_rate": 1.9378870187130288e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.663, + "epoch": 0.2793383270911361, + "grad_norm": 1.3524108536214523, + "learning_rate": 9.687972140189595e-06, + "loss": 0.3872, + "step": 5370, + "vit_learning_rate": 1.9375944280379187e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6925, + "epoch": 0.2798585101955888, + "grad_norm": 1.8262518205736473, + "learning_rate": 9.686505860166357e-06, + "loss": 0.3704, + "step": 5380, + "vit_learning_rate": 1.937301172033271e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6878, + "epoch": 0.2803786933000416, + "grad_norm": 1.7787047880705764, + "learning_rate": 9.685036254535925e-06, + "loss": 0.3844, + "step": 5390, + "vit_learning_rate": 1.9370072509071847e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6192, + "epoch": 0.2808988764044944, + "grad_norm": 2.293564130517082, + "learning_rate": 9.683563324341147e-06, + "loss": 0.3924, + "step": 5400, + "vit_learning_rate": 1.9367126648682293e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6443, + "epoch": 0.28141905950894713, + "grad_norm": 1.7157810773872795, + "learning_rate": 9.682087070627238e-06, + "loss": 0.3877, + "step": 5410, + "vit_learning_rate": 1.9364174141254473e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7479, + "epoch": 0.2819392426133999, + "grad_norm": 1.804992606125261, + "learning_rate": 9.680607494441765e-06, + "loss": 0.3865, + "step": 5420, + "vit_learning_rate": 1.9361214988883527e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.656, + "epoch": 0.2824594257178527, + "grad_norm": 1.6714149667857876, + "learning_rate": 9.679124596834655e-06, + "loss": 0.3902, + "step": 5430, + "vit_learning_rate": 1.935824919366931e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6491, + "epoch": 0.28297960882230544, + "grad_norm": 1.596818782723876, + "learning_rate": 9.677638378858194e-06, + "loss": 0.399, + "step": 5440, + "vit_learning_rate": 1.9355276757716386e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6637, + "epoch": 0.2834997919267582, + "grad_norm": 3.687526745944351, + "learning_rate": 9.676148841567022e-06, + "loss": 0.4068, + "step": 5450, + "vit_learning_rate": 1.935229768313404e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6239, + "epoch": 0.284019975031211, + "grad_norm": 1.0596461285162888, + "learning_rate": 9.67465598601813e-06, + "loss": 0.3698, + "step": 5460, + "vit_learning_rate": 1.934931197203626e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6636, + "epoch": 0.28454015813566375, + "grad_norm": 1.5422999500673549, + "learning_rate": 9.673159813270875e-06, + "loss": 0.4116, + "step": 5470, + "vit_learning_rate": 1.9346319626541747e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6661, + "epoch": 0.28506034124011653, + "grad_norm": 3.0793384181746033, + "learning_rate": 9.671660324386957e-06, + "loss": 0.3544, + "step": 5480, + "vit_learning_rate": 1.934332064877391e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6402, + "epoch": 0.28558052434456926, + "grad_norm": 1.8321374789346172, + "learning_rate": 9.670157520430434e-06, + "loss": 0.3785, + "step": 5490, + "vit_learning_rate": 1.9340315040860865e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6367, + "epoch": 0.28610070744902205, + "grad_norm": 1.5261490212158348, + "learning_rate": 9.668651402467715e-06, + "loss": 0.399, + "step": 5500, + "vit_learning_rate": 1.9337302804935428e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6987, + "epoch": 0.28662089055347484, + "grad_norm": 2.2189757967197092, + "learning_rate": 9.667141971567565e-06, + "loss": 0.3911, + "step": 5510, + "vit_learning_rate": 1.9334283943135126e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6649, + "epoch": 0.28714107365792757, + "grad_norm": 1.4159578111886437, + "learning_rate": 9.66562922880109e-06, + "loss": 0.4192, + "step": 5520, + "vit_learning_rate": 1.933125845760218e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6492, + "epoch": 0.28766125676238036, + "grad_norm": 1.7851036045624546, + "learning_rate": 9.664113175241759e-06, + "loss": 0.3826, + "step": 5530, + "vit_learning_rate": 1.9328226350483516e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6281, + "epoch": 0.28818143986683314, + "grad_norm": 1.431302423101443, + "learning_rate": 9.662593811965379e-06, + "loss": 0.3906, + "step": 5540, + "vit_learning_rate": 1.9325187623930756e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6125, + "epoch": 0.2887016229712859, + "grad_norm": 1.7250563229113764, + "learning_rate": 9.661071140050115e-06, + "loss": 0.3989, + "step": 5550, + "vit_learning_rate": 1.9322142280100228e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6694, + "epoch": 0.28922180607573866, + "grad_norm": 1.4432084322979677, + "learning_rate": 9.659545160576472e-06, + "loss": 0.3923, + "step": 5560, + "vit_learning_rate": 1.931909032115294e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6475, + "epoch": 0.28974198918019145, + "grad_norm": 1.8735064264278298, + "learning_rate": 9.658015874627306e-06, + "loss": 0.4029, + "step": 5570, + "vit_learning_rate": 1.931603174925461e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6621, + "epoch": 0.2902621722846442, + "grad_norm": 1.9050317102772254, + "learning_rate": 9.656483283287821e-06, + "loss": 0.4028, + "step": 5580, + "vit_learning_rate": 1.9312966566575637e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6448, + "epoch": 0.29078235538909697, + "grad_norm": 1.5891250910150534, + "learning_rate": 9.65494738764556e-06, + "loss": 0.3973, + "step": 5590, + "vit_learning_rate": 1.930989477529112e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6695, + "epoch": 0.29130253849354976, + "grad_norm": 1.3295584708045196, + "learning_rate": 9.653408188790418e-06, + "loss": 0.3971, + "step": 5600, + "vit_learning_rate": 1.9306816377580832e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6473, + "epoch": 0.2918227215980025, + "grad_norm": 2.0924367059198636, + "learning_rate": 9.651865687814632e-06, + "loss": 0.3714, + "step": 5610, + "vit_learning_rate": 1.9303731375629263e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6404, + "epoch": 0.2923429047024553, + "grad_norm": 1.64844835567205, + "learning_rate": 9.650319885812777e-06, + "loss": 0.4067, + "step": 5620, + "vit_learning_rate": 1.9300639771625554e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6581, + "epoch": 0.292863087806908, + "grad_norm": 1.7666881214142498, + "learning_rate": 9.648770783881782e-06, + "loss": 0.3928, + "step": 5630, + "vit_learning_rate": 1.929754156776356e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6715, + "epoch": 0.2933832709113608, + "grad_norm": 3.3972322343272676, + "learning_rate": 9.647218383120902e-06, + "loss": 0.3914, + "step": 5640, + "vit_learning_rate": 1.92944367662418e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.644, + "epoch": 0.2939034540158136, + "grad_norm": 1.5669650093248466, + "learning_rate": 9.645662684631748e-06, + "loss": 0.3659, + "step": 5650, + "vit_learning_rate": 1.9291325369263493e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6783, + "epoch": 0.2944236371202663, + "grad_norm": 2.4598523347418833, + "learning_rate": 9.644103689518261e-06, + "loss": 0.4047, + "step": 5660, + "vit_learning_rate": 1.928820737903652e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6604, + "epoch": 0.2949438202247191, + "grad_norm": 1.6495314971675479, + "learning_rate": 9.642541398886726e-06, + "loss": 0.3911, + "step": 5670, + "vit_learning_rate": 1.928508279777345e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7077, + "epoch": 0.2954640033291719, + "grad_norm": 1.5241146512940529, + "learning_rate": 9.640975813845764e-06, + "loss": 0.386, + "step": 5680, + "vit_learning_rate": 1.9281951627691525e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6557, + "epoch": 0.2959841864336246, + "grad_norm": 1.7684744720801258, + "learning_rate": 9.639406935506337e-06, + "loss": 0.3853, + "step": 5690, + "vit_learning_rate": 1.927881387101267e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6754, + "epoch": 0.2965043695380774, + "grad_norm": 2.4191111692268663, + "learning_rate": 9.63783476498174e-06, + "loss": 0.3961, + "step": 5700, + "vit_learning_rate": 1.927566952996348e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6576, + "epoch": 0.2970245526425302, + "grad_norm": 2.1890499481814776, + "learning_rate": 9.636259303387609e-06, + "loss": 0.3708, + "step": 5710, + "vit_learning_rate": 1.9272518606775214e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7014, + "epoch": 0.2975447357469829, + "grad_norm": 1.3898027105437007, + "learning_rate": 9.634680551841909e-06, + "loss": 0.3826, + "step": 5720, + "vit_learning_rate": 1.9269361103683815e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6195, + "epoch": 0.2980649188514357, + "grad_norm": 1.7069275498740146, + "learning_rate": 9.633098511464947e-06, + "loss": 0.3844, + "step": 5730, + "vit_learning_rate": 1.926619702292989e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6518, + "epoch": 0.2985851019558885, + "grad_norm": 1.5800773052471246, + "learning_rate": 9.631513183379355e-06, + "loss": 0.3902, + "step": 5740, + "vit_learning_rate": 1.926302636675871e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7493, + "epoch": 0.29910528506034123, + "grad_norm": 1.6910306595982987, + "learning_rate": 9.62992456871011e-06, + "loss": 0.37, + "step": 5750, + "vit_learning_rate": 1.9259849137420217e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6716, + "epoch": 0.299625468164794, + "grad_norm": 2.018046267999922, + "learning_rate": 9.628332668584506e-06, + "loss": 0.359, + "step": 5760, + "vit_learning_rate": 1.9256665337169012e-06 + }, + { + "avg_batch_load_time": 2.1697, + "avg_batch_processing_time": 0.6746, + "epoch": 0.30014565126924675, + "grad_norm": 1.6936886330087433, + "learning_rate": 9.626737484132184e-06, + "loss": 0.4025, + "step": 5770, + "vit_learning_rate": 1.925347496826437e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.65, + "epoch": 0.30066583437369954, + "grad_norm": 1.7887843111727875, + "learning_rate": 9.625139016485105e-06, + "loss": 0.3961, + "step": 5780, + "vit_learning_rate": 1.9250278032970205e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6356, + "epoch": 0.3011860174781523, + "grad_norm": 1.573281759602641, + "learning_rate": 9.623537266777562e-06, + "loss": 0.3756, + "step": 5790, + "vit_learning_rate": 1.9247074533555122e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6874, + "epoch": 0.30170620058260506, + "grad_norm": 2.16073679915581, + "learning_rate": 9.621932236146178e-06, + "loss": 0.4011, + "step": 5800, + "vit_learning_rate": 1.9243864472292353e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.641, + "epoch": 0.30222638368705784, + "grad_norm": 1.7242092124837116, + "learning_rate": 9.620323925729907e-06, + "loss": 0.3917, + "step": 5810, + "vit_learning_rate": 1.9240647851459814e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6014, + "epoch": 0.30274656679151063, + "grad_norm": 1.457319768018885, + "learning_rate": 9.618712336670026e-06, + "loss": 0.3953, + "step": 5820, + "vit_learning_rate": 1.923742467334005e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7443, + "epoch": 0.30326674989596336, + "grad_norm": 1.6359492518902135, + "learning_rate": 9.61709747011014e-06, + "loss": 0.396, + "step": 5830, + "vit_learning_rate": 1.923419494022028e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6785, + "epoch": 0.30378693300041615, + "grad_norm": 1.3054982143916358, + "learning_rate": 9.615479327196179e-06, + "loss": 0.3866, + "step": 5840, + "vit_learning_rate": 1.9230958654392357e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6955, + "epoch": 0.30430711610486894, + "grad_norm": 2.7244403519103995, + "learning_rate": 9.6138579090764e-06, + "loss": 0.3986, + "step": 5850, + "vit_learning_rate": 1.92277158181528e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.684, + "epoch": 0.30482729920932167, + "grad_norm": 1.9299839398140024, + "learning_rate": 9.612233216901383e-06, + "loss": 0.396, + "step": 5860, + "vit_learning_rate": 1.922446643380276e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6733, + "epoch": 0.30534748231377445, + "grad_norm": 3.3493568651407735, + "learning_rate": 9.61060525182403e-06, + "loss": 0.3788, + "step": 5870, + "vit_learning_rate": 1.9221210503648058e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6538, + "epoch": 0.30586766541822724, + "grad_norm": 2.0983096770423932, + "learning_rate": 9.608974014999569e-06, + "loss": 0.3772, + "step": 5880, + "vit_learning_rate": 1.9217948029999135e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6384, + "epoch": 0.30638784852268, + "grad_norm": 2.0944250886963047, + "learning_rate": 9.607339507585546e-06, + "loss": 0.3898, + "step": 5890, + "vit_learning_rate": 1.921467901517109e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6354, + "epoch": 0.30690803162713276, + "grad_norm": 1.497000729244866, + "learning_rate": 9.605701730741828e-06, + "loss": 0.3825, + "step": 5900, + "vit_learning_rate": 1.9211403461483656e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6285, + "epoch": 0.3074282147315855, + "grad_norm": 2.518955663993865, + "learning_rate": 9.604060685630608e-06, + "loss": 0.3843, + "step": 5910, + "vit_learning_rate": 1.9208121371261213e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6714, + "epoch": 0.3079483978360383, + "grad_norm": 4.05508976953639, + "learning_rate": 9.602416373416388e-06, + "loss": 0.4152, + "step": 5920, + "vit_learning_rate": 1.9204832746832775e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6229, + "epoch": 0.30846858094049107, + "grad_norm": 1.6967616899200515, + "learning_rate": 9.600768795265995e-06, + "loss": 0.3931, + "step": 5930, + "vit_learning_rate": 1.9201537590531987e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6338, + "epoch": 0.3089887640449438, + "grad_norm": 1.8925255107135597, + "learning_rate": 9.599117952348576e-06, + "loss": 0.3917, + "step": 5940, + "vit_learning_rate": 1.919823590469715e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6686, + "epoch": 0.3095089471493966, + "grad_norm": 2.558279388531377, + "learning_rate": 9.597463845835588e-06, + "loss": 0.3673, + "step": 5950, + "vit_learning_rate": 1.9194927691671174e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6823, + "epoch": 0.31002913025384937, + "grad_norm": 1.2343544743639685, + "learning_rate": 9.595806476900808e-06, + "loss": 0.3732, + "step": 5960, + "vit_learning_rate": 1.9191612953801616e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7501, + "epoch": 0.3105493133583021, + "grad_norm": 1.3135251271454564, + "learning_rate": 9.59414584672033e-06, + "loss": 0.3894, + "step": 5970, + "vit_learning_rate": 1.9188291693440656e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6815, + "epoch": 0.3110694964627549, + "grad_norm": 1.3004876742989429, + "learning_rate": 9.592481956472553e-06, + "loss": 0.385, + "step": 5980, + "vit_learning_rate": 1.9184963912945105e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7045, + "epoch": 0.3115896795672077, + "grad_norm": 1.5282974390035533, + "learning_rate": 9.5908148073382e-06, + "loss": 0.3979, + "step": 5990, + "vit_learning_rate": 1.9181629614676396e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6723, + "epoch": 0.3121098626716604, + "grad_norm": 1.841845825438322, + "learning_rate": 9.589144400500303e-06, + "loss": 0.4236, + "step": 6000, + "vit_learning_rate": 1.9178288801000602e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6466, + "epoch": 0.3126300457761132, + "grad_norm": 1.5586261010935518, + "learning_rate": 9.587470737144204e-06, + "loss": 0.394, + "step": 6010, + "vit_learning_rate": 1.9174941474288404e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6226, + "epoch": 0.313150228880566, + "grad_norm": 1.414807494212976, + "learning_rate": 9.585793818457555e-06, + "loss": 0.386, + "step": 6020, + "vit_learning_rate": 1.9171587636915106e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7264, + "epoch": 0.3136704119850187, + "grad_norm": 1.6216137341409333, + "learning_rate": 9.584113645630323e-06, + "loss": 0.4024, + "step": 6030, + "vit_learning_rate": 1.916822729126064e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6637, + "epoch": 0.3141905950894715, + "grad_norm": 2.0652092407092635, + "learning_rate": 9.582430219854778e-06, + "loss": 0.3897, + "step": 6040, + "vit_learning_rate": 1.9164860439709552e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6896, + "epoch": 0.31471077819392423, + "grad_norm": 1.6821642493784685, + "learning_rate": 9.580743542325503e-06, + "loss": 0.381, + "step": 6050, + "vit_learning_rate": 1.9161487084651005e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6279, + "epoch": 0.315230961298377, + "grad_norm": 1.3606941195882165, + "learning_rate": 9.579053614239387e-06, + "loss": 0.3855, + "step": 6060, + "vit_learning_rate": 1.915810722847877e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6656, + "epoch": 0.3157511444028298, + "grad_norm": 2.1285239014941126, + "learning_rate": 9.577360436795626e-06, + "loss": 0.4065, + "step": 6070, + "vit_learning_rate": 1.9154720873591252e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6516, + "epoch": 0.31627132750728254, + "grad_norm": 1.183249924185875, + "learning_rate": 9.575664011195721e-06, + "loss": 0.3965, + "step": 6080, + "vit_learning_rate": 1.915132802239144e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7121, + "epoch": 0.3167915106117353, + "grad_norm": 2.032737969619279, + "learning_rate": 9.573964338643478e-06, + "loss": 0.3834, + "step": 6090, + "vit_learning_rate": 1.914792867728695e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6832, + "epoch": 0.3173116937161881, + "grad_norm": 1.6364929554804537, + "learning_rate": 9.572261420345008e-06, + "loss": 0.3933, + "step": 6100, + "vit_learning_rate": 1.914452284069001e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6671, + "epoch": 0.31783187682064085, + "grad_norm": 1.7044903193994592, + "learning_rate": 9.570555257508722e-06, + "loss": 0.3715, + "step": 6110, + "vit_learning_rate": 1.914111051501744e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7184, + "epoch": 0.31835205992509363, + "grad_norm": 1.9267886457692662, + "learning_rate": 9.568845851345341e-06, + "loss": 0.3723, + "step": 6120, + "vit_learning_rate": 1.913769170269068e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6777, + "epoch": 0.3188722430295464, + "grad_norm": 1.6409978625593995, + "learning_rate": 9.567133203067876e-06, + "loss": 0.4178, + "step": 6130, + "vit_learning_rate": 1.9134266406135753e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6471, + "epoch": 0.31939242613399915, + "grad_norm": 2.116254675676243, + "learning_rate": 9.565417313891652e-06, + "loss": 0.3872, + "step": 6140, + "vit_learning_rate": 1.91308346277833e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6314, + "epoch": 0.31991260923845194, + "grad_norm": 1.9172831994667359, + "learning_rate": 9.56369818503428e-06, + "loss": 0.3938, + "step": 6150, + "vit_learning_rate": 1.912739637006856e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.604, + "epoch": 0.3204327923429047, + "grad_norm": 1.3843576006755096, + "learning_rate": 9.561975817715683e-06, + "loss": 0.3863, + "step": 6160, + "vit_learning_rate": 1.912395163543136e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6329, + "epoch": 0.32095297544735746, + "grad_norm": 1.6612749583186663, + "learning_rate": 9.560250213158073e-06, + "loss": 0.3997, + "step": 6170, + "vit_learning_rate": 1.9120500426316144e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6553, + "epoch": 0.32147315855181025, + "grad_norm": 1.5881548212289223, + "learning_rate": 9.55852137258596e-06, + "loss": 0.4017, + "step": 6180, + "vit_learning_rate": 1.911704274517192e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.679, + "epoch": 0.321993341656263, + "grad_norm": 1.5795404284326322, + "learning_rate": 9.556789297226156e-06, + "loss": 0.3817, + "step": 6190, + "vit_learning_rate": 1.911357859445231e-06 + }, + { + "avg_batch_load_time": 0.0098, + "avg_batch_processing_time": 0.623, + "epoch": 0.32251352476071576, + "grad_norm": 1.3975764383021196, + "learning_rate": 9.555053988307764e-06, + "loss": 0.3907, + "step": 6200, + "vit_learning_rate": 1.9110107976615527e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6938, + "epoch": 0.32303370786516855, + "grad_norm": 3.38004721743464, + "learning_rate": 9.553315447062182e-06, + "loss": 0.3759, + "step": 6210, + "vit_learning_rate": 1.910663089412436e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6421, + "epoch": 0.3235538909696213, + "grad_norm": 1.385184093107366, + "learning_rate": 9.5515736747231e-06, + "loss": 0.3843, + "step": 6220, + "vit_learning_rate": 1.9103147349446196e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7464, + "epoch": 0.32407407407407407, + "grad_norm": 2.3455632228456404, + "learning_rate": 9.549828672526506e-06, + "loss": 0.3828, + "step": 6230, + "vit_learning_rate": 1.9099657345053012e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6406, + "epoch": 0.32459425717852686, + "grad_norm": 2.275268046153239, + "learning_rate": 9.548080441710677e-06, + "loss": 0.3859, + "step": 6240, + "vit_learning_rate": 1.909616088342135e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6433, + "epoch": 0.3251144402829796, + "grad_norm": 1.4362094195244768, + "learning_rate": 9.546328983516178e-06, + "loss": 0.3638, + "step": 6250, + "vit_learning_rate": 1.9092657967032352e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6935, + "epoch": 0.3256346233874324, + "grad_norm": 1.7319977484001245, + "learning_rate": 9.544574299185868e-06, + "loss": 0.3933, + "step": 6260, + "vit_learning_rate": 1.9089148598371733e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6358, + "epoch": 0.32615480649188516, + "grad_norm": 1.7613739575674598, + "learning_rate": 9.542816389964895e-06, + "loss": 0.3804, + "step": 6270, + "vit_learning_rate": 1.908563277992979e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6151, + "epoch": 0.3266749895963379, + "grad_norm": 1.5837434333779545, + "learning_rate": 9.541055257100698e-06, + "loss": 0.3785, + "step": 6280, + "vit_learning_rate": 1.908211051420139e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6277, + "epoch": 0.3271951727007907, + "grad_norm": 1.568154756869639, + "learning_rate": 9.539290901842997e-06, + "loss": 0.3816, + "step": 6290, + "vit_learning_rate": 1.907858180368599e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6535, + "epoch": 0.32771535580524347, + "grad_norm": 1.5418485348887307, + "learning_rate": 9.5375233254438e-06, + "loss": 0.3856, + "step": 6300, + "vit_learning_rate": 1.90750466508876e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6248, + "epoch": 0.3282355389096962, + "grad_norm": 2.3558880791246595, + "learning_rate": 9.535752529157408e-06, + "loss": 0.383, + "step": 6310, + "vit_learning_rate": 1.9071505058314815e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6485, + "epoch": 0.328755722014149, + "grad_norm": 2.3118308092212576, + "learning_rate": 9.5339785142404e-06, + "loss": 0.4155, + "step": 6320, + "vit_learning_rate": 1.9067957028480794e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6263, + "epoch": 0.3292759051186018, + "grad_norm": 2.597196916970026, + "learning_rate": 9.532201281951636e-06, + "loss": 0.3875, + "step": 6330, + "vit_learning_rate": 1.906440256390327e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.65, + "epoch": 0.3297960882230545, + "grad_norm": 1.4070798467411854, + "learning_rate": 9.53042083355227e-06, + "loss": 0.3865, + "step": 6340, + "vit_learning_rate": 1.9060841667104538e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6449, + "epoch": 0.3303162713275073, + "grad_norm": 1.6008471833576632, + "learning_rate": 9.52863717030573e-06, + "loss": 0.3863, + "step": 6350, + "vit_learning_rate": 1.9057274340611458e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7388, + "epoch": 0.33083645443196, + "grad_norm": 3.773891320905201, + "learning_rate": 9.526850293477726e-06, + "loss": 0.4005, + "step": 6360, + "vit_learning_rate": 1.905370058695545e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6707, + "epoch": 0.3313566375364128, + "grad_norm": 1.4344322066579025, + "learning_rate": 9.525060204336251e-06, + "loss": 0.4085, + "step": 6370, + "vit_learning_rate": 1.9050120408672502e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6944, + "epoch": 0.3318768206408656, + "grad_norm": 1.3762218903428507, + "learning_rate": 9.523266904151575e-06, + "loss": 0.3739, + "step": 6380, + "vit_learning_rate": 1.9046533808303149e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6483, + "epoch": 0.33239700374531833, + "grad_norm": 1.016903281900013, + "learning_rate": 9.52147039419625e-06, + "loss": 0.4014, + "step": 6390, + "vit_learning_rate": 1.9042940788392498e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6858, + "epoch": 0.3329171868497711, + "grad_norm": 1.6802167822366336, + "learning_rate": 9.519670675745102e-06, + "loss": 0.392, + "step": 6400, + "vit_learning_rate": 1.9039341351490201e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6693, + "epoch": 0.3334373699542239, + "grad_norm": 1.419280148563179, + "learning_rate": 9.517867750075234e-06, + "loss": 0.3899, + "step": 6410, + "vit_learning_rate": 1.9035735500150468e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6896, + "epoch": 0.33395755305867664, + "grad_norm": 2.184784204101205, + "learning_rate": 9.51606161846603e-06, + "loss": 0.3901, + "step": 6420, + "vit_learning_rate": 1.9032123236932056e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6406, + "epoch": 0.3344777361631294, + "grad_norm": 1.766004386856197, + "learning_rate": 9.51425228219914e-06, + "loss": 0.3748, + "step": 6430, + "vit_learning_rate": 1.9028504564398279e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6143, + "epoch": 0.3349979192675822, + "grad_norm": 1.6704925863944842, + "learning_rate": 9.512439742558496e-06, + "loss": 0.3879, + "step": 6440, + "vit_learning_rate": 1.902487948511699e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7363, + "epoch": 0.33551810237203494, + "grad_norm": 1.8103788147814919, + "learning_rate": 9.510624000830301e-06, + "loss": 0.4017, + "step": 6450, + "vit_learning_rate": 1.9021248001660599e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6415, + "epoch": 0.33603828547648773, + "grad_norm": 1.8691468352860638, + "learning_rate": 9.508805058303028e-06, + "loss": 0.3716, + "step": 6460, + "vit_learning_rate": 1.9017610116606053e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6641, + "epoch": 0.3365584685809405, + "grad_norm": 1.6049043905304294, + "learning_rate": 9.506982916267424e-06, + "loss": 0.3949, + "step": 6470, + "vit_learning_rate": 1.9013965832534847e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6959, + "epoch": 0.33707865168539325, + "grad_norm": 1.399939512131981, + "learning_rate": 9.505157576016506e-06, + "loss": 0.3889, + "step": 6480, + "vit_learning_rate": 1.901031515203301e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7898, + "epoch": 0.33759883478984604, + "grad_norm": 2.5803435700383726, + "learning_rate": 9.503329038845556e-06, + "loss": 0.3585, + "step": 6490, + "vit_learning_rate": 1.900665807769111e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.631, + "epoch": 0.33811901789429877, + "grad_norm": 1.2712592802572478, + "learning_rate": 9.501497306052134e-06, + "loss": 0.3617, + "step": 6500, + "vit_learning_rate": 1.9002994612104266e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6735, + "epoch": 0.33863920099875156, + "grad_norm": 1.424748481539991, + "learning_rate": 9.499662378936058e-06, + "loss": 0.3919, + "step": 6510, + "vit_learning_rate": 1.8999324757872114e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6891, + "epoch": 0.33915938410320434, + "grad_norm": 1.4013313780204746, + "learning_rate": 9.49782425879942e-06, + "loss": 0.4157, + "step": 6520, + "vit_learning_rate": 1.8995648517598837e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7475, + "epoch": 0.3396795672076571, + "grad_norm": 3.284369069214103, + "learning_rate": 9.495982946946571e-06, + "loss": 0.3749, + "step": 6530, + "vit_learning_rate": 1.899196589389314e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7112, + "epoch": 0.34019975031210986, + "grad_norm": 1.4032355747632754, + "learning_rate": 9.494138444684134e-06, + "loss": 0.3891, + "step": 6540, + "vit_learning_rate": 1.8988276889368265e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6859, + "epoch": 0.34071993341656265, + "grad_norm": 1.5700132811776295, + "learning_rate": 9.492290753320988e-06, + "loss": 0.4063, + "step": 6550, + "vit_learning_rate": 1.8984581506641974e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6475, + "epoch": 0.3412401165210154, + "grad_norm": 1.6820083409205826, + "learning_rate": 9.490439874168285e-06, + "loss": 0.3794, + "step": 6560, + "vit_learning_rate": 1.8980879748336567e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7918, + "epoch": 0.34176029962546817, + "grad_norm": 1.5995803252317766, + "learning_rate": 9.488585808539428e-06, + "loss": 0.3873, + "step": 6570, + "vit_learning_rate": 1.8977171617078853e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6989, + "epoch": 0.34228048272992095, + "grad_norm": 1.5092110111737396, + "learning_rate": 9.486728557750091e-06, + "loss": 0.371, + "step": 6580, + "vit_learning_rate": 1.897345711550018e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.656, + "epoch": 0.3428006658343737, + "grad_norm": 1.9604893409202886, + "learning_rate": 9.484868123118203e-06, + "loss": 0.3893, + "step": 6590, + "vit_learning_rate": 1.8969736246236403e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6853, + "epoch": 0.3433208489388265, + "grad_norm": 2.194860264638084, + "learning_rate": 9.483004505963952e-06, + "loss": 0.3873, + "step": 6600, + "vit_learning_rate": 1.89660090119279e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6796, + "epoch": 0.34384103204327926, + "grad_norm": 1.5759825340896074, + "learning_rate": 9.481137707609787e-06, + "loss": 0.3903, + "step": 6610, + "vit_learning_rate": 1.8962275415219572e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6614, + "epoch": 0.344361215147732, + "grad_norm": 1.450994765581149, + "learning_rate": 9.479267729380412e-06, + "loss": 0.3956, + "step": 6620, + "vit_learning_rate": 1.895853545876082e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6872, + "epoch": 0.3448813982521848, + "grad_norm": 2.3697257042855626, + "learning_rate": 9.477394572602789e-06, + "loss": 0.3948, + "step": 6630, + "vit_learning_rate": 1.8954789145205577e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6625, + "epoch": 0.3454015813566375, + "grad_norm": 1.679795747617547, + "learning_rate": 9.475518238606136e-06, + "loss": 0.3765, + "step": 6640, + "vit_learning_rate": 1.8951036477212268e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6623, + "epoch": 0.3459217644610903, + "grad_norm": 1.9073215055367665, + "learning_rate": 9.47363872872192e-06, + "loss": 0.3638, + "step": 6650, + "vit_learning_rate": 1.894727745744384e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.673, + "epoch": 0.3464419475655431, + "grad_norm": 1.7318782560952193, + "learning_rate": 9.471756044283875e-06, + "loss": 0.3696, + "step": 6660, + "vit_learning_rate": 1.8943512088567747e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6483, + "epoch": 0.3469621306699958, + "grad_norm": 1.3267864979969928, + "learning_rate": 9.469870186627972e-06, + "loss": 0.3708, + "step": 6670, + "vit_learning_rate": 1.8939740373255943e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6081, + "epoch": 0.3474823137744486, + "grad_norm": 1.770895162314103, + "learning_rate": 9.467981157092444e-06, + "loss": 0.4015, + "step": 6680, + "vit_learning_rate": 1.8935962314184886e-06 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6811, + "epoch": 0.3480024968789014, + "grad_norm": 1.1163282950532816, + "learning_rate": 9.466088957017771e-06, + "loss": 0.3803, + "step": 6690, + "vit_learning_rate": 1.8932177914035538e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6779, + "epoch": 0.3485226799833541, + "grad_norm": 1.2842852271700071, + "learning_rate": 9.464193587746681e-06, + "loss": 0.4056, + "step": 6700, + "vit_learning_rate": 1.892838717549336e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7851, + "epoch": 0.3490428630878069, + "grad_norm": 1.3461551038726693, + "learning_rate": 9.462295050624158e-06, + "loss": 0.3823, + "step": 6710, + "vit_learning_rate": 1.8924590101248313e-06 + }, + { + "avg_batch_load_time": 0.0941, + "avg_batch_processing_time": 0.6507, + "epoch": 0.3495630461922597, + "grad_norm": 1.6321869804805043, + "learning_rate": 9.460393346997424e-06, + "loss": 0.353, + "step": 6720, + "vit_learning_rate": 1.8920786693994845e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6738, + "epoch": 0.35008322929671243, + "grad_norm": 1.3742956123526984, + "learning_rate": 9.458488478215957e-06, + "loss": 0.4015, + "step": 6730, + "vit_learning_rate": 1.8916976956431911e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7246, + "epoch": 0.3506034124011652, + "grad_norm": 2.024896329852318, + "learning_rate": 9.456580445631476e-06, + "loss": 0.3719, + "step": 6740, + "vit_learning_rate": 1.8913160891262952e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7309, + "epoch": 0.351123595505618, + "grad_norm": 1.4578040633397962, + "learning_rate": 9.454669250597948e-06, + "loss": 0.3895, + "step": 6750, + "vit_learning_rate": 1.8909338501195896e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6741, + "epoch": 0.35164377861007073, + "grad_norm": 1.4490025764681178, + "learning_rate": 9.452754894471581e-06, + "loss": 0.3931, + "step": 6760, + "vit_learning_rate": 1.890550978894316e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6689, + "epoch": 0.3521639617145235, + "grad_norm": 1.2565704454691033, + "learning_rate": 9.45083737861083e-06, + "loss": 0.3587, + "step": 6770, + "vit_learning_rate": 1.8901674757221655e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7413, + "epoch": 0.35268414481897625, + "grad_norm": 1.5447736644501988, + "learning_rate": 9.448916704376384e-06, + "loss": 0.4106, + "step": 6780, + "vit_learning_rate": 1.8897833408752766e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6528, + "epoch": 0.35320432792342904, + "grad_norm": 1.892357629399855, + "learning_rate": 9.446992873131184e-06, + "loss": 0.3723, + "step": 6790, + "vit_learning_rate": 1.8893985746262366e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6287, + "epoch": 0.3537245110278818, + "grad_norm": 1.6291413347892438, + "learning_rate": 9.445065886240408e-06, + "loss": 0.3896, + "step": 6800, + "vit_learning_rate": 1.8890131772480812e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6229, + "epoch": 0.35424469413233456, + "grad_norm": 1.8285236095372506, + "learning_rate": 9.443135745071467e-06, + "loss": 0.3675, + "step": 6810, + "vit_learning_rate": 1.8886271490142932e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6223, + "epoch": 0.35476487723678735, + "grad_norm": 1.6357884810964007, + "learning_rate": 9.441202450994017e-06, + "loss": 0.3544, + "step": 6820, + "vit_learning_rate": 1.8882404901988032e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.8241, + "epoch": 0.35528506034124013, + "grad_norm": 1.4345901376124162, + "learning_rate": 9.43926600537995e-06, + "loss": 0.4067, + "step": 6830, + "vit_learning_rate": 1.8878532010759898e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6726, + "epoch": 0.35580524344569286, + "grad_norm": 2.2436322217802744, + "learning_rate": 9.437326409603393e-06, + "loss": 0.3624, + "step": 6840, + "vit_learning_rate": 1.8874652819206783e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7211, + "epoch": 0.35632542655014565, + "grad_norm": 1.6759220504019736, + "learning_rate": 9.435383665040708e-06, + "loss": 0.3883, + "step": 6850, + "vit_learning_rate": 1.8870767330081414e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6838, + "epoch": 0.35684560965459844, + "grad_norm": 1.5785941140783422, + "learning_rate": 9.433437773070496e-06, + "loss": 0.4082, + "step": 6860, + "vit_learning_rate": 1.8866875546140987e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6643, + "epoch": 0.35736579275905117, + "grad_norm": 2.5967192486215698, + "learning_rate": 9.431488735073582e-06, + "loss": 0.3975, + "step": 6870, + "vit_learning_rate": 1.8862977470147164e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6688, + "epoch": 0.35788597586350396, + "grad_norm": 1.5192313091311562, + "learning_rate": 9.429536552433037e-06, + "loss": 0.3671, + "step": 6880, + "vit_learning_rate": 1.885907310486607e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7002, + "epoch": 0.35840615896795674, + "grad_norm": 1.481705293252127, + "learning_rate": 9.427581226534148e-06, + "loss": 0.395, + "step": 6890, + "vit_learning_rate": 1.8855162453068294e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.78, + "epoch": 0.3589263420724095, + "grad_norm": 1.2429909593557618, + "learning_rate": 9.425622758764447e-06, + "loss": 0.3958, + "step": 6900, + "vit_learning_rate": 1.8851245517528892e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6762, + "epoch": 0.35944652517686226, + "grad_norm": 1.3815958185027009, + "learning_rate": 9.423661150513685e-06, + "loss": 0.3769, + "step": 6910, + "vit_learning_rate": 1.8847322301027368e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7126, + "epoch": 0.359966708281315, + "grad_norm": 1.5927235101713402, + "learning_rate": 9.421696403173848e-06, + "loss": 0.3663, + "step": 6920, + "vit_learning_rate": 1.8843392806347693e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6122, + "epoch": 0.3604868913857678, + "grad_norm": 1.8113526259270307, + "learning_rate": 9.419728518139145e-06, + "loss": 0.4147, + "step": 6930, + "vit_learning_rate": 1.8839457036278286e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6424, + "epoch": 0.36100707449022057, + "grad_norm": 1.7481353020229284, + "learning_rate": 9.417757496806012e-06, + "loss": 0.4072, + "step": 6940, + "vit_learning_rate": 1.8835514993612024e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7193, + "epoch": 0.3615272575946733, + "grad_norm": 2.165329419907997, + "learning_rate": 9.415783340573118e-06, + "loss": 0.4033, + "step": 6950, + "vit_learning_rate": 1.8831566681146232e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6363, + "epoch": 0.3620474406991261, + "grad_norm": 1.8383727742753975, + "learning_rate": 9.413806050841343e-06, + "loss": 0.3753, + "step": 6960, + "vit_learning_rate": 1.8827612101682683e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6877, + "epoch": 0.3625676238035789, + "grad_norm": 1.2909102816546378, + "learning_rate": 9.411825629013804e-06, + "loss": 0.3865, + "step": 6970, + "vit_learning_rate": 1.8823651258027606e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6541, + "epoch": 0.3630878069080316, + "grad_norm": 1.6794341661413672, + "learning_rate": 9.409842076495833e-06, + "loss": 0.3743, + "step": 6980, + "vit_learning_rate": 1.8819684152991661e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6856, + "epoch": 0.3636079900124844, + "grad_norm": 1.540580911902761, + "learning_rate": 9.407855394694985e-06, + "loss": 0.3678, + "step": 6990, + "vit_learning_rate": 1.8815710789389966e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6324, + "epoch": 0.3641281731169372, + "grad_norm": 1.8315310455496432, + "learning_rate": 9.405865585021035e-06, + "loss": 0.3912, + "step": 7000, + "vit_learning_rate": 1.8811731170042069e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7974, + "epoch": 0.3646483562213899, + "grad_norm": 1.0173159990901943, + "learning_rate": 9.403872648885981e-06, + "loss": 0.3757, + "step": 7010, + "vit_learning_rate": 1.880774529777196e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.644, + "epoch": 0.3651685393258427, + "grad_norm": 1.4478829230272232, + "learning_rate": 9.401876587704036e-06, + "loss": 0.4172, + "step": 7020, + "vit_learning_rate": 1.880375317540807e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7118, + "epoch": 0.3656887224302955, + "grad_norm": 1.5597555546474433, + "learning_rate": 9.399877402891631e-06, + "loss": 0.3809, + "step": 7030, + "vit_learning_rate": 1.879975480578326e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6952, + "epoch": 0.3662089055347482, + "grad_norm": 1.6031310213424221, + "learning_rate": 9.397875095867414e-06, + "loss": 0.392, + "step": 7040, + "vit_learning_rate": 1.8795750191734828e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7053, + "epoch": 0.366729088639201, + "grad_norm": 1.8914712375782026, + "learning_rate": 9.395869668052252e-06, + "loss": 0.3837, + "step": 7050, + "vit_learning_rate": 1.8791739336104502e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7102, + "epoch": 0.3672492717436538, + "grad_norm": 1.883991989136428, + "learning_rate": 9.39386112086922e-06, + "loss": 0.3949, + "step": 7060, + "vit_learning_rate": 1.8787722241738437e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6562, + "epoch": 0.3677694548481065, + "grad_norm": 1.3780768467519786, + "learning_rate": 9.39184945574361e-06, + "loss": 0.3937, + "step": 7070, + "vit_learning_rate": 1.8783698911487219e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6778, + "epoch": 0.3682896379525593, + "grad_norm": 1.6472369519558712, + "learning_rate": 9.389834674102929e-06, + "loss": 0.3689, + "step": 7080, + "vit_learning_rate": 1.8779669348205856e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6508, + "epoch": 0.36880982105701204, + "grad_norm": 1.3577924644041572, + "learning_rate": 9.38781677737689e-06, + "loss": 0.373, + "step": 7090, + "vit_learning_rate": 1.8775633554753777e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6444, + "epoch": 0.36933000416146483, + "grad_norm": 1.7665674702911665, + "learning_rate": 9.385795766997421e-06, + "loss": 0.3723, + "step": 7100, + "vit_learning_rate": 1.877159153399484e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.8308, + "epoch": 0.3698501872659176, + "grad_norm": 1.2674499313129788, + "learning_rate": 9.383771644398658e-06, + "loss": 0.3885, + "step": 7110, + "vit_learning_rate": 1.8767543288797315e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.622, + "epoch": 0.37037037037037035, + "grad_norm": 2.2434499061464623, + "learning_rate": 9.381744411016945e-06, + "loss": 0.3821, + "step": 7120, + "vit_learning_rate": 1.876348882203389e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6342, + "epoch": 0.37089055347482314, + "grad_norm": 1.3773822587903357, + "learning_rate": 9.379714068290835e-06, + "loss": 0.3913, + "step": 7130, + "vit_learning_rate": 1.8759428136581667e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6696, + "epoch": 0.3714107365792759, + "grad_norm": 2.029069489099036, + "learning_rate": 9.377680617661084e-06, + "loss": 0.3707, + "step": 7140, + "vit_learning_rate": 1.8755361235322168e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6741, + "epoch": 0.37193091968372866, + "grad_norm": 1.8075662292783732, + "learning_rate": 9.37564406057066e-06, + "loss": 0.3798, + "step": 7150, + "vit_learning_rate": 1.8751288121141315e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6588, + "epoch": 0.37245110278818144, + "grad_norm": 1.951839608156936, + "learning_rate": 9.373604398464726e-06, + "loss": 0.387, + "step": 7160, + "vit_learning_rate": 1.874720879692945e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.653, + "epoch": 0.37297128589263423, + "grad_norm": 1.8357685348538662, + "learning_rate": 9.371561632790656e-06, + "loss": 0.3762, + "step": 7170, + "vit_learning_rate": 1.8743123265581308e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6511, + "epoch": 0.37349146899708696, + "grad_norm": 4.097432727707294, + "learning_rate": 9.369515764998025e-06, + "loss": 0.3849, + "step": 7180, + "vit_learning_rate": 1.8739031529996047e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6483, + "epoch": 0.37401165210153975, + "grad_norm": 2.611239021624235, + "learning_rate": 9.367466796538606e-06, + "loss": 0.3891, + "step": 7190, + "vit_learning_rate": 1.873493359307721e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6386, + "epoch": 0.37453183520599254, + "grad_norm": 1.1311623501703498, + "learning_rate": 9.365414728866376e-06, + "loss": 0.3791, + "step": 7200, + "vit_learning_rate": 1.8730829457732748e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7725, + "epoch": 0.37505201831044527, + "grad_norm": 2.420691840476535, + "learning_rate": 9.363359563437508e-06, + "loss": 0.3748, + "step": 7210, + "vit_learning_rate": 1.8726719126875015e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7685, + "epoch": 0.37557220141489805, + "grad_norm": 1.8845990458344297, + "learning_rate": 9.361301301710375e-06, + "loss": 0.4154, + "step": 7220, + "vit_learning_rate": 1.872260260342075e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7777, + "epoch": 0.3760923845193508, + "grad_norm": 1.7531661048409142, + "learning_rate": 9.35923994514555e-06, + "loss": 0.4038, + "step": 7230, + "vit_learning_rate": 1.8718479890291097e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7239, + "epoch": 0.3766125676238036, + "grad_norm": 1.9551093114227887, + "learning_rate": 9.357175495205794e-06, + "loss": 0.3838, + "step": 7240, + "vit_learning_rate": 1.8714350990411588e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6729, + "epoch": 0.37713275072825636, + "grad_norm": 1.8102067468173002, + "learning_rate": 9.355107953356076e-06, + "loss": 0.3946, + "step": 7250, + "vit_learning_rate": 1.8710215906712149e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6856, + "epoch": 0.3776529338327091, + "grad_norm": 1.3416123626211862, + "learning_rate": 9.353037321063542e-06, + "loss": 0.4083, + "step": 7260, + "vit_learning_rate": 1.8706074642127083e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6306, + "epoch": 0.3781731169371619, + "grad_norm": 1.622152713218394, + "learning_rate": 9.350963599797546e-06, + "loss": 0.3731, + "step": 7270, + "vit_learning_rate": 1.870192719959509e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6373, + "epoch": 0.37869330004161467, + "grad_norm": 1.299524204514637, + "learning_rate": 9.348886791029629e-06, + "loss": 0.3996, + "step": 7280, + "vit_learning_rate": 1.8697773582059255e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6495, + "epoch": 0.3792134831460674, + "grad_norm": 1.5819261043540145, + "learning_rate": 9.34680689623352e-06, + "loss": 0.3682, + "step": 7290, + "vit_learning_rate": 1.8693613792467035e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6554, + "epoch": 0.3797336662505202, + "grad_norm": 1.4441791560903718, + "learning_rate": 9.344723916885139e-06, + "loss": 0.3521, + "step": 7300, + "vit_learning_rate": 1.8689447833770276e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6358, + "epoch": 0.380253849354973, + "grad_norm": 2.7798135781602453, + "learning_rate": 9.342637854462599e-06, + "loss": 0.3589, + "step": 7310, + "vit_learning_rate": 1.8685275708925195e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6929, + "epoch": 0.3807740324594257, + "grad_norm": 2.109260919647575, + "learning_rate": 9.340548710446195e-06, + "loss": 0.389, + "step": 7320, + "vit_learning_rate": 1.8681097420892389e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6672, + "epoch": 0.3812942155638785, + "grad_norm": 1.3830802154762365, + "learning_rate": 9.338456486318415e-06, + "loss": 0.3857, + "step": 7330, + "vit_learning_rate": 1.8676912972636826e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6854, + "epoch": 0.3818143986683313, + "grad_norm": 1.7467110624580542, + "learning_rate": 9.336361183563924e-06, + "loss": 0.389, + "step": 7340, + "vit_learning_rate": 1.8672722367127848e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.64, + "epoch": 0.382334581772784, + "grad_norm": 1.4743440018662117, + "learning_rate": 9.334262803669583e-06, + "loss": 0.3653, + "step": 7350, + "vit_learning_rate": 1.8668525607339163e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6779, + "epoch": 0.3828547648772368, + "grad_norm": 1.603223024658, + "learning_rate": 9.332161348124426e-06, + "loss": 0.3819, + "step": 7360, + "vit_learning_rate": 1.866432269624885e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6994, + "epoch": 0.38337494798168953, + "grad_norm": 1.2652023271778623, + "learning_rate": 9.330056818419677e-06, + "loss": 0.406, + "step": 7370, + "vit_learning_rate": 1.8660113636839349e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6341, + "epoch": 0.3838951310861423, + "grad_norm": 1.3697066121988386, + "learning_rate": 9.327949216048735e-06, + "loss": 0.3957, + "step": 7380, + "vit_learning_rate": 1.8655898432097468e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.633, + "epoch": 0.3844153141905951, + "grad_norm": 1.6604819584498773, + "learning_rate": 9.325838542507185e-06, + "loss": 0.375, + "step": 7390, + "vit_learning_rate": 1.8651677085014366e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6542, + "epoch": 0.38493549729504783, + "grad_norm": 1.664813501607244, + "learning_rate": 9.323724799292789e-06, + "loss": 0.3921, + "step": 7400, + "vit_learning_rate": 1.8647449598585573e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6793, + "epoch": 0.3854556803995006, + "grad_norm": 2.069787247088653, + "learning_rate": 9.321607987905485e-06, + "loss": 0.3835, + "step": 7410, + "vit_learning_rate": 1.8643215975810968e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7182, + "epoch": 0.3859758635039534, + "grad_norm": 1.4923943349639692, + "learning_rate": 9.319488109847395e-06, + "loss": 0.3599, + "step": 7420, + "vit_learning_rate": 1.8638976219694786e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7002, + "epoch": 0.38649604660840614, + "grad_norm": 2.568190542807367, + "learning_rate": 9.31736516662281e-06, + "loss": 0.3894, + "step": 7430, + "vit_learning_rate": 1.8634730333245616e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6289, + "epoch": 0.38701622971285893, + "grad_norm": 1.514066181046752, + "learning_rate": 9.315239159738198e-06, + "loss": 0.3803, + "step": 7440, + "vit_learning_rate": 1.8630478319476395e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6502, + "epoch": 0.3875364128173117, + "grad_norm": 1.4411520771806396, + "learning_rate": 9.313110090702207e-06, + "loss": 0.3881, + "step": 7450, + "vit_learning_rate": 1.862622018140441e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6531, + "epoch": 0.38805659592176445, + "grad_norm": 1.5976321360776768, + "learning_rate": 9.310977961025647e-06, + "loss": 0.3737, + "step": 7460, + "vit_learning_rate": 1.8621955922051292e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6309, + "epoch": 0.38857677902621723, + "grad_norm": 1.4049017110582163, + "learning_rate": 9.308842772221506e-06, + "loss": 0.3701, + "step": 7470, + "vit_learning_rate": 1.8617685544443012e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6587, + "epoch": 0.38909696213067, + "grad_norm": 1.569530813969266, + "learning_rate": 9.306704525804948e-06, + "loss": 0.3797, + "step": 7480, + "vit_learning_rate": 1.8613409051609893e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7323, + "epoch": 0.38961714523512275, + "grad_norm": 1.9059865393633022, + "learning_rate": 9.304563223293295e-06, + "loss": 0.3446, + "step": 7490, + "vit_learning_rate": 1.860912644658659e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7137, + "epoch": 0.39013732833957554, + "grad_norm": 1.7105685375315764, + "learning_rate": 9.302418866206048e-06, + "loss": 0.3734, + "step": 7500, + "vit_learning_rate": 1.8604837732412095e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6551, + "epoch": 0.39065751144402827, + "grad_norm": 1.5841006452146393, + "learning_rate": 9.30027145606487e-06, + "loss": 0.3891, + "step": 7510, + "vit_learning_rate": 1.860054291212974e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6455, + "epoch": 0.39117769454848106, + "grad_norm": 1.6900707672904882, + "learning_rate": 9.298120994393593e-06, + "loss": 0.4106, + "step": 7520, + "vit_learning_rate": 1.8596241988787182e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6869, + "epoch": 0.39169787765293385, + "grad_norm": 2.511793635202266, + "learning_rate": 9.29596748271821e-06, + "loss": 0.3558, + "step": 7530, + "vit_learning_rate": 1.8591934965436417e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6127, + "epoch": 0.3922180607573866, + "grad_norm": 2.237987112906066, + "learning_rate": 9.293810922566885e-06, + "loss": 0.3897, + "step": 7540, + "vit_learning_rate": 1.8587621845133766e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7634, + "epoch": 0.39273824386183936, + "grad_norm": 1.9465616671213535, + "learning_rate": 9.29165131546994e-06, + "loss": 0.3783, + "step": 7550, + "vit_learning_rate": 1.8583302630939879e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6477, + "epoch": 0.39325842696629215, + "grad_norm": 1.3815614676259997, + "learning_rate": 9.289488662959864e-06, + "loss": 0.4003, + "step": 7560, + "vit_learning_rate": 1.8578977325919725e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.742, + "epoch": 0.3937786100707449, + "grad_norm": 1.338317769493746, + "learning_rate": 9.2873229665713e-06, + "loss": 0.3837, + "step": 7570, + "vit_learning_rate": 1.8574645933142598e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6726, + "epoch": 0.39429879317519767, + "grad_norm": 1.5859122182718641, + "learning_rate": 9.285154227841058e-06, + "loss": 0.3659, + "step": 7580, + "vit_learning_rate": 1.8570308455682112e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6312, + "epoch": 0.39481897627965046, + "grad_norm": 1.6984129283324858, + "learning_rate": 9.282982448308103e-06, + "loss": 0.389, + "step": 7590, + "vit_learning_rate": 1.8565964896616204e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6567, + "epoch": 0.3953391593841032, + "grad_norm": 1.7431938624305827, + "learning_rate": 9.28080762951356e-06, + "loss": 0.3711, + "step": 7600, + "vit_learning_rate": 1.8561615259027116e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6282, + "epoch": 0.395859342488556, + "grad_norm": 1.7528600074261405, + "learning_rate": 9.278629773000706e-06, + "loss": 0.389, + "step": 7610, + "vit_learning_rate": 1.855725954600141e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6786, + "epoch": 0.39637952559300876, + "grad_norm": 1.6180371703376786, + "learning_rate": 9.27644888031498e-06, + "loss": 0.3816, + "step": 7620, + "vit_learning_rate": 1.855289776062996e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6449, + "epoch": 0.3968997086974615, + "grad_norm": 1.7470804358731333, + "learning_rate": 9.274264953003974e-06, + "loss": 0.4083, + "step": 7630, + "vit_learning_rate": 1.8548529906007945e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6494, + "epoch": 0.3974198918019143, + "grad_norm": 1.4119646994179658, + "learning_rate": 9.27207799261743e-06, + "loss": 0.386, + "step": 7640, + "vit_learning_rate": 1.8544155985234857e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6464, + "epoch": 0.397940074906367, + "grad_norm": 2.7974628509773978, + "learning_rate": 9.269888000707243e-06, + "loss": 0.3909, + "step": 7650, + "vit_learning_rate": 1.8539776001414484e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8014, + "epoch": 0.3984602580108198, + "grad_norm": 1.9003883796479049, + "learning_rate": 9.267694978827462e-06, + "loss": 0.3692, + "step": 7660, + "vit_learning_rate": 1.853538995765492e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6224, + "epoch": 0.3989804411152726, + "grad_norm": 1.581535810536161, + "learning_rate": 9.265498928534284e-06, + "loss": 0.4031, + "step": 7670, + "vit_learning_rate": 1.8530997857068567e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6669, + "epoch": 0.3995006242197253, + "grad_norm": 1.6827888959496144, + "learning_rate": 9.263299851386058e-06, + "loss": 0.3607, + "step": 7680, + "vit_learning_rate": 1.8526599702772113e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6347, + "epoch": 0.4000208073241781, + "grad_norm": 1.5609955275327243, + "learning_rate": 9.261097748943278e-06, + "loss": 0.3668, + "step": 7690, + "vit_learning_rate": 1.8522195497886553e-06 + }, + { + "avg_batch_load_time": 1.9522, + "avg_batch_processing_time": 0.6175, + "epoch": 0.4005409904286309, + "grad_norm": 2.629335457579643, + "learning_rate": 9.258892622768581e-06, + "loss": 0.3936, + "step": 7700, + "vit_learning_rate": 1.8517785245537158e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6483, + "epoch": 0.4010611735330836, + "grad_norm": 2.598965005337113, + "learning_rate": 9.256684474426757e-06, + "loss": 0.392, + "step": 7710, + "vit_learning_rate": 1.8513368948853514e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6453, + "epoch": 0.4015813566375364, + "grad_norm": 1.5322192617303838, + "learning_rate": 9.254473305484738e-06, + "loss": 0.3903, + "step": 7720, + "vit_learning_rate": 1.8508946610969475e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6418, + "epoch": 0.4021015397419892, + "grad_norm": 1.5977958710949762, + "learning_rate": 9.252259117511602e-06, + "loss": 0.3755, + "step": 7730, + "vit_learning_rate": 1.8504518235023201e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7314, + "epoch": 0.40262172284644193, + "grad_norm": 1.552456335337134, + "learning_rate": 9.25004191207856e-06, + "loss": 0.4057, + "step": 7740, + "vit_learning_rate": 1.8500083824157117e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6378, + "epoch": 0.4031419059508947, + "grad_norm": 1.1576955595259608, + "learning_rate": 9.247821690758974e-06, + "loss": 0.3782, + "step": 7750, + "vit_learning_rate": 1.8495643381517947e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7664, + "epoch": 0.4036620890553475, + "grad_norm": 1.4013929453689817, + "learning_rate": 9.245598455128344e-06, + "loss": 0.3963, + "step": 7760, + "vit_learning_rate": 1.8491196910256685e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6749, + "epoch": 0.40418227215980024, + "grad_norm": 1.5068188165383012, + "learning_rate": 9.243372206764305e-06, + "loss": 0.3917, + "step": 7770, + "vit_learning_rate": 1.8486744413528607e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.627, + "epoch": 0.404702455264253, + "grad_norm": 3.8183334579740675, + "learning_rate": 9.241142947246634e-06, + "loss": 0.3984, + "step": 7780, + "vit_learning_rate": 1.8482285894493264e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6284, + "epoch": 0.4052226383687058, + "grad_norm": 1.5912368526389766, + "learning_rate": 9.238910678157242e-06, + "loss": 0.3712, + "step": 7790, + "vit_learning_rate": 1.847782135631448e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.634, + "epoch": 0.40574282147315854, + "grad_norm": 1.676889032276241, + "learning_rate": 9.236675401080178e-06, + "loss": 0.3765, + "step": 7800, + "vit_learning_rate": 1.8473350802160354e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7162, + "epoch": 0.40626300457761133, + "grad_norm": 1.5177314855971187, + "learning_rate": 9.234437117601626e-06, + "loss": 0.3792, + "step": 7810, + "vit_learning_rate": 1.846887423520325e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6839, + "epoch": 0.40678318768206406, + "grad_norm": 1.7134495587276577, + "learning_rate": 9.2321958293099e-06, + "loss": 0.3826, + "step": 7820, + "vit_learning_rate": 1.8464391658619798e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3854, + "epoch": 0.40730337078651685, + "grad_norm": 1.1876002373084367, + "learning_rate": 9.229951537795447e-06, + "loss": 0.3972, + "step": 7830, + "vit_learning_rate": 1.8459903075590893e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6846, + "epoch": 0.40782355389096964, + "grad_norm": 2.2031455588565154, + "learning_rate": 9.22770424465085e-06, + "loss": 0.4089, + "step": 7840, + "vit_learning_rate": 1.8455408489301698e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6778, + "epoch": 0.40834373699542237, + "grad_norm": 1.5270582635460122, + "learning_rate": 9.225453951470815e-06, + "loss": 0.3898, + "step": 7850, + "vit_learning_rate": 1.8450907902941629e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7432, + "epoch": 0.40886392009987516, + "grad_norm": 1.913546015522357, + "learning_rate": 9.223200659852181e-06, + "loss": 0.3812, + "step": 7860, + "vit_learning_rate": 1.844640131970436e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6675, + "epoch": 0.40938410320432794, + "grad_norm": 1.817399851469572, + "learning_rate": 9.220944371393915e-06, + "loss": 0.3737, + "step": 7870, + "vit_learning_rate": 1.8441888742787828e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6556, + "epoch": 0.4099042863087807, + "grad_norm": 2.0846333094434715, + "learning_rate": 9.218685087697108e-06, + "loss": 0.3891, + "step": 7880, + "vit_learning_rate": 1.8437370175394214e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6764, + "epoch": 0.41042446941323346, + "grad_norm": 1.8433843283500588, + "learning_rate": 9.216422810364978e-06, + "loss": 0.3944, + "step": 7890, + "vit_learning_rate": 1.8432845620729953e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6734, + "epoch": 0.41094465251768625, + "grad_norm": 1.8399419532289092, + "learning_rate": 9.214157541002869e-06, + "loss": 0.3875, + "step": 7900, + "vit_learning_rate": 1.8428315082005735e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6728, + "epoch": 0.411464835622139, + "grad_norm": 1.6758753889411755, + "learning_rate": 9.211889281218243e-06, + "loss": 0.3484, + "step": 7910, + "vit_learning_rate": 1.8423778562436482e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6802, + "epoch": 0.41198501872659177, + "grad_norm": 1.7788524204661293, + "learning_rate": 9.20961803262069e-06, + "loss": 0.3941, + "step": 7920, + "vit_learning_rate": 1.8419236065241378e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7696, + "epoch": 0.41250520183104455, + "grad_norm": 1.3829427019847083, + "learning_rate": 9.207343796821918e-06, + "loss": 0.3819, + "step": 7930, + "vit_learning_rate": 1.8414687593643833e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6331, + "epoch": 0.4130253849354973, + "grad_norm": 1.7179147456597788, + "learning_rate": 9.205066575435754e-06, + "loss": 0.3653, + "step": 7940, + "vit_learning_rate": 1.8410133150871507e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6365, + "epoch": 0.4135455680399501, + "grad_norm": 2.355622585454297, + "learning_rate": 9.202786370078147e-06, + "loss": 0.4071, + "step": 7950, + "vit_learning_rate": 1.8405572740156293e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6547, + "epoch": 0.4140657511444028, + "grad_norm": 1.8592441549207415, + "learning_rate": 9.20050318236716e-06, + "loss": 0.4096, + "step": 7960, + "vit_learning_rate": 1.8401006364734319e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6597, + "epoch": 0.4145859342488556, + "grad_norm": 1.7004604992792978, + "learning_rate": 9.198217013922975e-06, + "loss": 0.3928, + "step": 7970, + "vit_learning_rate": 1.8396434027845946e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6958, + "epoch": 0.4151061173533084, + "grad_norm": 2.7977470150399695, + "learning_rate": 9.195927866367884e-06, + "loss": 0.4098, + "step": 7980, + "vit_learning_rate": 1.8391855732735764e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6641, + "epoch": 0.4156263004577611, + "grad_norm": 2.029327799105479, + "learning_rate": 9.193635741326299e-06, + "loss": 0.3826, + "step": 7990, + "vit_learning_rate": 1.8387271482652593e-06 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6741, + "epoch": 0.4161464835622139, + "grad_norm": 1.6601263765406657, + "learning_rate": 9.191340640424743e-06, + "loss": 0.3909, + "step": 8000, + "vit_learning_rate": 1.8382681280849481e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6274, + "epoch": 0.4166666666666667, + "grad_norm": 1.6982953029766659, + "learning_rate": 9.189042565291847e-06, + "loss": 0.3668, + "step": 8010, + "vit_learning_rate": 1.8378085130583691e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6588, + "epoch": 0.4171868497711194, + "grad_norm": 1.5236389706085534, + "learning_rate": 9.186741517558361e-06, + "loss": 0.3723, + "step": 8020, + "vit_learning_rate": 1.837348303511672e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6086, + "epoch": 0.4177070328755722, + "grad_norm": 1.4726989111298971, + "learning_rate": 9.184437498857135e-06, + "loss": 0.3716, + "step": 8030, + "vit_learning_rate": 1.8368874997714268e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6289, + "epoch": 0.418227215980025, + "grad_norm": 2.0380756686819703, + "learning_rate": 9.182130510823134e-06, + "loss": 0.3869, + "step": 8040, + "vit_learning_rate": 1.8364261021646267e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7644, + "epoch": 0.4187473990844777, + "grad_norm": 1.2397174245233995, + "learning_rate": 9.179820555093428e-06, + "loss": 0.3463, + "step": 8050, + "vit_learning_rate": 1.8359641110186854e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6565, + "epoch": 0.4192675821889305, + "grad_norm": 1.0733679762872064, + "learning_rate": 9.17750763330719e-06, + "loss": 0.3668, + "step": 8060, + "vit_learning_rate": 1.8355015266614377e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6186, + "epoch": 0.4197877652933833, + "grad_norm": 1.77728893113943, + "learning_rate": 9.175191747105701e-06, + "loss": 0.3724, + "step": 8070, + "vit_learning_rate": 1.83503834942114e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 2.4092, + "epoch": 0.42030794839783603, + "grad_norm": 1.6685541483267827, + "learning_rate": 9.172872898132346e-06, + "loss": 0.4092, + "step": 8080, + "vit_learning_rate": 1.834574579626469e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3954, + "epoch": 0.4208281315022888, + "grad_norm": 1.8258063537658584, + "learning_rate": 9.17055108803261e-06, + "loss": 0.3847, + "step": 8090, + "vit_learning_rate": 1.8341102176065218e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6465, + "epoch": 0.42134831460674155, + "grad_norm": 1.666688656925749, + "learning_rate": 9.16822631845408e-06, + "loss": 0.3849, + "step": 8100, + "vit_learning_rate": 1.833645263690816e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6727, + "epoch": 0.42186849771119433, + "grad_norm": 1.4271933799111647, + "learning_rate": 9.165898591046446e-06, + "loss": 0.3922, + "step": 8110, + "vit_learning_rate": 1.833179718209289e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6319, + "epoch": 0.4223886808156471, + "grad_norm": 1.5778466616349085, + "learning_rate": 9.163567907461492e-06, + "loss": 0.4022, + "step": 8120, + "vit_learning_rate": 1.8327135814922982e-06 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 2.4357, + "epoch": 0.42290886392009985, + "grad_norm": 1.6010503072747602, + "learning_rate": 9.161234269353104e-06, + "loss": 0.3517, + "step": 8130, + "vit_learning_rate": 1.8322468538706203e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6407, + "epoch": 0.42342904702455264, + "grad_norm": 1.873664254592826, + "learning_rate": 9.15889767837726e-06, + "loss": 0.3614, + "step": 8140, + "vit_learning_rate": 1.831779535675452e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6721, + "epoch": 0.4239492301290054, + "grad_norm": 1.5991554387968903, + "learning_rate": 9.15655813619204e-06, + "loss": 0.4027, + "step": 8150, + "vit_learning_rate": 1.8313116272384077e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3515, + "epoch": 0.42446941323345816, + "grad_norm": 1.7201138253341952, + "learning_rate": 9.154215644457612e-06, + "loss": 0.3938, + "step": 8160, + "vit_learning_rate": 1.830843128891522e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7018, + "epoch": 0.42498959633791095, + "grad_norm": 1.2350217564789454, + "learning_rate": 9.15187020483624e-06, + "loss": 0.375, + "step": 8170, + "vit_learning_rate": 1.8303740409672476e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6733, + "epoch": 0.42550977944236373, + "grad_norm": 1.6403756742151336, + "learning_rate": 9.14952181899228e-06, + "loss": 0.3768, + "step": 8180, + "vit_learning_rate": 1.8299043637984557e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 4.0876, + "epoch": 0.42602996254681647, + "grad_norm": 1.226870643389763, + "learning_rate": 9.147170488592176e-06, + "loss": 0.3703, + "step": 8190, + "vit_learning_rate": 1.8294340977184348e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 2.3652, + "epoch": 0.42655014565126925, + "grad_norm": 1.3770542391300171, + "learning_rate": 9.144816215304464e-06, + "loss": 0.4003, + "step": 8200, + "vit_learning_rate": 1.8289632430608925e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6588, + "epoch": 0.42707032875572204, + "grad_norm": 1.5653172143193337, + "learning_rate": 9.14245900079977e-06, + "loss": 0.395, + "step": 8210, + "vit_learning_rate": 1.8284918001599538e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4328, + "epoch": 0.42759051186017477, + "grad_norm": 1.4325036342824375, + "learning_rate": 9.140098846750803e-06, + "loss": 0.4052, + "step": 8220, + "vit_learning_rate": 1.8280197693501602e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 2.3786, + "epoch": 0.42811069496462756, + "grad_norm": 1.9551571355825852, + "learning_rate": 9.13773575483236e-06, + "loss": 0.3846, + "step": 8230, + "vit_learning_rate": 1.8275471509664718e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4715, + "epoch": 0.4286308780690803, + "grad_norm": 1.4421997602059882, + "learning_rate": 9.135369726721323e-06, + "loss": 0.3854, + "step": 8240, + "vit_learning_rate": 1.8270739453442646e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6455, + "epoch": 0.4291510611735331, + "grad_norm": 1.5815829289223928, + "learning_rate": 9.133000764096659e-06, + "loss": 0.3878, + "step": 8250, + "vit_learning_rate": 1.8266001528193313e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 2.3988, + "epoch": 0.42967124427798586, + "grad_norm": 1.892281776560883, + "learning_rate": 9.130628868639411e-06, + "loss": 0.3784, + "step": 8260, + "vit_learning_rate": 1.8261257737278819e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6127, + "epoch": 0.4301914273824386, + "grad_norm": 2.305554029952674, + "learning_rate": 9.12825404203271e-06, + "loss": 0.3957, + "step": 8270, + "vit_learning_rate": 1.825650808406542e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 5.8334, + "epoch": 0.4307116104868914, + "grad_norm": 1.8888946774576545, + "learning_rate": 9.125876285961767e-06, + "loss": 0.373, + "step": 8280, + "vit_learning_rate": 1.8251752571923532e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6454, + "epoch": 0.43123179359134417, + "grad_norm": 2.522610055883211, + "learning_rate": 9.123495602113865e-06, + "loss": 0.393, + "step": 8290, + "vit_learning_rate": 1.8246991204227728e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4809, + "epoch": 0.4317519766957969, + "grad_norm": 1.5712482055038481, + "learning_rate": 9.121111992178373e-06, + "loss": 0.3767, + "step": 8300, + "vit_learning_rate": 1.8242223984356745e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.646, + "epoch": 0.4322721598002497, + "grad_norm": 1.5169030862650377, + "learning_rate": 9.118725457846734e-06, + "loss": 0.3699, + "step": 8310, + "vit_learning_rate": 1.8237450915693463e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.0947, + "epoch": 0.4327923429047025, + "grad_norm": 1.5356654492486401, + "learning_rate": 9.116336000812458e-06, + "loss": 0.368, + "step": 8320, + "vit_learning_rate": 1.8232672001624914e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6765, + "epoch": 0.4333125260091552, + "grad_norm": 1.1773694621545419, + "learning_rate": 9.113943622771139e-06, + "loss": 0.3803, + "step": 8330, + "vit_learning_rate": 1.8227887245542277e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3636, + "epoch": 0.433832709113608, + "grad_norm": 1.6850569766126877, + "learning_rate": 9.111548325420443e-06, + "loss": 0.3811, + "step": 8340, + "vit_learning_rate": 1.8223096650840883e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3977, + "epoch": 0.4343528922180608, + "grad_norm": 1.184456469541064, + "learning_rate": 9.109150110460104e-06, + "loss": 0.3823, + "step": 8350, + "vit_learning_rate": 1.8218300220920203e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6243, + "epoch": 0.4348730753225135, + "grad_norm": 1.7083496518422228, + "learning_rate": 9.106748979591921e-06, + "loss": 0.3675, + "step": 8360, + "vit_learning_rate": 1.8213497959183842e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6428, + "epoch": 0.4353932584269663, + "grad_norm": 3.3121082808431757, + "learning_rate": 9.104344934519777e-06, + "loss": 0.3654, + "step": 8370, + "vit_learning_rate": 1.8208689869039552e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6879, + "epoch": 0.43591344153141903, + "grad_norm": 1.8254457570327207, + "learning_rate": 9.101937976949613e-06, + "loss": 0.3907, + "step": 8380, + "vit_learning_rate": 1.8203875953899223e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.716, + "epoch": 0.4364336246358718, + "grad_norm": 1.9257492271685743, + "learning_rate": 9.099528108589436e-06, + "loss": 0.3807, + "step": 8390, + "vit_learning_rate": 1.8199056217178868e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6631, + "epoch": 0.4369538077403246, + "grad_norm": 1.6213417549328504, + "learning_rate": 9.097115331149319e-06, + "loss": 0.3856, + "step": 8400, + "vit_learning_rate": 1.8194230662298635e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3668, + "epoch": 0.43747399084477734, + "grad_norm": 1.4249659223001891, + "learning_rate": 9.094699646341404e-06, + "loss": 0.3773, + "step": 8410, + "vit_learning_rate": 1.8189399292682805e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6566, + "epoch": 0.4379941739492301, + "grad_norm": 1.6664556082190658, + "learning_rate": 9.092281055879894e-06, + "loss": 0.3938, + "step": 8420, + "vit_learning_rate": 1.8184562111759784e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7233, + "epoch": 0.4385143570536829, + "grad_norm": 1.7038072406867883, + "learning_rate": 9.08985956148105e-06, + "loss": 0.3609, + "step": 8430, + "vit_learning_rate": 1.81797191229621e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.1637, + "epoch": 0.43903454015813564, + "grad_norm": 1.4639674363859396, + "learning_rate": 9.087435164863199e-06, + "loss": 0.3554, + "step": 8440, + "vit_learning_rate": 1.8174870329726398e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3442, + "epoch": 0.43955472326258843, + "grad_norm": 1.8625970259938234, + "learning_rate": 9.085007867746727e-06, + "loss": 0.3642, + "step": 8450, + "vit_learning_rate": 1.8170015735493451e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6582, + "epoch": 0.4400749063670412, + "grad_norm": 1.3875850613129617, + "learning_rate": 9.082577671854071e-06, + "loss": 0.3789, + "step": 8460, + "vit_learning_rate": 1.816515534370814e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6107, + "epoch": 0.44059508947149395, + "grad_norm": 2.1478062567978045, + "learning_rate": 9.080144578909736e-06, + "loss": 0.4008, + "step": 8470, + "vit_learning_rate": 1.816028915781947e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6356, + "epoch": 0.44111527257594674, + "grad_norm": 1.6604293254481293, + "learning_rate": 9.077708590640275e-06, + "loss": 0.3904, + "step": 8480, + "vit_learning_rate": 1.8155417181280548e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6408, + "epoch": 0.4416354556803995, + "grad_norm": 2.985827868543645, + "learning_rate": 9.075269708774297e-06, + "loss": 0.3683, + "step": 8490, + "vit_learning_rate": 1.8150539417548591e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6362, + "epoch": 0.44215563878485226, + "grad_norm": 2.159367686693401, + "learning_rate": 9.072827935042467e-06, + "loss": 0.3686, + "step": 8500, + "vit_learning_rate": 1.8145655870084934e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6386, + "epoch": 0.44267582188930504, + "grad_norm": 1.8407811593260588, + "learning_rate": 9.070383271177499e-06, + "loss": 0.3931, + "step": 8510, + "vit_learning_rate": 1.8140766542354994e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6916, + "epoch": 0.4431960049937578, + "grad_norm": 2.614217981954905, + "learning_rate": 9.06793571891416e-06, + "loss": 0.3903, + "step": 8520, + "vit_learning_rate": 1.8135871437828316e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6789, + "epoch": 0.44371618809821056, + "grad_norm": 1.9258758176104513, + "learning_rate": 9.065485279989264e-06, + "loss": 0.3808, + "step": 8530, + "vit_learning_rate": 1.8130970559978525e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6653, + "epoch": 0.44423637120266335, + "grad_norm": 1.6010549795493247, + "learning_rate": 9.063031956141676e-06, + "loss": 0.3789, + "step": 8540, + "vit_learning_rate": 1.8126063912283351e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7303, + "epoch": 0.4447565543071161, + "grad_norm": 1.706000662181887, + "learning_rate": 9.06057574911231e-06, + "loss": 0.3761, + "step": 8550, + "vit_learning_rate": 1.8121151498224618e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6431, + "epoch": 0.44527673741156887, + "grad_norm": 1.8319503217615063, + "learning_rate": 9.05811666064412e-06, + "loss": 0.388, + "step": 8560, + "vit_learning_rate": 1.8116233321288238e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.758, + "epoch": 0.44579692051602166, + "grad_norm": 1.405051098624571, + "learning_rate": 9.055654692482109e-06, + "loss": 0.3943, + "step": 8570, + "vit_learning_rate": 1.8111309384964218e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6463, + "epoch": 0.4463171036204744, + "grad_norm": 1.8140262837228387, + "learning_rate": 9.053189846373324e-06, + "loss": 0.3862, + "step": 8580, + "vit_learning_rate": 1.8106379692746645e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6637, + "epoch": 0.4468372867249272, + "grad_norm": 1.8540433322076566, + "learning_rate": 9.05072212406685e-06, + "loss": 0.3787, + "step": 8590, + "vit_learning_rate": 1.81014442481337e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6207, + "epoch": 0.44735746982937996, + "grad_norm": 2.009830457183394, + "learning_rate": 9.048251527313818e-06, + "loss": 0.4034, + "step": 8600, + "vit_learning_rate": 1.8096503054627634e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.4685, + "epoch": 0.4478776529338327, + "grad_norm": 1.4590538878395032, + "learning_rate": 9.045778057867396e-06, + "loss": 0.3709, + "step": 8610, + "vit_learning_rate": 1.809155611573479e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 2.4203, + "epoch": 0.4483978360382855, + "grad_norm": 1.6143806551442885, + "learning_rate": 9.043301717482791e-06, + "loss": 0.3755, + "step": 8620, + "vit_learning_rate": 1.808660343496558e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6373, + "epoch": 0.44891801914273827, + "grad_norm": 1.4271108875350433, + "learning_rate": 9.040822507917246e-06, + "loss": 0.3741, + "step": 8630, + "vit_learning_rate": 1.808164501583449e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.4166, + "epoch": 0.449438202247191, + "grad_norm": 1.1196237705136995, + "learning_rate": 9.038340430930044e-06, + "loss": 0.3811, + "step": 8640, + "vit_learning_rate": 1.8076680861860086e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6186, + "epoch": 0.4499583853516438, + "grad_norm": 2.488806929521565, + "learning_rate": 9.035855488282496e-06, + "loss": 0.3796, + "step": 8650, + "vit_learning_rate": 1.807171097656499e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7453, + "epoch": 0.4504785684560966, + "grad_norm": 1.7249644573546268, + "learning_rate": 9.033367681737953e-06, + "loss": 0.3922, + "step": 8660, + "vit_learning_rate": 1.8066735363475905e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6661, + "epoch": 0.4509987515605493, + "grad_norm": 1.7514521276253263, + "learning_rate": 9.030877013061796e-06, + "loss": 0.4107, + "step": 8670, + "vit_learning_rate": 1.806175402612359e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6744, + "epoch": 0.4515189346650021, + "grad_norm": 1.3246768854239888, + "learning_rate": 9.028383484021436e-06, + "loss": 0.3838, + "step": 8680, + "vit_learning_rate": 1.805676696804287e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6248, + "epoch": 0.4520391177694548, + "grad_norm": 1.6674324805432328, + "learning_rate": 9.025887096386316e-06, + "loss": 0.3955, + "step": 8690, + "vit_learning_rate": 1.805177419277263e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7208, + "epoch": 0.4525593008739076, + "grad_norm": 2.0831242487985184, + "learning_rate": 9.023387851927903e-06, + "loss": 0.3587, + "step": 8700, + "vit_learning_rate": 1.8046775703855806e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.703, + "epoch": 0.4530794839783604, + "grad_norm": 1.6495014946648905, + "learning_rate": 9.020885752419698e-06, + "loss": 0.383, + "step": 8710, + "vit_learning_rate": 1.8041771504839393e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6985, + "epoch": 0.45359966708281313, + "grad_norm": 1.3600139996038456, + "learning_rate": 9.01838079963722e-06, + "loss": 0.3843, + "step": 8720, + "vit_learning_rate": 1.803676159927444e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6933, + "epoch": 0.4541198501872659, + "grad_norm": 1.3148964450758356, + "learning_rate": 9.015872995358021e-06, + "loss": 0.3924, + "step": 8730, + "vit_learning_rate": 1.8031745990716042e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6343, + "epoch": 0.4546400332917187, + "grad_norm": 2.4922872748701685, + "learning_rate": 9.01336234136167e-06, + "loss": 0.4019, + "step": 8740, + "vit_learning_rate": 1.802672468272334e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6669, + "epoch": 0.45516021639617144, + "grad_norm": 1.4695688007809466, + "learning_rate": 9.010848839429763e-06, + "loss": 0.3853, + "step": 8750, + "vit_learning_rate": 1.8021697678859524e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6695, + "epoch": 0.4556803995006242, + "grad_norm": 2.0606603094037164, + "learning_rate": 9.008332491345912e-06, + "loss": 0.3947, + "step": 8760, + "vit_learning_rate": 1.8016664982691823e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3986, + "epoch": 0.456200582605077, + "grad_norm": 1.5952201868860432, + "learning_rate": 9.005813298895752e-06, + "loss": 0.3642, + "step": 8770, + "vit_learning_rate": 1.80116265977915e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6921, + "epoch": 0.45672076570952974, + "grad_norm": 1.228423801489659, + "learning_rate": 9.003291263866936e-06, + "loss": 0.3826, + "step": 8780, + "vit_learning_rate": 1.800658252773387e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.634, + "epoch": 0.45724094881398253, + "grad_norm": 1.9089875550649251, + "learning_rate": 9.000766388049133e-06, + "loss": 0.3726, + "step": 8790, + "vit_learning_rate": 1.8001532776098264e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6591, + "epoch": 0.4577611319184353, + "grad_norm": 1.4785682211872277, + "learning_rate": 8.99823867323403e-06, + "loss": 0.3965, + "step": 8800, + "vit_learning_rate": 1.7996477346468058e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6566, + "epoch": 0.45828131502288805, + "grad_norm": 1.3660737839094614, + "learning_rate": 8.995708121215325e-06, + "loss": 0.3813, + "step": 8810, + "vit_learning_rate": 1.7991416242430649e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6582, + "epoch": 0.45880149812734083, + "grad_norm": 2.291447019749816, + "learning_rate": 8.993174733788734e-06, + "loss": 0.3853, + "step": 8820, + "vit_learning_rate": 1.7986349467577467e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6862, + "epoch": 0.45932168123179357, + "grad_norm": 5.736591930634126, + "learning_rate": 8.990638512751983e-06, + "loss": 0.3982, + "step": 8830, + "vit_learning_rate": 1.7981277025503962e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3571, + "epoch": 0.45984186433624635, + "grad_norm": 1.5961442195783446, + "learning_rate": 8.988099459904804e-06, + "loss": 0.3887, + "step": 8840, + "vit_learning_rate": 1.7976198919809607e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6707, + "epoch": 0.46036204744069914, + "grad_norm": 1.753082559023421, + "learning_rate": 8.985557577048947e-06, + "loss": 0.3687, + "step": 8850, + "vit_learning_rate": 1.7971115154097893e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6444, + "epoch": 0.46088223054515187, + "grad_norm": 2.473086042109129, + "learning_rate": 8.983012865988165e-06, + "loss": 0.3789, + "step": 8860, + "vit_learning_rate": 1.796602573197633e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7516, + "epoch": 0.46140241364960466, + "grad_norm": 2.7750907290208753, + "learning_rate": 8.98046532852822e-06, + "loss": 0.3845, + "step": 8870, + "vit_learning_rate": 1.7960930657056437e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6376, + "epoch": 0.46192259675405745, + "grad_norm": 1.8087841464929197, + "learning_rate": 8.977914966476876e-06, + "loss": 0.3514, + "step": 8880, + "vit_learning_rate": 1.7955829932953748e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7203, + "epoch": 0.4624427798585102, + "grad_norm": 1.4325877133297324, + "learning_rate": 8.975361781643905e-06, + "loss": 0.3738, + "step": 8890, + "vit_learning_rate": 1.7950723563287808e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3721, + "epoch": 0.46296296296296297, + "grad_norm": 1.5781522960355918, + "learning_rate": 8.972805775841082e-06, + "loss": 0.4146, + "step": 8900, + "vit_learning_rate": 1.7945611551682162e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6696, + "epoch": 0.46348314606741575, + "grad_norm": 1.5376109229677222, + "learning_rate": 8.970246950882182e-06, + "loss": 0.3982, + "step": 8910, + "vit_learning_rate": 1.7940493901764362e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6883, + "epoch": 0.4640033291718685, + "grad_norm": 2.492224557370879, + "learning_rate": 8.967685308582982e-06, + "loss": 0.3765, + "step": 8920, + "vit_learning_rate": 1.7935370617165964e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6635, + "epoch": 0.46452351227632127, + "grad_norm": 1.7393187070581384, + "learning_rate": 8.965120850761257e-06, + "loss": 0.3582, + "step": 8930, + "vit_learning_rate": 1.7930241701522512e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6553, + "epoch": 0.46504369538077406, + "grad_norm": 1.4773861393970797, + "learning_rate": 8.962553579236782e-06, + "loss": 0.3766, + "step": 8940, + "vit_learning_rate": 1.792510715847356e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6758, + "epoch": 0.4655638784852268, + "grad_norm": 1.5825276085139268, + "learning_rate": 8.959983495831326e-06, + "loss": 0.3859, + "step": 8950, + "vit_learning_rate": 1.7919966991662649e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6454, + "epoch": 0.4660840615896796, + "grad_norm": 1.437150368545848, + "learning_rate": 8.957410602368652e-06, + "loss": 0.3864, + "step": 8960, + "vit_learning_rate": 1.7914821204737303e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7329, + "epoch": 0.4666042446941323, + "grad_norm": 1.8044837545333483, + "learning_rate": 8.954834900674525e-06, + "loss": 0.3825, + "step": 8970, + "vit_learning_rate": 1.7909669801349048e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7201, + "epoch": 0.4671244277985851, + "grad_norm": 1.389284340632531, + "learning_rate": 8.952256392576693e-06, + "loss": 0.3833, + "step": 8980, + "vit_learning_rate": 1.7904512785153386e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7258, + "epoch": 0.4676446109030379, + "grad_norm": 2.161247503010971, + "learning_rate": 8.949675079904902e-06, + "loss": 0.3756, + "step": 8990, + "vit_learning_rate": 1.7899350159809803e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 2.4317, + "epoch": 0.4681647940074906, + "grad_norm": 2.188312793954064, + "learning_rate": 8.947090964490886e-06, + "loss": 0.4181, + "step": 9000, + "vit_learning_rate": 1.789418192898177e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3686, + "epoch": 0.4686849771119434, + "grad_norm": 1.803889720694085, + "learning_rate": 8.944504048168366e-06, + "loss": 0.3974, + "step": 9010, + "vit_learning_rate": 1.788900809633673e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6724, + "epoch": 0.4692051602163962, + "grad_norm": 1.499179454726745, + "learning_rate": 8.941914332773055e-06, + "loss": 0.3788, + "step": 9020, + "vit_learning_rate": 1.7883828665546105e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6318, + "epoch": 0.4697253433208489, + "grad_norm": 1.873307622820769, + "learning_rate": 8.939321820142646e-06, + "loss": 0.3752, + "step": 9030, + "vit_learning_rate": 1.787864364028529e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6364, + "epoch": 0.4702455264253017, + "grad_norm": 1.3803578500119524, + "learning_rate": 8.936726512116824e-06, + "loss": 0.391, + "step": 9040, + "vit_learning_rate": 1.7873453024233647e-06 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7, + "epoch": 0.4707657095297545, + "grad_norm": 1.589108852694282, + "learning_rate": 8.934128410537254e-06, + "loss": 0.3762, + "step": 9050, + "vit_learning_rate": 1.7868256821074505e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6601, + "epoch": 0.4712858926342072, + "grad_norm": 1.7685106041978715, + "learning_rate": 8.931527517247582e-06, + "loss": 0.3797, + "step": 9060, + "vit_learning_rate": 1.7863055034495163e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.679, + "epoch": 0.47180607573866, + "grad_norm": 1.2550004050062977, + "learning_rate": 8.928923834093436e-06, + "loss": 0.3933, + "step": 9070, + "vit_learning_rate": 1.7857847668186871e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6634, + "epoch": 0.4723262588431128, + "grad_norm": 1.944435534759575, + "learning_rate": 8.926317362922428e-06, + "loss": 0.4071, + "step": 9080, + "vit_learning_rate": 1.7852634725844853e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.651, + "epoch": 0.47284644194756553, + "grad_norm": 1.307895102988528, + "learning_rate": 8.923708105584142e-06, + "loss": 0.3983, + "step": 9090, + "vit_learning_rate": 1.784741621116828e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6765, + "epoch": 0.4733666250520183, + "grad_norm": 1.6612236495974344, + "learning_rate": 8.921096063930141e-06, + "loss": 0.39, + "step": 9100, + "vit_learning_rate": 1.784219212786028e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6364, + "epoch": 0.47388680815647105, + "grad_norm": 2.0198704513121544, + "learning_rate": 8.918481239813968e-06, + "loss": 0.3694, + "step": 9110, + "vit_learning_rate": 1.7836962479627933e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6494, + "epoch": 0.47440699126092384, + "grad_norm": 1.9309702731941008, + "learning_rate": 8.915863635091134e-06, + "loss": 0.3763, + "step": 9120, + "vit_learning_rate": 1.7831727270182264e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7259, + "epoch": 0.4749271743653766, + "grad_norm": 1.8515351465788612, + "learning_rate": 8.913243251619125e-06, + "loss": 0.369, + "step": 9130, + "vit_learning_rate": 1.782648650323825e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6572, + "epoch": 0.47544735746982936, + "grad_norm": 1.8766823239825359, + "learning_rate": 8.910620091257407e-06, + "loss": 0.3645, + "step": 9140, + "vit_learning_rate": 1.7821240182514813e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6411, + "epoch": 0.47596754057428214, + "grad_norm": 1.3971213790785264, + "learning_rate": 8.907994155867403e-06, + "loss": 0.3778, + "step": 9150, + "vit_learning_rate": 1.7815988311734805e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7849, + "epoch": 0.47648772367873493, + "grad_norm": 1.1694488447121663, + "learning_rate": 8.905365447312515e-06, + "loss": 0.3877, + "step": 9160, + "vit_learning_rate": 1.7810730894625027e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6713, + "epoch": 0.47700790678318766, + "grad_norm": 1.7774877441009227, + "learning_rate": 8.902733967458109e-06, + "loss": 0.3878, + "step": 9170, + "vit_learning_rate": 1.7805467934916215e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7617, + "epoch": 0.47752808988764045, + "grad_norm": 1.6554354686821091, + "learning_rate": 8.900099718171515e-06, + "loss": 0.3805, + "step": 9180, + "vit_learning_rate": 1.7800199436343029e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6356, + "epoch": 0.47804827299209324, + "grad_norm": 1.8126563619825184, + "learning_rate": 8.897462701322037e-06, + "loss": 0.3712, + "step": 9190, + "vit_learning_rate": 1.7794925402644072e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6398, + "epoch": 0.47856845609654597, + "grad_norm": 2.100577856444804, + "learning_rate": 8.894822918780936e-06, + "loss": 0.3872, + "step": 9200, + "vit_learning_rate": 1.7789645837561872e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6541, + "epoch": 0.47908863920099876, + "grad_norm": 1.855523936931664, + "learning_rate": 8.892180372421435e-06, + "loss": 0.3984, + "step": 9210, + "vit_learning_rate": 1.7784360744842868e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6443, + "epoch": 0.47960882230545154, + "grad_norm": 1.507067128673032, + "learning_rate": 8.889535064118721e-06, + "loss": 0.3659, + "step": 9220, + "vit_learning_rate": 1.7779070128237442e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7759, + "epoch": 0.4801290054099043, + "grad_norm": 1.8373130319944575, + "learning_rate": 8.88688699574994e-06, + "loss": 0.3732, + "step": 9230, + "vit_learning_rate": 1.777377399149988e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6729, + "epoch": 0.48064918851435706, + "grad_norm": 1.776529633195195, + "learning_rate": 8.884236169194198e-06, + "loss": 0.3954, + "step": 9240, + "vit_learning_rate": 1.7768472338388394e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6197, + "epoch": 0.4811693716188098, + "grad_norm": 1.2913533164424373, + "learning_rate": 8.881582586332556e-06, + "loss": 0.3657, + "step": 9250, + "vit_learning_rate": 1.7763165172665108e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.655, + "epoch": 0.4816895547232626, + "grad_norm": 2.2172400167900728, + "learning_rate": 8.87892624904803e-06, + "loss": 0.4106, + "step": 9260, + "vit_learning_rate": 1.7757852498096057e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6436, + "epoch": 0.48220973782771537, + "grad_norm": 2.0555776631570897, + "learning_rate": 8.876267159225595e-06, + "loss": 0.3918, + "step": 9270, + "vit_learning_rate": 1.7752534318451188e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6741, + "epoch": 0.4827299209321681, + "grad_norm": 1.415201802534323, + "learning_rate": 8.873605318752179e-06, + "loss": 0.3956, + "step": 9280, + "vit_learning_rate": 1.7747210637504356e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6706, + "epoch": 0.4832501040366209, + "grad_norm": 1.8949123640751628, + "learning_rate": 8.870940729516653e-06, + "loss": 0.376, + "step": 9290, + "vit_learning_rate": 1.7741881459033303e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6449, + "epoch": 0.4837702871410737, + "grad_norm": 2.0834476046605612, + "learning_rate": 8.86827339340985e-06, + "loss": 0.378, + "step": 9300, + "vit_learning_rate": 1.7736546786819697e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6751, + "epoch": 0.4842904702455264, + "grad_norm": 1.3252224346308454, + "learning_rate": 8.865603312324546e-06, + "loss": 0.3663, + "step": 9310, + "vit_learning_rate": 1.773120662464909e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6297, + "epoch": 0.4848106533499792, + "grad_norm": 1.2211625105487083, + "learning_rate": 8.862930488155465e-06, + "loss": 0.3779, + "step": 9320, + "vit_learning_rate": 1.772586097631093e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7697, + "epoch": 0.485330836454432, + "grad_norm": 1.3836287746864313, + "learning_rate": 8.860254922799283e-06, + "loss": 0.3766, + "step": 9330, + "vit_learning_rate": 1.7720509845598564e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7448, + "epoch": 0.4858510195588847, + "grad_norm": 1.9763244416754129, + "learning_rate": 8.85757661815461e-06, + "loss": 0.358, + "step": 9340, + "vit_learning_rate": 1.771515323630922e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.673, + "epoch": 0.4863712026633375, + "grad_norm": 3.147633773214209, + "learning_rate": 8.854895576122014e-06, + "loss": 0.3804, + "step": 9350, + "vit_learning_rate": 1.7709791152244025e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6238, + "epoch": 0.4868913857677903, + "grad_norm": 1.574935317218396, + "learning_rate": 8.852211798603994e-06, + "loss": 0.3791, + "step": 9360, + "vit_learning_rate": 1.7704423597207984e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.659, + "epoch": 0.487411568872243, + "grad_norm": 1.550749258697359, + "learning_rate": 8.849525287504996e-06, + "loss": 0.4022, + "step": 9370, + "vit_learning_rate": 1.7699050575009988e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6407, + "epoch": 0.4879317519766958, + "grad_norm": 1.5866218886637873, + "learning_rate": 8.846836044731402e-06, + "loss": 0.3907, + "step": 9380, + "vit_learning_rate": 1.76936720894628e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6917, + "epoch": 0.4884519350811486, + "grad_norm": 1.5915704059797404, + "learning_rate": 8.844144072191537e-06, + "loss": 0.3854, + "step": 9390, + "vit_learning_rate": 1.768828814438307e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6412, + "epoch": 0.4889721181856013, + "grad_norm": 1.5633774941334544, + "learning_rate": 8.84144937179566e-06, + "loss": 0.3789, + "step": 9400, + "vit_learning_rate": 1.7682898743591318e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6248, + "epoch": 0.4894923012900541, + "grad_norm": 1.5558357719098903, + "learning_rate": 8.838751945455965e-06, + "loss": 0.3835, + "step": 9410, + "vit_learning_rate": 1.7677503890911928e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6652, + "epoch": 0.49001248439450684, + "grad_norm": 2.125110227221074, + "learning_rate": 8.836051795086584e-06, + "loss": 0.3857, + "step": 9420, + "vit_learning_rate": 1.7672103590173167e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6137, + "epoch": 0.49053266749895963, + "grad_norm": 1.2249592439633419, + "learning_rate": 8.833348922603579e-06, + "loss": 0.3736, + "step": 9430, + "vit_learning_rate": 1.7666697845207157e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6995, + "epoch": 0.4910528506034124, + "grad_norm": 1.5647242000058112, + "learning_rate": 8.830643329924944e-06, + "loss": 0.373, + "step": 9440, + "vit_learning_rate": 1.7661286659849886e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6359, + "epoch": 0.49157303370786515, + "grad_norm": 2.8747552288514355, + "learning_rate": 8.827935018970605e-06, + "loss": 0.3718, + "step": 9450, + "vit_learning_rate": 1.7655870037941208e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7384, + "epoch": 0.49209321681231794, + "grad_norm": 1.8507250810893294, + "learning_rate": 8.825223991662415e-06, + "loss": 0.3875, + "step": 9460, + "vit_learning_rate": 1.7650447983324826e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6345, + "epoch": 0.4926133999167707, + "grad_norm": 2.214653747248754, + "learning_rate": 8.822510249924152e-06, + "loss": 0.3946, + "step": 9470, + "vit_learning_rate": 1.7645020499848303e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6828, + "epoch": 0.49313358302122345, + "grad_norm": 1.583928630474654, + "learning_rate": 8.819793795681528e-06, + "loss": 0.3895, + "step": 9480, + "vit_learning_rate": 1.7639587591363054e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.642, + "epoch": 0.49365376612567624, + "grad_norm": 1.6610339409574908, + "learning_rate": 8.817074630862175e-06, + "loss": 0.3754, + "step": 9490, + "vit_learning_rate": 1.7634149261724348e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6405, + "epoch": 0.49417394923012903, + "grad_norm": 1.6995789222908653, + "learning_rate": 8.814352757395645e-06, + "loss": 0.3665, + "step": 9500, + "vit_learning_rate": 1.762870551479129e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.613, + "epoch": 0.49469413233458176, + "grad_norm": 1.8749043753899346, + "learning_rate": 8.81162817721342e-06, + "loss": 0.3635, + "step": 9510, + "vit_learning_rate": 1.7623256354426837e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6204, + "epoch": 0.49521431543903455, + "grad_norm": 1.526568365087304, + "learning_rate": 8.808900892248895e-06, + "loss": 0.3592, + "step": 9520, + "vit_learning_rate": 1.7617801784497788e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6421, + "epoch": 0.49573449854348733, + "grad_norm": 1.2881977271365908, + "learning_rate": 8.806170904437388e-06, + "loss": 0.3806, + "step": 9530, + "vit_learning_rate": 1.7612341808874776e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.719, + "epoch": 0.49625468164794007, + "grad_norm": 1.6944734083357542, + "learning_rate": 8.803438215716139e-06, + "loss": 0.3875, + "step": 9540, + "vit_learning_rate": 1.7606876431432277e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6875, + "epoch": 0.49677486475239285, + "grad_norm": 1.3692418621164653, + "learning_rate": 8.800702828024297e-06, + "loss": 0.3531, + "step": 9550, + "vit_learning_rate": 1.7601405656048592e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7612, + "epoch": 0.4972950478568456, + "grad_norm": 2.0908754662526974, + "learning_rate": 8.797964743302929e-06, + "loss": 0.3845, + "step": 9560, + "vit_learning_rate": 1.7595929486605856e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6275, + "epoch": 0.49781523096129837, + "grad_norm": 1.881528477223903, + "learning_rate": 8.79522396349502e-06, + "loss": 0.3926, + "step": 9570, + "vit_learning_rate": 1.7590447926990036e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.743, + "epoch": 0.49833541406575116, + "grad_norm": 2.028532115399852, + "learning_rate": 8.792480490545462e-06, + "loss": 0.3637, + "step": 9580, + "vit_learning_rate": 1.758496098109092e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6381, + "epoch": 0.4988555971702039, + "grad_norm": 1.2395615809311142, + "learning_rate": 8.789734326401057e-06, + "loss": 0.3874, + "step": 9590, + "vit_learning_rate": 1.7579468652802112e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7583, + "epoch": 0.4993757802746567, + "grad_norm": 2.308479730369546, + "learning_rate": 8.786985473010524e-06, + "loss": 0.4125, + "step": 9600, + "vit_learning_rate": 1.7573970946021046e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6922, + "epoch": 0.49989596337910946, + "grad_norm": 1.3166017634664924, + "learning_rate": 8.784233932324486e-06, + "loss": 0.3664, + "step": 9610, + "vit_learning_rate": 1.756846786464897e-06 + }, + { + "avg_batch_load_time": 2.1002, + "avg_batch_processing_time": 0.6139, + "epoch": 0.5004161464835623, + "grad_norm": 1.474514231598599, + "learning_rate": 8.78147970629547e-06, + "loss": 0.367, + "step": 9620, + "vit_learning_rate": 1.7562959412590938e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6779, + "epoch": 0.5009363295880149, + "grad_norm": 2.2337537980270836, + "learning_rate": 8.778722796877914e-06, + "loss": 0.3781, + "step": 9630, + "vit_learning_rate": 1.7557445593755826e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6186, + "epoch": 0.5014565126924677, + "grad_norm": 1.399669118394847, + "learning_rate": 8.775963206028158e-06, + "loss": 0.3795, + "step": 9640, + "vit_learning_rate": 1.7551926412056315e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.667, + "epoch": 0.5019766957969205, + "grad_norm": 2.4300227541232484, + "learning_rate": 8.773200935704445e-06, + "loss": 0.3721, + "step": 9650, + "vit_learning_rate": 1.7546401871408888e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7281, + "epoch": 0.5024968789013733, + "grad_norm": 1.3601001542615665, + "learning_rate": 8.770435987866914e-06, + "loss": 0.3672, + "step": 9660, + "vit_learning_rate": 1.7540871975733825e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6882, + "epoch": 0.5030170620058261, + "grad_norm": 1.5031630467174508, + "learning_rate": 8.767668364477615e-06, + "loss": 0.3816, + "step": 9670, + "vit_learning_rate": 1.7535336728955228e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6433, + "epoch": 0.5035372451102789, + "grad_norm": 1.5149322623488886, + "learning_rate": 8.764898067500488e-06, + "loss": 0.3784, + "step": 9680, + "vit_learning_rate": 1.7529796135000974e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6356, + "epoch": 0.5040574282147315, + "grad_norm": 1.4023496028650497, + "learning_rate": 8.762125098901373e-06, + "loss": 0.3731, + "step": 9690, + "vit_learning_rate": 1.7524250197802744e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6477, + "epoch": 0.5045776113191843, + "grad_norm": 1.3073809982606968, + "learning_rate": 8.759349460648006e-06, + "loss": 0.3653, + "step": 9700, + "vit_learning_rate": 1.7518698921296009e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6337, + "epoch": 0.5050977944236371, + "grad_norm": 1.481316068492034, + "learning_rate": 8.756571154710016e-06, + "loss": 0.3871, + "step": 9710, + "vit_learning_rate": 1.751314230942003e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6625, + "epoch": 0.5056179775280899, + "grad_norm": 1.5086656058905663, + "learning_rate": 8.753790183058926e-06, + "loss": 0.3543, + "step": 9720, + "vit_learning_rate": 1.7507580366117849e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6712, + "epoch": 0.5061381606325427, + "grad_norm": 1.657003793335166, + "learning_rate": 8.751006547668152e-06, + "loss": 0.3815, + "step": 9730, + "vit_learning_rate": 1.75020130953363e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6656, + "epoch": 0.5066583437369955, + "grad_norm": 1.2054447326418067, + "learning_rate": 8.748220250512998e-06, + "loss": 0.3937, + "step": 9740, + "vit_learning_rate": 1.7496440501025993e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6735, + "epoch": 0.5071785268414482, + "grad_norm": 1.3168920241949251, + "learning_rate": 8.745431293570656e-06, + "loss": 0.3751, + "step": 9750, + "vit_learning_rate": 1.749086258714131e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6259, + "epoch": 0.5076987099459009, + "grad_norm": 1.889533596590576, + "learning_rate": 8.74263967882021e-06, + "loss": 0.3433, + "step": 9760, + "vit_learning_rate": 1.7485279357640418e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6333, + "epoch": 0.5082188930503537, + "grad_norm": 1.9791058209872932, + "learning_rate": 8.739845408242627e-06, + "loss": 0.3892, + "step": 9770, + "vit_learning_rate": 1.7479690816485253e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.62, + "epoch": 0.5087390761548065, + "grad_norm": 1.3077523451254585, + "learning_rate": 8.737048483820758e-06, + "loss": 0.3709, + "step": 9780, + "vit_learning_rate": 1.7474096967641514e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7309, + "epoch": 0.5092592592592593, + "grad_norm": 1.399126217872924, + "learning_rate": 8.734248907539336e-06, + "loss": 0.3611, + "step": 9790, + "vit_learning_rate": 1.7468497815078672e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7584, + "epoch": 0.509779442363712, + "grad_norm": 1.7279163056219824, + "learning_rate": 8.73144668138498e-06, + "loss": 0.3713, + "step": 9800, + "vit_learning_rate": 1.7462893362769958e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.641, + "epoch": 0.5102996254681648, + "grad_norm": 1.981031831646849, + "learning_rate": 8.728641807346188e-06, + "loss": 0.3879, + "step": 9810, + "vit_learning_rate": 1.7457283614692373e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6524, + "epoch": 0.5108198085726176, + "grad_norm": 1.3071989078043222, + "learning_rate": 8.725834287413334e-06, + "loss": 0.3586, + "step": 9820, + "vit_learning_rate": 1.7451668574826665e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6518, + "epoch": 0.5113399916770703, + "grad_norm": 2.2239805436765496, + "learning_rate": 8.723024123578671e-06, + "loss": 0.3752, + "step": 9830, + "vit_learning_rate": 1.744604824715734e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6163, + "epoch": 0.5118601747815231, + "grad_norm": 1.4816858850525105, + "learning_rate": 8.720211317836333e-06, + "loss": 0.3809, + "step": 9840, + "vit_learning_rate": 1.7440422635672665e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6979, + "epoch": 0.5123803578859759, + "grad_norm": 1.6599937857169174, + "learning_rate": 8.71739587218232e-06, + "loss": 0.375, + "step": 9850, + "vit_learning_rate": 1.743479174436464e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6319, + "epoch": 0.5129005409904286, + "grad_norm": 1.8441485498844794, + "learning_rate": 8.714577788614513e-06, + "loss": 0.3647, + "step": 9860, + "vit_learning_rate": 1.7429155577229022e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.639, + "epoch": 0.5134207240948814, + "grad_norm": 2.036107616388733, + "learning_rate": 8.711757069132658e-06, + "loss": 0.3885, + "step": 9870, + "vit_learning_rate": 1.7423514138265314e-06 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6689, + "epoch": 0.5139409071993342, + "grad_norm": 1.2890323449350185, + "learning_rate": 8.708933715738379e-06, + "loss": 0.3779, + "step": 9880, + "vit_learning_rate": 1.7417867431476756e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6404, + "epoch": 0.514461090303787, + "grad_norm": 1.5177885115909096, + "learning_rate": 8.706107730435164e-06, + "loss": 0.3959, + "step": 9890, + "vit_learning_rate": 1.7412215460870326e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6573, + "epoch": 0.5149812734082397, + "grad_norm": 1.934937756017241, + "learning_rate": 8.70327911522837e-06, + "loss": 0.359, + "step": 9900, + "vit_learning_rate": 1.7406558230456736e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6979, + "epoch": 0.5155014565126924, + "grad_norm": 1.390349959612243, + "learning_rate": 8.70044787212522e-06, + "loss": 0.4018, + "step": 9910, + "vit_learning_rate": 1.7400895744250436e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6759, + "epoch": 0.5160216396171452, + "grad_norm": 2.2356327059771868, + "learning_rate": 8.697614003134802e-06, + "loss": 0.3821, + "step": 9920, + "vit_learning_rate": 1.7395228006269602e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6806, + "epoch": 0.516541822721598, + "grad_norm": 1.4876866017795627, + "learning_rate": 8.694777510268066e-06, + "loss": 0.3432, + "step": 9930, + "vit_learning_rate": 1.738955502053613e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6566, + "epoch": 0.5170620058260508, + "grad_norm": 2.319482218408376, + "learning_rate": 8.691938395537828e-06, + "loss": 0.3831, + "step": 9940, + "vit_learning_rate": 1.7383876791075654e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7107, + "epoch": 0.5175821889305036, + "grad_norm": 2.1699993663070836, + "learning_rate": 8.68909666095876e-06, + "loss": 0.3939, + "step": 9950, + "vit_learning_rate": 1.7378193321917519e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6702, + "epoch": 0.5181023720349563, + "grad_norm": 1.5405914439340926, + "learning_rate": 8.686252308547395e-06, + "loss": 0.383, + "step": 9960, + "vit_learning_rate": 1.7372504617094787e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6453, + "epoch": 0.518622555139409, + "grad_norm": 1.3290277450573138, + "learning_rate": 8.683405340322123e-06, + "loss": 0.39, + "step": 9970, + "vit_learning_rate": 1.7366810680644247e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6337, + "epoch": 0.5191427382438618, + "grad_norm": 1.7713265832361846, + "learning_rate": 8.680555758303193e-06, + "loss": 0.3736, + "step": 9980, + "vit_learning_rate": 1.7361111516606384e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6806, + "epoch": 0.5196629213483146, + "grad_norm": 1.746013628851707, + "learning_rate": 8.677703564512704e-06, + "loss": 0.3997, + "step": 9990, + "vit_learning_rate": 1.7355407129025405e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6852, + "epoch": 0.5201831044527674, + "grad_norm": 2.733598751373215, + "learning_rate": 8.674848760974612e-06, + "loss": 0.374, + "step": 10000, + "vit_learning_rate": 1.734969752194922e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6485, + "epoch": 0.5207032875572202, + "grad_norm": 2.4276055043236, + "learning_rate": 8.671991349714723e-06, + "loss": 0.3884, + "step": 10010, + "vit_learning_rate": 1.7343982699429442e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6919, + "epoch": 0.521223470661673, + "grad_norm": 1.821318167423227, + "learning_rate": 8.669131332760694e-06, + "loss": 0.3839, + "step": 10020, + "vit_learning_rate": 1.7338262665521387e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6933, + "epoch": 0.5217436537661256, + "grad_norm": 2.2875899832300663, + "learning_rate": 8.666268712142032e-06, + "loss": 0.3765, + "step": 10030, + "vit_learning_rate": 1.733253742428406e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6104, + "epoch": 0.5222638368705784, + "grad_norm": 1.481571313094445, + "learning_rate": 8.663403489890091e-06, + "loss": 0.354, + "step": 10040, + "vit_learning_rate": 1.732680697978018e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6914, + "epoch": 0.5227840199750312, + "grad_norm": 1.5259406895050336, + "learning_rate": 8.660535668038072e-06, + "loss": 0.3812, + "step": 10050, + "vit_learning_rate": 1.732107133607614e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7018, + "epoch": 0.523304203079484, + "grad_norm": 1.4077542226214326, + "learning_rate": 8.657665248621017e-06, + "loss": 0.3765, + "step": 10060, + "vit_learning_rate": 1.7315330497242032e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.703, + "epoch": 0.5238243861839368, + "grad_norm": 1.5520438560872925, + "learning_rate": 8.654792233675814e-06, + "loss": 0.3887, + "step": 10070, + "vit_learning_rate": 1.7309584467351628e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6537, + "epoch": 0.5243445692883895, + "grad_norm": 2.0952585831848136, + "learning_rate": 8.651916625241197e-06, + "loss": 0.3836, + "step": 10080, + "vit_learning_rate": 1.7303833250482391e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.646, + "epoch": 0.5248647523928422, + "grad_norm": 1.615256175774236, + "learning_rate": 8.649038425357732e-06, + "loss": 0.3858, + "step": 10090, + "vit_learning_rate": 1.729807685071546e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.619, + "epoch": 0.525384935497295, + "grad_norm": 1.3512002209224852, + "learning_rate": 8.64615763606783e-06, + "loss": 0.3463, + "step": 10100, + "vit_learning_rate": 1.729231527213566e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6341, + "epoch": 0.5259051186017478, + "grad_norm": 1.7235084179805082, + "learning_rate": 8.643274259415737e-06, + "loss": 0.3652, + "step": 10110, + "vit_learning_rate": 1.7286548518831471e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6299, + "epoch": 0.5264253017062006, + "grad_norm": 1.7241685835651173, + "learning_rate": 8.640388297447537e-06, + "loss": 0.3846, + "step": 10120, + "vit_learning_rate": 1.7280776594895073e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6906, + "epoch": 0.5269454848106534, + "grad_norm": 1.2702426013895838, + "learning_rate": 8.637499752211145e-06, + "loss": 0.3738, + "step": 10130, + "vit_learning_rate": 1.7274999504422287e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6409, + "epoch": 0.5274656679151061, + "grad_norm": 1.6960569938274934, + "learning_rate": 8.63460862575631e-06, + "loss": 0.395, + "step": 10140, + "vit_learning_rate": 1.726921725151262e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6393, + "epoch": 0.5279858510195589, + "grad_norm": 1.343039106129471, + "learning_rate": 8.631714920134619e-06, + "loss": 0.403, + "step": 10150, + "vit_learning_rate": 1.7263429840269237e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7024, + "epoch": 0.5285060341240116, + "grad_norm": 1.5226185809953263, + "learning_rate": 8.628818637399481e-06, + "loss": 0.3771, + "step": 10160, + "vit_learning_rate": 1.725763727479896e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6673, + "epoch": 0.5290262172284644, + "grad_norm": 1.586921701559607, + "learning_rate": 8.625919779606135e-06, + "loss": 0.3995, + "step": 10170, + "vit_learning_rate": 1.7251839559212268e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.656, + "epoch": 0.5295464003329172, + "grad_norm": 2.0012593964764083, + "learning_rate": 8.623018348811654e-06, + "loss": 0.3671, + "step": 10180, + "vit_learning_rate": 1.7246036697623305e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6932, + "epoch": 0.53006658343737, + "grad_norm": 1.2118137931452597, + "learning_rate": 8.620114347074929e-06, + "loss": 0.3726, + "step": 10190, + "vit_learning_rate": 1.7240228694149856e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6793, + "epoch": 0.5305867665418227, + "grad_norm": 1.4261570867668931, + "learning_rate": 8.61720777645668e-06, + "loss": 0.3859, + "step": 10200, + "vit_learning_rate": 1.7234415552913357e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6724, + "epoch": 0.5311069496462755, + "grad_norm": 1.4155384811085434, + "learning_rate": 8.614298639019446e-06, + "loss": 0.3749, + "step": 10210, + "vit_learning_rate": 1.722859727803889e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6492, + "epoch": 0.5316271327507283, + "grad_norm": 1.7120624670427478, + "learning_rate": 8.61138693682759e-06, + "loss": 0.3712, + "step": 10220, + "vit_learning_rate": 1.7222773873655178e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6797, + "epoch": 0.532147315855181, + "grad_norm": 1.5546875510027802, + "learning_rate": 8.608472671947299e-06, + "loss": 0.3918, + "step": 10230, + "vit_learning_rate": 1.7216945343894597e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6889, + "epoch": 0.5326674989596338, + "grad_norm": 1.4733044567322888, + "learning_rate": 8.60555584644657e-06, + "loss": 0.3759, + "step": 10240, + "vit_learning_rate": 1.7211111692893137e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7105, + "epoch": 0.5331876820640865, + "grad_norm": 1.4643138654535413, + "learning_rate": 8.602636462395221e-06, + "loss": 0.3846, + "step": 10250, + "vit_learning_rate": 1.7205272924790441e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6215, + "epoch": 0.5337078651685393, + "grad_norm": 1.6537225078048274, + "learning_rate": 8.59971452186489e-06, + "loss": 0.3908, + "step": 10260, + "vit_learning_rate": 1.7199429043729779e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6721, + "epoch": 0.5342280482729921, + "grad_norm": 2.325655577833949, + "learning_rate": 8.596790026929022e-06, + "loss": 0.3984, + "step": 10270, + "vit_learning_rate": 1.7193580053858043e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6958, + "epoch": 0.5347482313774449, + "grad_norm": 1.4095659250326, + "learning_rate": 8.59386297966288e-06, + "loss": 0.3763, + "step": 10280, + "vit_learning_rate": 1.7187725959325756e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6471, + "epoch": 0.5352684144818977, + "grad_norm": 1.3791866018969392, + "learning_rate": 8.590933382143532e-06, + "loss": 0.3741, + "step": 10290, + "vit_learning_rate": 1.7181866764287061e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6347, + "epoch": 0.5357885975863504, + "grad_norm": 1.444859441583763, + "learning_rate": 8.588001236449861e-06, + "loss": 0.3712, + "step": 10300, + "vit_learning_rate": 1.717600247289972e-06 + }, + { + "avg_batch_load_time": 0.0276, + "avg_batch_processing_time": 0.6321, + "epoch": 0.5363087806908031, + "grad_norm": 1.403378137219336, + "learning_rate": 8.585066544662558e-06, + "loss": 0.3716, + "step": 10310, + "vit_learning_rate": 1.7170133089325115e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6346, + "epoch": 0.5368289637952559, + "grad_norm": 1.6893460037798165, + "learning_rate": 8.582129308864121e-06, + "loss": 0.377, + "step": 10320, + "vit_learning_rate": 1.716425861772824e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.664, + "epoch": 0.5373491468997087, + "grad_norm": 1.3405300147837937, + "learning_rate": 8.57918953113885e-06, + "loss": 0.3815, + "step": 10330, + "vit_learning_rate": 1.7158379062277695e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7228, + "epoch": 0.5378693300041615, + "grad_norm": 1.3062630091226246, + "learning_rate": 8.576247213572849e-06, + "loss": 0.3464, + "step": 10340, + "vit_learning_rate": 1.7152494427145694e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6697, + "epoch": 0.5383895131086143, + "grad_norm": 1.3852018395638022, + "learning_rate": 8.573302358254027e-06, + "loss": 0.3779, + "step": 10350, + "vit_learning_rate": 1.714660471650805e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6594, + "epoch": 0.538909696213067, + "grad_norm": 1.8850368319454205, + "learning_rate": 8.570354967272092e-06, + "loss": 0.3727, + "step": 10360, + "vit_learning_rate": 1.714070993454418e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.631, + "epoch": 0.5394298793175197, + "grad_norm": 1.666150975252164, + "learning_rate": 8.567405042718556e-06, + "loss": 0.385, + "step": 10370, + "vit_learning_rate": 1.7134810085437108e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6803, + "epoch": 0.5399500624219725, + "grad_norm": 1.45217294178686, + "learning_rate": 8.56445258668672e-06, + "loss": 0.3902, + "step": 10380, + "vit_learning_rate": 1.7128905173373435e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6994, + "epoch": 0.5404702455264253, + "grad_norm": 1.615070520565727, + "learning_rate": 8.561497601271689e-06, + "loss": 0.3907, + "step": 10390, + "vit_learning_rate": 1.7122995202543374e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6715, + "epoch": 0.5409904286308781, + "grad_norm": 1.853287434575261, + "learning_rate": 8.558540088570358e-06, + "loss": 0.3885, + "step": 10400, + "vit_learning_rate": 1.7117080177140715e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6283, + "epoch": 0.5415106117353309, + "grad_norm": 1.0875696595932258, + "learning_rate": 8.55558005068142e-06, + "loss": 0.3881, + "step": 10410, + "vit_learning_rate": 1.7111160101362839e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6665, + "epoch": 0.5420307948397836, + "grad_norm": 1.7175056741929529, + "learning_rate": 8.552617489705354e-06, + "loss": 0.3787, + "step": 10420, + "vit_learning_rate": 1.7105234979410707e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7315, + "epoch": 0.5425509779442363, + "grad_norm": 1.441542235499574, + "learning_rate": 8.549652407744438e-06, + "loss": 0.3661, + "step": 10430, + "vit_learning_rate": 1.7099304815488876e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6335, + "epoch": 0.5430711610486891, + "grad_norm": 3.9129182484067555, + "learning_rate": 8.54668480690273e-06, + "loss": 0.3978, + "step": 10440, + "vit_learning_rate": 1.7093369613805459e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6379, + "epoch": 0.5435913441531419, + "grad_norm": 1.6036333441357682, + "learning_rate": 8.54371468928608e-06, + "loss": 0.3802, + "step": 10450, + "vit_learning_rate": 1.7087429378572158e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6421, + "epoch": 0.5441115272575947, + "grad_norm": 1.3780361732903612, + "learning_rate": 8.540742057002125e-06, + "loss": 0.3796, + "step": 10460, + "vit_learning_rate": 1.7081484114004248e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7037, + "epoch": 0.5446317103620475, + "grad_norm": 1.0209917047464196, + "learning_rate": 8.53776691216028e-06, + "loss": 0.4041, + "step": 10470, + "vit_learning_rate": 1.707553382432056e-06 + }, + { + "avg_batch_load_time": 0.0658, + "avg_batch_processing_time": 0.6488, + "epoch": 0.5451518934665002, + "grad_norm": 1.240999413426021, + "learning_rate": 8.534789256871754e-06, + "loss": 0.3752, + "step": 10480, + "vit_learning_rate": 1.7069578513743505e-06 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.6503, + "epoch": 0.545672076570953, + "grad_norm": 2.0426237250786246, + "learning_rate": 8.531809093249525e-06, + "loss": 0.3767, + "step": 10490, + "vit_learning_rate": 1.7063618186499047e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6915, + "epoch": 0.5461922596754057, + "grad_norm": 1.3964346158955312, + "learning_rate": 8.52882642340836e-06, + "loss": 0.379, + "step": 10500, + "vit_learning_rate": 1.7057652846816717e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6795, + "epoch": 0.5467124427798585, + "grad_norm": 1.4146669320975835, + "learning_rate": 8.5258412494648e-06, + "loss": 0.3548, + "step": 10510, + "vit_learning_rate": 1.7051682498929598e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6557, + "epoch": 0.5472326258843113, + "grad_norm": 1.5522491123032716, + "learning_rate": 8.522853573537165e-06, + "loss": 0.3818, + "step": 10520, + "vit_learning_rate": 1.7045707147074328e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6755, + "epoch": 0.547752808988764, + "grad_norm": 1.6923339842802418, + "learning_rate": 8.519863397745552e-06, + "loss": 0.3769, + "step": 10530, + "vit_learning_rate": 1.70397267954911e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.717, + "epoch": 0.5482729920932168, + "grad_norm": 1.4930018025411225, + "learning_rate": 8.516870724211826e-06, + "loss": 0.3867, + "step": 10540, + "vit_learning_rate": 1.7033741448423648e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6598, + "epoch": 0.5487931751976696, + "grad_norm": 1.8660930959113748, + "learning_rate": 8.51387555505963e-06, + "loss": 0.3761, + "step": 10550, + "vit_learning_rate": 1.7027751110119256e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6911, + "epoch": 0.5493133583021224, + "grad_norm": 1.69428520125814, + "learning_rate": 8.510877892414376e-06, + "loss": 0.3745, + "step": 10560, + "vit_learning_rate": 1.7021755784828748e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6616, + "epoch": 0.5498335414065751, + "grad_norm": 1.7279011510397602, + "learning_rate": 8.507877738403246e-06, + "loss": 0.3927, + "step": 10570, + "vit_learning_rate": 1.7015755476806488e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7038, + "epoch": 0.5503537245110279, + "grad_norm": 1.3021413281204943, + "learning_rate": 8.504875095155185e-06, + "loss": 0.3845, + "step": 10580, + "vit_learning_rate": 1.700975019031037e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6321, + "epoch": 0.5508739076154806, + "grad_norm": 1.4838335610752904, + "learning_rate": 8.501869964800916e-06, + "loss": 0.3795, + "step": 10590, + "vit_learning_rate": 1.700373992960183e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7272, + "epoch": 0.5513940907199334, + "grad_norm": 1.95463763410442, + "learning_rate": 8.498862349472916e-06, + "loss": 0.3611, + "step": 10600, + "vit_learning_rate": 1.699772469894583e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6487, + "epoch": 0.5519142738243862, + "grad_norm": 1.2761898939170113, + "learning_rate": 8.495852251305427e-06, + "loss": 0.3604, + "step": 10610, + "vit_learning_rate": 1.6991704502610853e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6741, + "epoch": 0.552434456928839, + "grad_norm": 1.9046069705409385, + "learning_rate": 8.492839672434459e-06, + "loss": 0.3799, + "step": 10620, + "vit_learning_rate": 1.6985679344868916e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6446, + "epoch": 0.5529546400332918, + "grad_norm": 2.0763371077343593, + "learning_rate": 8.489824614997775e-06, + "loss": 0.3781, + "step": 10630, + "vit_learning_rate": 1.6979649229995549e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6877, + "epoch": 0.5534748231377444, + "grad_norm": 3.0639671151278156, + "learning_rate": 8.486807081134903e-06, + "loss": 0.3636, + "step": 10640, + "vit_learning_rate": 1.6973614162269805e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6646, + "epoch": 0.5539950062421972, + "grad_norm": 1.2890406410981141, + "learning_rate": 8.483787072987123e-06, + "loss": 0.4041, + "step": 10650, + "vit_learning_rate": 1.696757414597424e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6434, + "epoch": 0.55451518934665, + "grad_norm": 1.7752401172775727, + "learning_rate": 8.480764592697473e-06, + "loss": 0.3557, + "step": 10660, + "vit_learning_rate": 1.6961529185394942e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6328, + "epoch": 0.5550353724511028, + "grad_norm": 1.7480260144985589, + "learning_rate": 8.477739642410746e-06, + "loss": 0.3564, + "step": 10670, + "vit_learning_rate": 1.6955479284821493e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6494, + "epoch": 0.5555555555555556, + "grad_norm": 1.4258516596978323, + "learning_rate": 8.474712224273489e-06, + "loss": 0.3817, + "step": 10680, + "vit_learning_rate": 1.6949424448546976e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6493, + "epoch": 0.5560757386600084, + "grad_norm": 2.3422680995179763, + "learning_rate": 8.471682340433995e-06, + "loss": 0.3859, + "step": 10690, + "vit_learning_rate": 1.6943364680867988e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6278, + "epoch": 0.556595921764461, + "grad_norm": 1.3911790976774614, + "learning_rate": 8.468649993042313e-06, + "loss": 0.3808, + "step": 10700, + "vit_learning_rate": 1.6937299986084623e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6096, + "epoch": 0.5571161048689138, + "grad_norm": 1.5743218774477439, + "learning_rate": 8.465615184250235e-06, + "loss": 0.3796, + "step": 10710, + "vit_learning_rate": 1.693123036850047e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6423, + "epoch": 0.5576362879733666, + "grad_norm": 1.8836557712928979, + "learning_rate": 8.462577916211303e-06, + "loss": 0.3726, + "step": 10720, + "vit_learning_rate": 1.6925155832422607e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6445, + "epoch": 0.5581564710778194, + "grad_norm": 1.5248683172248525, + "learning_rate": 8.459538191080805e-06, + "loss": 0.3601, + "step": 10730, + "vit_learning_rate": 1.6919076382161607e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6568, + "epoch": 0.5586766541822722, + "grad_norm": 1.498791180823127, + "learning_rate": 8.456496011015767e-06, + "loss": 0.3854, + "step": 10740, + "vit_learning_rate": 1.691299202203153e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7025, + "epoch": 0.559196837286725, + "grad_norm": 1.529929414567097, + "learning_rate": 8.453451378174961e-06, + "loss": 0.3712, + "step": 10750, + "vit_learning_rate": 1.6906902756349922e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6458, + "epoch": 0.5597170203911777, + "grad_norm": 1.6464362223689497, + "learning_rate": 8.450404294718902e-06, + "loss": 0.3934, + "step": 10760, + "vit_learning_rate": 1.6900808589437804e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7254, + "epoch": 0.5602372034956304, + "grad_norm": 2.1498427753788962, + "learning_rate": 8.44735476280984e-06, + "loss": 0.3835, + "step": 10770, + "vit_learning_rate": 1.6894709525619678e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6559, + "epoch": 0.5607573866000832, + "grad_norm": 1.3865007410581875, + "learning_rate": 8.444302784611762e-06, + "loss": 0.3929, + "step": 10780, + "vit_learning_rate": 1.6888605569223523e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6602, + "epoch": 0.561277569704536, + "grad_norm": 1.625367662559517, + "learning_rate": 8.441248362290392e-06, + "loss": 0.3744, + "step": 10790, + "vit_learning_rate": 1.6882496724580784e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6751, + "epoch": 0.5617977528089888, + "grad_norm": 1.3885099452659078, + "learning_rate": 8.438191498013193e-06, + "loss": 0.3769, + "step": 10800, + "vit_learning_rate": 1.6876382996026384e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.632, + "epoch": 0.5623179359134415, + "grad_norm": 1.6087127289600784, + "learning_rate": 8.435132193949353e-06, + "loss": 0.3667, + "step": 10810, + "vit_learning_rate": 1.6870264387898706e-06 + }, + { + "avg_batch_load_time": 0.0251, + "avg_batch_processing_time": 0.6792, + "epoch": 0.5628381190178943, + "grad_norm": 1.9900777066672382, + "learning_rate": 8.432070452269798e-06, + "loss": 0.3991, + "step": 10820, + "vit_learning_rate": 1.6864140904539592e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6924, + "epoch": 0.563358302122347, + "grad_norm": 1.4530371639027193, + "learning_rate": 8.429006275147175e-06, + "loss": 0.386, + "step": 10830, + "vit_learning_rate": 1.6858012550294348e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6434, + "epoch": 0.5638784852267998, + "grad_norm": 1.9255030607122663, + "learning_rate": 8.425939664755874e-06, + "loss": 0.3816, + "step": 10840, + "vit_learning_rate": 1.6851879329511744e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6732, + "epoch": 0.5643986683312526, + "grad_norm": 1.7715808121563494, + "learning_rate": 8.422870623271994e-06, + "loss": 0.3734, + "step": 10850, + "vit_learning_rate": 1.6845741246543987e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7118, + "epoch": 0.5649188514357054, + "grad_norm": 1.9267138086872482, + "learning_rate": 8.419799152873373e-06, + "loss": 0.3743, + "step": 10860, + "vit_learning_rate": 1.6839598305746744e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6415, + "epoch": 0.5654390345401581, + "grad_norm": 1.4278683744874565, + "learning_rate": 8.416725255739567e-06, + "loss": 0.3793, + "step": 10870, + "vit_learning_rate": 1.6833450511479132e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6789, + "epoch": 0.5659592176446109, + "grad_norm": 1.2530835692176356, + "learning_rate": 8.413648934051854e-06, + "loss": 0.409, + "step": 10880, + "vit_learning_rate": 1.6827297868103707e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6849, + "epoch": 0.5664794007490637, + "grad_norm": 1.5276207727542397, + "learning_rate": 8.410570189993232e-06, + "loss": 0.38, + "step": 10890, + "vit_learning_rate": 1.6821140379986463e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6257, + "epoch": 0.5669995838535165, + "grad_norm": 2.1870488799541246, + "learning_rate": 8.40748902574842e-06, + "loss": 0.4184, + "step": 10900, + "vit_learning_rate": 1.6814978051496835e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6494, + "epoch": 0.5675197669579692, + "grad_norm": 2.3489811366730957, + "learning_rate": 8.404405443503852e-06, + "loss": 0.3807, + "step": 10910, + "vit_learning_rate": 1.6808810887007701e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.684, + "epoch": 0.568039950062422, + "grad_norm": 2.4383555861197297, + "learning_rate": 8.40131944544768e-06, + "loss": 0.3926, + "step": 10920, + "vit_learning_rate": 1.6802638890895358e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6734, + "epoch": 0.5685601331668747, + "grad_norm": 1.6412006136610975, + "learning_rate": 8.398231033769769e-06, + "loss": 0.4023, + "step": 10930, + "vit_learning_rate": 1.6796462067539534e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6871, + "epoch": 0.5690803162713275, + "grad_norm": 1.725101087975996, + "learning_rate": 8.395140210661698e-06, + "loss": 0.3762, + "step": 10940, + "vit_learning_rate": 1.6790280421323396e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6781, + "epoch": 0.5696004993757803, + "grad_norm": 1.705968690074529, + "learning_rate": 8.392046978316757e-06, + "loss": 0.3715, + "step": 10950, + "vit_learning_rate": 1.678409395663351e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6814, + "epoch": 0.5701206824802331, + "grad_norm": 1.686155964547054, + "learning_rate": 8.388951338929944e-06, + "loss": 0.3925, + "step": 10960, + "vit_learning_rate": 1.6777902677859886e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7661, + "epoch": 0.5706408655846859, + "grad_norm": 1.8338248540041286, + "learning_rate": 8.385853294697965e-06, + "loss": 0.3578, + "step": 10970, + "vit_learning_rate": 1.6771706589395928e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6924, + "epoch": 0.5711610486891385, + "grad_norm": 1.2699039045482257, + "learning_rate": 8.382752847819237e-06, + "loss": 0.3924, + "step": 10980, + "vit_learning_rate": 1.676550569563847e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.701, + "epoch": 0.5716812317935913, + "grad_norm": 1.561051144769169, + "learning_rate": 8.379650000493875e-06, + "loss": 0.3737, + "step": 10990, + "vit_learning_rate": 1.6759300000987747e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6445, + "epoch": 0.5722014148980441, + "grad_norm": 1.5430826191612177, + "learning_rate": 8.376544754923702e-06, + "loss": 0.3661, + "step": 11000, + "vit_learning_rate": 1.6753089509847401e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6952, + "epoch": 0.5727215980024969, + "grad_norm": 1.44166582055479, + "learning_rate": 8.373437113312242e-06, + "loss": 0.3776, + "step": 11010, + "vit_learning_rate": 1.674687422662448e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6739, + "epoch": 0.5732417811069497, + "grad_norm": 1.4510075409039693, + "learning_rate": 8.370327077864719e-06, + "loss": 0.3668, + "step": 11020, + "vit_learning_rate": 1.6740654155729437e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6264, + "epoch": 0.5737619642114025, + "grad_norm": 1.6787197316672677, + "learning_rate": 8.367214650788058e-06, + "loss": 0.3647, + "step": 11030, + "vit_learning_rate": 1.6734429301576115e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6904, + "epoch": 0.5742821473158551, + "grad_norm": 2.2458553625894995, + "learning_rate": 8.364099834290874e-06, + "loss": 0.3886, + "step": 11040, + "vit_learning_rate": 1.6728199668581747e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6345, + "epoch": 0.5748023304203079, + "grad_norm": 1.6955260651233335, + "learning_rate": 8.360982630583487e-06, + "loss": 0.3716, + "step": 11050, + "vit_learning_rate": 1.6721965261166972e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6537, + "epoch": 0.5753225135247607, + "grad_norm": 1.157454958012852, + "learning_rate": 8.357863041877902e-06, + "loss": 0.4012, + "step": 11060, + "vit_learning_rate": 1.6715726083755803e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7037, + "epoch": 0.5758426966292135, + "grad_norm": 1.4345451931826805, + "learning_rate": 8.354741070387825e-06, + "loss": 0.3844, + "step": 11070, + "vit_learning_rate": 1.6709482140775649e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6871, + "epoch": 0.5763628797336663, + "grad_norm": 1.308805967495576, + "learning_rate": 8.351616718328648e-06, + "loss": 0.3808, + "step": 11080, + "vit_learning_rate": 1.6703233436657294e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6935, + "epoch": 0.576883062838119, + "grad_norm": 1.5639828172477768, + "learning_rate": 8.34848998791745e-06, + "loss": 0.4001, + "step": 11090, + "vit_learning_rate": 1.6696979975834898e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6723, + "epoch": 0.5774032459425718, + "grad_norm": 1.6185505271120253, + "learning_rate": 8.345360881373003e-06, + "loss": 0.3899, + "step": 11100, + "vit_learning_rate": 1.6690721762746006e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6341, + "epoch": 0.5779234290470245, + "grad_norm": 1.2720959317071037, + "learning_rate": 8.342229400915763e-06, + "loss": 0.3778, + "step": 11110, + "vit_learning_rate": 1.6684458801831524e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6592, + "epoch": 0.5784436121514773, + "grad_norm": 1.5074792357888793, + "learning_rate": 8.33909554876787e-06, + "loss": 0.3593, + "step": 11120, + "vit_learning_rate": 1.6678191097535738e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6376, + "epoch": 0.5789637952559301, + "grad_norm": 2.9494085556927363, + "learning_rate": 8.335959327153148e-06, + "loss": 0.4141, + "step": 11130, + "vit_learning_rate": 1.6671918654306291e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6451, + "epoch": 0.5794839783603829, + "grad_norm": 1.594877242429533, + "learning_rate": 8.332820738297099e-06, + "loss": 0.4142, + "step": 11140, + "vit_learning_rate": 1.6665641476594197e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6372, + "epoch": 0.5800041614648356, + "grad_norm": 1.906263826891528, + "learning_rate": 8.329679784426911e-06, + "loss": 0.3628, + "step": 11150, + "vit_learning_rate": 1.6659359568853821e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.695, + "epoch": 0.5805243445692884, + "grad_norm": 1.1827702543161633, + "learning_rate": 8.326536467771447e-06, + "loss": 0.3818, + "step": 11160, + "vit_learning_rate": 1.665307293554289e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6139, + "epoch": 0.5810445276737412, + "grad_norm": 1.4725938102022005, + "learning_rate": 8.323390790561242e-06, + "loss": 0.3646, + "step": 11170, + "vit_learning_rate": 1.6646781581122483e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.671, + "epoch": 0.5815647107781939, + "grad_norm": 1.6095356751789545, + "learning_rate": 8.320242755028515e-06, + "loss": 0.4014, + "step": 11180, + "vit_learning_rate": 1.6640485510057028e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.754, + "epoch": 0.5820848938826467, + "grad_norm": 1.5852742697174698, + "learning_rate": 8.317092363407153e-06, + "loss": 0.3738, + "step": 11190, + "vit_learning_rate": 1.6634184726814305e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6724, + "epoch": 0.5826050769870995, + "grad_norm": 1.5728611458531412, + "learning_rate": 8.313939617932716e-06, + "loss": 0.4017, + "step": 11200, + "vit_learning_rate": 1.6627879235865427e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6301, + "epoch": 0.5831252600915522, + "grad_norm": 1.892883886145551, + "learning_rate": 8.310784520842433e-06, + "loss": 0.3871, + "step": 11210, + "vit_learning_rate": 1.6621569041684862e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6677, + "epoch": 0.583645443196005, + "grad_norm": 1.774928321481838, + "learning_rate": 8.307627074375204e-06, + "loss": 0.3666, + "step": 11220, + "vit_learning_rate": 1.6615254148750407e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6185, + "epoch": 0.5841656263004578, + "grad_norm": 1.5402438059349421, + "learning_rate": 8.304467280771596e-06, + "loss": 0.3778, + "step": 11230, + "vit_learning_rate": 1.6608934561543188e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6458, + "epoch": 0.5846858094049106, + "grad_norm": 1.6359048387452597, + "learning_rate": 8.301305142273839e-06, + "loss": 0.3728, + "step": 11240, + "vit_learning_rate": 1.6602610284547676e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7159, + "epoch": 0.5852059925093633, + "grad_norm": 1.5044281264593087, + "learning_rate": 8.29814066112583e-06, + "loss": 0.3617, + "step": 11250, + "vit_learning_rate": 1.6596281322251656e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6362, + "epoch": 0.585726175613816, + "grad_norm": 1.7127266948165272, + "learning_rate": 8.294973839573129e-06, + "loss": 0.3797, + "step": 11260, + "vit_learning_rate": 1.6589947679146254e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7149, + "epoch": 0.5862463587182688, + "grad_norm": 1.622267995604017, + "learning_rate": 8.29180467986295e-06, + "loss": 0.368, + "step": 11270, + "vit_learning_rate": 1.6583609359725896e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6842, + "epoch": 0.5867665418227216, + "grad_norm": 1.3576219673137595, + "learning_rate": 8.288633184244175e-06, + "loss": 0.3762, + "step": 11280, + "vit_learning_rate": 1.6577266368488347e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6023, + "epoch": 0.5872867249271744, + "grad_norm": 1.8396690737766375, + "learning_rate": 8.285459354967337e-06, + "loss": 0.3802, + "step": 11290, + "vit_learning_rate": 1.6570918709934673e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7156, + "epoch": 0.5878069080316272, + "grad_norm": 1.79268319713907, + "learning_rate": 8.282283194284632e-06, + "loss": 0.3576, + "step": 11300, + "vit_learning_rate": 1.6564566388569263e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6149, + "epoch": 0.58832709113608, + "grad_norm": 2.2328321629726897, + "learning_rate": 8.279104704449902e-06, + "loss": 0.3847, + "step": 11310, + "vit_learning_rate": 1.65582094088998e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6467, + "epoch": 0.5888472742405326, + "grad_norm": 1.7333908637898832, + "learning_rate": 8.275923887718646e-06, + "loss": 0.386, + "step": 11320, + "vit_learning_rate": 1.655184777543729e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6813, + "epoch": 0.5893674573449854, + "grad_norm": 2.1651795510028977, + "learning_rate": 8.272740746348019e-06, + "loss": 0.3609, + "step": 11330, + "vit_learning_rate": 1.6545481492696035e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.651, + "epoch": 0.5898876404494382, + "grad_norm": 1.2795559794422224, + "learning_rate": 8.269555282596813e-06, + "loss": 0.3733, + "step": 11340, + "vit_learning_rate": 1.6539110565193623e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6625, + "epoch": 0.590407823553891, + "grad_norm": 2.2793982026229855, + "learning_rate": 8.26636749872548e-06, + "loss": 0.4005, + "step": 11350, + "vit_learning_rate": 1.653273499745096e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6359, + "epoch": 0.5909280066583438, + "grad_norm": 1.572198240208907, + "learning_rate": 8.263177396996116e-06, + "loss": 0.4052, + "step": 11360, + "vit_learning_rate": 1.652635479399223e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6497, + "epoch": 0.5914481897627965, + "grad_norm": 2.610571771992912, + "learning_rate": 8.259984979672457e-06, + "loss": 0.3847, + "step": 11370, + "vit_learning_rate": 1.6519969959344912e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6418, + "epoch": 0.5919683728672492, + "grad_norm": 1.669058117401033, + "learning_rate": 8.256790249019883e-06, + "loss": 0.3728, + "step": 11380, + "vit_learning_rate": 1.6513580498039765e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6241, + "epoch": 0.592488555971702, + "grad_norm": 1.3786916099914601, + "learning_rate": 8.253593207305421e-06, + "loss": 0.3782, + "step": 11390, + "vit_learning_rate": 1.6507186414610842e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6516, + "epoch": 0.5930087390761548, + "grad_norm": 1.3725909979515927, + "learning_rate": 8.250393856797735e-06, + "loss": 0.4125, + "step": 11400, + "vit_learning_rate": 1.650078771359547e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.69, + "epoch": 0.5935289221806076, + "grad_norm": 1.3288783761754714, + "learning_rate": 8.247192199767124e-06, + "loss": 0.39, + "step": 11410, + "vit_learning_rate": 1.6494384399534248e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6749, + "epoch": 0.5940491052850604, + "grad_norm": 1.72014711255732, + "learning_rate": 8.24398823848553e-06, + "loss": 0.3685, + "step": 11420, + "vit_learning_rate": 1.6487976476971058e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7142, + "epoch": 0.5945692883895131, + "grad_norm": 1.489366094500631, + "learning_rate": 8.240781975226524e-06, + "loss": 0.3718, + "step": 11430, + "vit_learning_rate": 1.6481563950453045e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7127, + "epoch": 0.5950894714939658, + "grad_norm": 1.631183127858616, + "learning_rate": 8.237573412265313e-06, + "loss": 0.3984, + "step": 11440, + "vit_learning_rate": 1.6475146824530623e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.661, + "epoch": 0.5956096545984186, + "grad_norm": 1.5019236873803814, + "learning_rate": 8.234362551878736e-06, + "loss": 0.3778, + "step": 11450, + "vit_learning_rate": 1.646872510375747e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6457, + "epoch": 0.5961298377028714, + "grad_norm": 1.5626937682360857, + "learning_rate": 8.231149396345267e-06, + "loss": 0.3773, + "step": 11460, + "vit_learning_rate": 1.646229879269053e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6654, + "epoch": 0.5966500208073242, + "grad_norm": 1.488376868970373, + "learning_rate": 8.227933947944998e-06, + "loss": 0.3909, + "step": 11470, + "vit_learning_rate": 1.6455867895889994e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6364, + "epoch": 0.597170203911777, + "grad_norm": 1.6537700310134253, + "learning_rate": 8.224716208959658e-06, + "loss": 0.3794, + "step": 11480, + "vit_learning_rate": 1.6449432417919313e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6628, + "epoch": 0.5976903870162297, + "grad_norm": 2.093028635845478, + "learning_rate": 8.221496181672597e-06, + "loss": 0.3669, + "step": 11490, + "vit_learning_rate": 1.6442992363345192e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6497, + "epoch": 0.5982105701206825, + "grad_norm": 1.4682419827999353, + "learning_rate": 8.218273868368787e-06, + "loss": 0.3794, + "step": 11500, + "vit_learning_rate": 1.6436547736737573e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6077, + "epoch": 0.5987307532251352, + "grad_norm": 1.406639770956715, + "learning_rate": 8.21504927133483e-06, + "loss": 0.3643, + "step": 11510, + "vit_learning_rate": 1.6430098542669657e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6883, + "epoch": 0.599250936329588, + "grad_norm": 2.4250640022151515, + "learning_rate": 8.211822392858936e-06, + "loss": 0.3705, + "step": 11520, + "vit_learning_rate": 1.642364478571787e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7023, + "epoch": 0.5997711194340408, + "grad_norm": 2.09701256487436, + "learning_rate": 8.208593235230947e-06, + "loss": 0.3918, + "step": 11530, + "vit_learning_rate": 1.641718647046189e-06 + }, + { + "avg_batch_load_time": 2.002, + "avg_batch_processing_time": 0.6261, + "epoch": 0.6002913025384935, + "grad_norm": 2.047130071000073, + "learning_rate": 8.205361800742313e-06, + "loss": 0.3532, + "step": 11540, + "vit_learning_rate": 1.6410723601484624e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6358, + "epoch": 0.6008114856429463, + "grad_norm": 1.680139253178763, + "learning_rate": 8.202128091686104e-06, + "loss": 0.3798, + "step": 11550, + "vit_learning_rate": 1.6404256183372207e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6262, + "epoch": 0.6013316687473991, + "grad_norm": 1.523815011330795, + "learning_rate": 8.198892110357004e-06, + "loss": 0.3872, + "step": 11560, + "vit_learning_rate": 1.6397784220714007e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6582, + "epoch": 0.6018518518518519, + "grad_norm": 1.3713256268959308, + "learning_rate": 8.19565385905131e-06, + "loss": 0.3832, + "step": 11570, + "vit_learning_rate": 1.6391307718102616e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6655, + "epoch": 0.6023720349563046, + "grad_norm": 1.5609534177404687, + "learning_rate": 8.192413340066923e-06, + "loss": 0.3512, + "step": 11580, + "vit_learning_rate": 1.6384826680133845e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6105, + "epoch": 0.6028922180607574, + "grad_norm": 1.4731558699972793, + "learning_rate": 8.189170555703364e-06, + "loss": 0.3907, + "step": 11590, + "vit_learning_rate": 1.6378341111406727e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7703, + "epoch": 0.6034124011652101, + "grad_norm": 1.6167811066172966, + "learning_rate": 8.185925508261753e-06, + "loss": 0.3608, + "step": 11600, + "vit_learning_rate": 1.6371851016523505e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6098, + "epoch": 0.6039325842696629, + "grad_norm": 1.1876543279400271, + "learning_rate": 8.182678200044822e-06, + "loss": 0.3655, + "step": 11610, + "vit_learning_rate": 1.6365356400089642e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6615, + "epoch": 0.6044527673741157, + "grad_norm": 1.4435499102866711, + "learning_rate": 8.1794286333569e-06, + "loss": 0.3868, + "step": 11620, + "vit_learning_rate": 1.6358857266713799e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6508, + "epoch": 0.6049729504785685, + "grad_norm": 1.6979642360585867, + "learning_rate": 8.176176810503929e-06, + "loss": 0.3656, + "step": 11630, + "vit_learning_rate": 1.6352353621007855e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6499, + "epoch": 0.6054931335830213, + "grad_norm": 2.0697879040590506, + "learning_rate": 8.172922733793439e-06, + "loss": 0.3706, + "step": 11640, + "vit_learning_rate": 1.6345845467586877e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7674, + "epoch": 0.606013316687474, + "grad_norm": 1.3506319770528294, + "learning_rate": 8.169666405534572e-06, + "loss": 0.3766, + "step": 11650, + "vit_learning_rate": 1.633933281106914e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6978, + "epoch": 0.6065334997919267, + "grad_norm": 1.7325869658596553, + "learning_rate": 8.166407828038058e-06, + "loss": 0.3757, + "step": 11660, + "vit_learning_rate": 1.6332815656076115e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6746, + "epoch": 0.6070536828963795, + "grad_norm": 2.444139853321681, + "learning_rate": 8.16314700361623e-06, + "loss": 0.3955, + "step": 11670, + "vit_learning_rate": 1.6326294007232458e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7716, + "epoch": 0.6075738660008323, + "grad_norm": 1.3485056630226522, + "learning_rate": 8.159883934583011e-06, + "loss": 0.3836, + "step": 11680, + "vit_learning_rate": 1.631976786916602e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6588, + "epoch": 0.6080940491052851, + "grad_norm": 1.5838369060190636, + "learning_rate": 8.156618623253919e-06, + "loss": 0.3826, + "step": 11690, + "vit_learning_rate": 1.6313237246507837e-06 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7465, + "epoch": 0.6086142322097379, + "grad_norm": 1.5323921095795228, + "learning_rate": 8.153351071946064e-06, + "loss": 0.3737, + "step": 11700, + "vit_learning_rate": 1.6306702143892123e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6597, + "epoch": 0.6091344153141905, + "grad_norm": 1.4397048815815194, + "learning_rate": 8.150081282978139e-06, + "loss": 0.3711, + "step": 11710, + "vit_learning_rate": 1.6300162565956276e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6324, + "epoch": 0.6096545984186433, + "grad_norm": 1.8490190446855062, + "learning_rate": 8.146809258670436e-06, + "loss": 0.3689, + "step": 11720, + "vit_learning_rate": 1.629361851734087e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6325, + "epoch": 0.6101747815230961, + "grad_norm": 2.0642978627929853, + "learning_rate": 8.143535001344824e-06, + "loss": 0.4081, + "step": 11730, + "vit_learning_rate": 1.6287070002689647e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.68, + "epoch": 0.6106949646275489, + "grad_norm": 1.4516982882116218, + "learning_rate": 8.140258513324762e-06, + "loss": 0.3733, + "step": 11740, + "vit_learning_rate": 1.6280517026649523e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6615, + "epoch": 0.6112151477320017, + "grad_norm": 1.4330396813193977, + "learning_rate": 8.136979796935286e-06, + "loss": 0.3979, + "step": 11750, + "vit_learning_rate": 1.6273959593870571e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.673, + "epoch": 0.6117353308364545, + "grad_norm": 1.4595157713279108, + "learning_rate": 8.13369885450302e-06, + "loss": 0.3602, + "step": 11760, + "vit_learning_rate": 1.6267397709006038e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6848, + "epoch": 0.6122555139409072, + "grad_norm": 1.2028705301372624, + "learning_rate": 8.130415688356165e-06, + "loss": 0.381, + "step": 11770, + "vit_learning_rate": 1.6260831376712326e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.664, + "epoch": 0.61277569704536, + "grad_norm": 1.324854841232032, + "learning_rate": 8.127130300824498e-06, + "loss": 0.3738, + "step": 11780, + "vit_learning_rate": 1.6254260601648993e-06 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6637, + "epoch": 0.6132958801498127, + "grad_norm": 1.309536547300209, + "learning_rate": 8.123842694239372e-06, + "loss": 0.389, + "step": 11790, + "vit_learning_rate": 1.6247685388478742e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6391, + "epoch": 0.6138160632542655, + "grad_norm": 1.3055704141927562, + "learning_rate": 8.12055287093372e-06, + "loss": 0.366, + "step": 11800, + "vit_learning_rate": 1.6241105741867437e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.653, + "epoch": 0.6143362463587183, + "grad_norm": 1.4822909851372594, + "learning_rate": 8.117260833242045e-06, + "loss": 0.3598, + "step": 11810, + "vit_learning_rate": 1.6234521666484087e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6506, + "epoch": 0.614856429463171, + "grad_norm": 2.199496539224046, + "learning_rate": 8.113966583500417e-06, + "loss": 0.3889, + "step": 11820, + "vit_learning_rate": 1.6227933167000832e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6833, + "epoch": 0.6153766125676238, + "grad_norm": 1.5768384938936706, + "learning_rate": 8.110670124046482e-06, + "loss": 0.3985, + "step": 11830, + "vit_learning_rate": 1.6221340248092962e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7429, + "epoch": 0.6158967956720766, + "grad_norm": 1.5793389815606005, + "learning_rate": 8.107371457219452e-06, + "loss": 0.3714, + "step": 11840, + "vit_learning_rate": 1.6214742914438904e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6309, + "epoch": 0.6164169787765293, + "grad_norm": 2.3675965087016424, + "learning_rate": 8.104070585360106e-06, + "loss": 0.3873, + "step": 11850, + "vit_learning_rate": 1.6208141170720212e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6612, + "epoch": 0.6169371618809821, + "grad_norm": 1.6147941140488353, + "learning_rate": 8.100767510810785e-06, + "loss": 0.3513, + "step": 11860, + "vit_learning_rate": 1.620153502162157e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.625, + "epoch": 0.6174573449854349, + "grad_norm": 1.6187197760377927, + "learning_rate": 8.097462235915394e-06, + "loss": 0.3984, + "step": 11870, + "vit_learning_rate": 1.6194924471830787e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7008, + "epoch": 0.6179775280898876, + "grad_norm": 2.1759136792507476, + "learning_rate": 8.094154763019404e-06, + "loss": 0.3905, + "step": 11880, + "vit_learning_rate": 1.6188309526038806e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6678, + "epoch": 0.6184977111943404, + "grad_norm": 1.3462660835342966, + "learning_rate": 8.090845094469837e-06, + "loss": 0.3822, + "step": 11890, + "vit_learning_rate": 1.6181690188939671e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6366, + "epoch": 0.6190178942987932, + "grad_norm": 1.8929757820214077, + "learning_rate": 8.087533232615282e-06, + "loss": 0.3881, + "step": 11900, + "vit_learning_rate": 1.617506646523056e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6417, + "epoch": 0.619538077403246, + "grad_norm": 1.6192273931927428, + "learning_rate": 8.084219179805879e-06, + "loss": 0.3776, + "step": 11910, + "vit_learning_rate": 1.6168438359611755e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6485, + "epoch": 0.6200582605076987, + "grad_norm": 1.726368922929596, + "learning_rate": 8.080902938393324e-06, + "loss": 0.3852, + "step": 11920, + "vit_learning_rate": 1.6161805876786645e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6944, + "epoch": 0.6205784436121515, + "grad_norm": 1.5930762674173649, + "learning_rate": 8.077584510730865e-06, + "loss": 0.3919, + "step": 11930, + "vit_learning_rate": 1.615516902146173e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6523, + "epoch": 0.6210986267166042, + "grad_norm": 1.3057166294395157, + "learning_rate": 8.074263899173306e-06, + "loss": 0.3789, + "step": 11940, + "vit_learning_rate": 1.6148527798346609e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6687, + "epoch": 0.621618809821057, + "grad_norm": 1.8165571862989893, + "learning_rate": 8.070941106076995e-06, + "loss": 0.4041, + "step": 11950, + "vit_learning_rate": 1.6141882212153989e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6599, + "epoch": 0.6221389929255098, + "grad_norm": 1.921384238445385, + "learning_rate": 8.06761613379983e-06, + "loss": 0.3864, + "step": 11960, + "vit_learning_rate": 1.6135232267599659e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6439, + "epoch": 0.6226591760299626, + "grad_norm": 1.7332339805171784, + "learning_rate": 8.064288984701257e-06, + "loss": 0.4054, + "step": 11970, + "vit_learning_rate": 1.6128577969402513e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6424, + "epoch": 0.6231793591344154, + "grad_norm": 4.811286376910751, + "learning_rate": 8.060959661142266e-06, + "loss": 0.3704, + "step": 11980, + "vit_learning_rate": 1.612191932228453e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6584, + "epoch": 0.623699542238868, + "grad_norm": 1.7168352811097691, + "learning_rate": 8.057628165485386e-06, + "loss": 0.3949, + "step": 11990, + "vit_learning_rate": 1.6115256330970772e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6974, + "epoch": 0.6242197253433208, + "grad_norm": 1.6230183074294195, + "learning_rate": 8.054294500094697e-06, + "loss": 0.3817, + "step": 12000, + "vit_learning_rate": 1.6108589000189391e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6402, + "epoch": 0.6247399084477736, + "grad_norm": 1.5755833838146174, + "learning_rate": 8.050958667335807e-06, + "loss": 0.3839, + "step": 12010, + "vit_learning_rate": 1.6101917334671612e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 2.3697, + "epoch": 0.6252600915522264, + "grad_norm": 1.1072529463038905, + "learning_rate": 8.047620669575869e-06, + "loss": 0.3517, + "step": 12020, + "vit_learning_rate": 1.6095241339151736e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4253, + "epoch": 0.6257802746566792, + "grad_norm": 1.7060792262222737, + "learning_rate": 8.04428050918357e-06, + "loss": 0.3995, + "step": 12030, + "vit_learning_rate": 1.608856101836714e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6441, + "epoch": 0.626300457761132, + "grad_norm": 1.4212736945231765, + "learning_rate": 8.040938188529134e-06, + "loss": 0.3888, + "step": 12040, + "vit_learning_rate": 1.6081876377058267e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7052, + "epoch": 0.6268206408655846, + "grad_norm": 1.9374814120942092, + "learning_rate": 8.037593709984319e-06, + "loss": 0.3698, + "step": 12050, + "vit_learning_rate": 1.6075187419968637e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6396, + "epoch": 0.6273408239700374, + "grad_norm": 1.2189273037392037, + "learning_rate": 8.034247075922406e-06, + "loss": 0.3788, + "step": 12060, + "vit_learning_rate": 1.606849415184481e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.629, + "epoch": 0.6278610070744902, + "grad_norm": 1.4487912706252477, + "learning_rate": 8.030898288718216e-06, + "loss": 0.349, + "step": 12070, + "vit_learning_rate": 1.606179657743643e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6691, + "epoch": 0.628381190178943, + "grad_norm": 1.410647228935281, + "learning_rate": 8.027547350748091e-06, + "loss": 0.3568, + "step": 12080, + "vit_learning_rate": 1.605509470149618e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 4.0869, + "epoch": 0.6289013732833958, + "grad_norm": 1.2564479569370117, + "learning_rate": 8.0241942643899e-06, + "loss": 0.3762, + "step": 12090, + "vit_learning_rate": 1.6048388528779797e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6744, + "epoch": 0.6294215563878485, + "grad_norm": 1.9649195913585782, + "learning_rate": 8.02083903202304e-06, + "loss": 0.3741, + "step": 12100, + "vit_learning_rate": 1.6041678064046079e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6767, + "epoch": 0.6299417394923013, + "grad_norm": 1.4197235741373437, + "learning_rate": 8.017481656028428e-06, + "loss": 0.3785, + "step": 12110, + "vit_learning_rate": 1.6034963312056853e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7425, + "epoch": 0.630461922596754, + "grad_norm": 1.093053078744578, + "learning_rate": 8.014122138788504e-06, + "loss": 0.3707, + "step": 12120, + "vit_learning_rate": 1.6028244277577005e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4155, + "epoch": 0.6309821057012068, + "grad_norm": 1.695873098196877, + "learning_rate": 8.010760482687224e-06, + "loss": 0.3815, + "step": 12130, + "vit_learning_rate": 1.6021520965374447e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.647, + "epoch": 0.6315022888056596, + "grad_norm": 2.127713112979463, + "learning_rate": 8.007396690110066e-06, + "loss": 0.3758, + "step": 12140, + "vit_learning_rate": 1.6014793380220132e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6511, + "epoch": 0.6320224719101124, + "grad_norm": 1.4525086651637633, + "learning_rate": 8.004030763444021e-06, + "loss": 0.3749, + "step": 12150, + "vit_learning_rate": 1.600806152688804e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7056, + "epoch": 0.6325426550145651, + "grad_norm": 1.723175577947126, + "learning_rate": 8.000662705077598e-06, + "loss": 0.3728, + "step": 12160, + "vit_learning_rate": 1.6001325410155195e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6963, + "epoch": 0.6330628381190179, + "grad_norm": 1.4063228665495684, + "learning_rate": 7.997292517400815e-06, + "loss": 0.3607, + "step": 12170, + "vit_learning_rate": 1.5994585034801627e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6307, + "epoch": 0.6335830212234707, + "grad_norm": 1.266597992466268, + "learning_rate": 7.993920202805198e-06, + "loss": 0.3971, + "step": 12180, + "vit_learning_rate": 1.5987840405610396e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6418, + "epoch": 0.6341032043279234, + "grad_norm": 1.2652604848983282, + "learning_rate": 7.990545763683793e-06, + "loss": 0.375, + "step": 12190, + "vit_learning_rate": 1.5981091527367585e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6738, + "epoch": 0.6346233874323762, + "grad_norm": 1.6442458062623648, + "learning_rate": 7.987169202431144e-06, + "loss": 0.3638, + "step": 12200, + "vit_learning_rate": 1.5974338404862286e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.398, + "epoch": 0.635143570536829, + "grad_norm": 1.5560613346897545, + "learning_rate": 7.983790521443304e-06, + "loss": 0.3749, + "step": 12210, + "vit_learning_rate": 1.5967581042886607e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3613, + "epoch": 0.6356637536412817, + "grad_norm": 1.3450728461464783, + "learning_rate": 7.98040972311783e-06, + "loss": 0.3677, + "step": 12220, + "vit_learning_rate": 1.5960819446235657e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6377, + "epoch": 0.6361839367457345, + "grad_norm": 1.7860038683952089, + "learning_rate": 7.977026809853782e-06, + "loss": 0.3839, + "step": 12230, + "vit_learning_rate": 1.5954053619707564e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6684, + "epoch": 0.6367041198501873, + "grad_norm": 1.5374208301623582, + "learning_rate": 7.97364178405172e-06, + "loss": 0.3571, + "step": 12240, + "vit_learning_rate": 1.594728356810344e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7675, + "epoch": 0.63722430295464, + "grad_norm": 1.4791971758412, + "learning_rate": 7.970254648113705e-06, + "loss": 0.3794, + "step": 12250, + "vit_learning_rate": 1.5940509296227408e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6796, + "epoch": 0.6377444860590928, + "grad_norm": 1.5361594100751477, + "learning_rate": 7.966865404443295e-06, + "loss": 0.4099, + "step": 12260, + "vit_learning_rate": 1.5933730808886587e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.695, + "epoch": 0.6382646691635455, + "grad_norm": 1.662865001796314, + "learning_rate": 7.96347405544554e-06, + "loss": 0.3919, + "step": 12270, + "vit_learning_rate": 1.5926948110891076e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6721, + "epoch": 0.6387848522679983, + "grad_norm": 1.2915032395115953, + "learning_rate": 7.960080603526984e-06, + "loss": 0.3853, + "step": 12280, + "vit_learning_rate": 1.5920161207053965e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 2.3836, + "epoch": 0.6393050353724511, + "grad_norm": 1.3449454446703497, + "learning_rate": 7.956685051095672e-06, + "loss": 0.3952, + "step": 12290, + "vit_learning_rate": 1.5913370102191341e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6384, + "epoch": 0.6398252184769039, + "grad_norm": 1.383503132392721, + "learning_rate": 7.953287400561129e-06, + "loss": 0.3843, + "step": 12300, + "vit_learning_rate": 1.5906574801122257e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.639, + "epoch": 0.6403454015813567, + "grad_norm": 1.3983825239571646, + "learning_rate": 7.949887654334376e-06, + "loss": 0.3781, + "step": 12310, + "vit_learning_rate": 1.5899775308668747e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.4399, + "epoch": 0.6408655846858095, + "grad_norm": 1.465181786395715, + "learning_rate": 7.946485814827916e-06, + "loss": 0.3846, + "step": 12320, + "vit_learning_rate": 1.5892971629655829e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6618, + "epoch": 0.6413857677902621, + "grad_norm": 1.9705397319117886, + "learning_rate": 7.94308188445574e-06, + "loss": 0.3832, + "step": 12330, + "vit_learning_rate": 1.588616376891148e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7131, + "epoch": 0.6419059508947149, + "grad_norm": 1.5781420908754302, + "learning_rate": 7.939675865633326e-06, + "loss": 0.3676, + "step": 12340, + "vit_learning_rate": 1.587935173126665e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3836, + "epoch": 0.6424261339991677, + "grad_norm": 1.678069904912975, + "learning_rate": 7.936267760777626e-06, + "loss": 0.3766, + "step": 12350, + "vit_learning_rate": 1.587253552155525e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7254, + "epoch": 0.6429463171036205, + "grad_norm": 1.6278511168001466, + "learning_rate": 7.93285757230708e-06, + "loss": 0.361, + "step": 12360, + "vit_learning_rate": 1.5865715144614159e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4035, + "epoch": 0.6434665002080733, + "grad_norm": 1.4647157972401927, + "learning_rate": 7.929445302641601e-06, + "loss": 0.3741, + "step": 12370, + "vit_learning_rate": 1.58588906052832e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.1389, + "epoch": 0.643986683312526, + "grad_norm": 1.4352154360490759, + "learning_rate": 7.926030954202582e-06, + "loss": 0.3439, + "step": 12380, + "vit_learning_rate": 1.5852061908405162e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6558, + "epoch": 0.6445068664169787, + "grad_norm": 1.3503672277315142, + "learning_rate": 7.92261452941289e-06, + "loss": 0.3735, + "step": 12390, + "vit_learning_rate": 1.5845229058825776e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6498, + "epoch": 0.6450270495214315, + "grad_norm": 1.627466227550381, + "learning_rate": 7.919196030696865e-06, + "loss": 0.3824, + "step": 12400, + "vit_learning_rate": 1.5838392061393728e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6873, + "epoch": 0.6455472326258843, + "grad_norm": 1.936700008105164, + "learning_rate": 7.915775460480322e-06, + "loss": 0.4102, + "step": 12410, + "vit_learning_rate": 1.5831550920960642e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6278, + "epoch": 0.6460674157303371, + "grad_norm": 1.5429922446706008, + "learning_rate": 7.91235282119054e-06, + "loss": 0.3845, + "step": 12420, + "vit_learning_rate": 1.582470564238108e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6653, + "epoch": 0.6465875988347899, + "grad_norm": 1.3328433135569842, + "learning_rate": 7.908928115256274e-06, + "loss": 0.3579, + "step": 12430, + "vit_learning_rate": 1.5817856230512547e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.721, + "epoch": 0.6471077819392426, + "grad_norm": 1.6991429451558104, + "learning_rate": 7.905501345107738e-06, + "loss": 0.3849, + "step": 12440, + "vit_learning_rate": 1.5811002690215473e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4173, + "epoch": 0.6476279650436954, + "grad_norm": 1.3960353966537733, + "learning_rate": 7.902072513176612e-06, + "loss": 0.3603, + "step": 12450, + "vit_learning_rate": 1.5804145026353224e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6396, + "epoch": 0.6481481481481481, + "grad_norm": 1.6498621754810079, + "learning_rate": 7.898641621896047e-06, + "loss": 0.3921, + "step": 12460, + "vit_learning_rate": 1.579728324379209e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7688, + "epoch": 0.6486683312526009, + "grad_norm": 1.9256901185758923, + "learning_rate": 7.895208673700643e-06, + "loss": 0.3918, + "step": 12470, + "vit_learning_rate": 1.5790417347401283e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6207, + "epoch": 0.6491885143570537, + "grad_norm": 1.280635986385613, + "learning_rate": 7.891773671026468e-06, + "loss": 0.3986, + "step": 12480, + "vit_learning_rate": 1.5783547342052934e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6345, + "epoch": 0.6497086974615065, + "grad_norm": 1.7220923195417759, + "learning_rate": 7.888336616311048e-06, + "loss": 0.3772, + "step": 12490, + "vit_learning_rate": 1.5776673232622093e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6753, + "epoch": 0.6502288805659592, + "grad_norm": 1.6111362529889268, + "learning_rate": 7.884897511993361e-06, + "loss": 0.3635, + "step": 12500, + "vit_learning_rate": 1.576979502398672e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6368, + "epoch": 0.650749063670412, + "grad_norm": 1.576659396249063, + "learning_rate": 7.88145636051384e-06, + "loss": 0.3785, + "step": 12510, + "vit_learning_rate": 1.576291272102768e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6291, + "epoch": 0.6512692467748648, + "grad_norm": 1.7841110603952304, + "learning_rate": 7.878013164314375e-06, + "loss": 0.375, + "step": 12520, + "vit_learning_rate": 1.5756026328628748e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6384, + "epoch": 0.6517894298793175, + "grad_norm": 1.2160517518904361, + "learning_rate": 7.874567925838303e-06, + "loss": 0.37, + "step": 12530, + "vit_learning_rate": 1.5749135851676604e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6411, + "epoch": 0.6523096129837703, + "grad_norm": 1.3120004466247486, + "learning_rate": 7.871120647530413e-06, + "loss": 0.3847, + "step": 12540, + "vit_learning_rate": 1.5742241295060824e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7396, + "epoch": 0.652829796088223, + "grad_norm": 1.6622783195529027, + "learning_rate": 7.867671331836934e-06, + "loss": 0.3814, + "step": 12550, + "vit_learning_rate": 1.5735342663673868e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.398, + "epoch": 0.6533499791926758, + "grad_norm": 1.4658490586329187, + "learning_rate": 7.864219981205552e-06, + "loss": 0.3691, + "step": 12560, + "vit_learning_rate": 1.5728439962411102e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6862, + "epoch": 0.6538701622971286, + "grad_norm": 1.2909609912049715, + "learning_rate": 7.860766598085391e-06, + "loss": 0.3827, + "step": 12570, + "vit_learning_rate": 1.5721533196170782e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6768, + "epoch": 0.6543903454015814, + "grad_norm": 1.3975145674890541, + "learning_rate": 7.857311184927015e-06, + "loss": 0.3667, + "step": 12580, + "vit_learning_rate": 1.571462236985403e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3972, + "epoch": 0.6549105285060342, + "grad_norm": 1.5444688707038017, + "learning_rate": 7.853853744182433e-06, + "loss": 0.3803, + "step": 12590, + "vit_learning_rate": 1.5707707488364866e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3743, + "epoch": 0.6554307116104869, + "grad_norm": 1.4513652638039336, + "learning_rate": 7.850394278305093e-06, + "loss": 0.3698, + "step": 12600, + "vit_learning_rate": 1.5700788556610184e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6329, + "epoch": 0.6559508947149396, + "grad_norm": 1.4556552146164463, + "learning_rate": 7.846932789749878e-06, + "loss": 0.3779, + "step": 12610, + "vit_learning_rate": 1.5693865579499752e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6669, + "epoch": 0.6564710778193924, + "grad_norm": 1.9396582627741417, + "learning_rate": 7.843469280973101e-06, + "loss": 0.3938, + "step": 12620, + "vit_learning_rate": 1.56869385619462e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6481, + "epoch": 0.6569912609238452, + "grad_norm": 1.8645838114902373, + "learning_rate": 7.84000375443252e-06, + "loss": 0.3864, + "step": 12630, + "vit_learning_rate": 1.5680007508865037e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6802, + "epoch": 0.657511444028298, + "grad_norm": 2.3064224776077973, + "learning_rate": 7.836536212587316e-06, + "loss": 0.3639, + "step": 12640, + "vit_learning_rate": 1.5673072425174633e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6269, + "epoch": 0.6580316271327508, + "grad_norm": 1.8240842029323785, + "learning_rate": 7.833066657898105e-06, + "loss": 0.3621, + "step": 12650, + "vit_learning_rate": 1.5666133315796208e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7413, + "epoch": 0.6585518102372035, + "grad_norm": 1.2103690440005563, + "learning_rate": 7.829595092826929e-06, + "loss": 0.3855, + "step": 12660, + "vit_learning_rate": 1.5659190185653857e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 4.1398, + "epoch": 0.6590719933416562, + "grad_norm": 1.8194532248543873, + "learning_rate": 7.826121519837256e-06, + "loss": 0.3849, + "step": 12670, + "vit_learning_rate": 1.5652243039674508e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6517, + "epoch": 0.659592176446109, + "grad_norm": 1.3925714388768098, + "learning_rate": 7.82264594139398e-06, + "loss": 0.4026, + "step": 12680, + "vit_learning_rate": 1.5645291882787958e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6176, + "epoch": 0.6601123595505618, + "grad_norm": 1.4949186682339648, + "learning_rate": 7.819168359963418e-06, + "loss": 0.3653, + "step": 12690, + "vit_learning_rate": 1.5638336719926835e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6585, + "epoch": 0.6606325426550146, + "grad_norm": 1.3425402879308412, + "learning_rate": 7.815688778013309e-06, + "loss": 0.368, + "step": 12700, + "vit_learning_rate": 1.5631377556026618e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 2.367, + "epoch": 0.6611527257594674, + "grad_norm": 1.3122277918575187, + "learning_rate": 7.812207198012812e-06, + "loss": 0.3682, + "step": 12710, + "vit_learning_rate": 1.562441439602562e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 4.0849, + "epoch": 0.66167290886392, + "grad_norm": 1.2426753001385453, + "learning_rate": 7.808723622432498e-06, + "loss": 0.3728, + "step": 12720, + "vit_learning_rate": 1.5617447244864993e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6664, + "epoch": 0.6621930919683728, + "grad_norm": 1.4789503392360448, + "learning_rate": 7.805238053744363e-06, + "loss": 0.381, + "step": 12730, + "vit_learning_rate": 1.5610476107488723e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7684, + "epoch": 0.6627132750728256, + "grad_norm": 1.2515221566407913, + "learning_rate": 7.801750494421812e-06, + "loss": 0.366, + "step": 12740, + "vit_learning_rate": 1.5603500988843623e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4433, + "epoch": 0.6632334581772784, + "grad_norm": 1.6547134446012879, + "learning_rate": 7.798260946939664e-06, + "loss": 0.3598, + "step": 12750, + "vit_learning_rate": 1.5596521893879326e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3686, + "epoch": 0.6637536412817312, + "grad_norm": 1.593297356448961, + "learning_rate": 7.794769413774149e-06, + "loss": 0.3671, + "step": 12760, + "vit_learning_rate": 1.5589538827548294e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6846, + "epoch": 0.664273824386184, + "grad_norm": 1.6006588735583498, + "learning_rate": 7.791275897402901e-06, + "loss": 0.3772, + "step": 12770, + "vit_learning_rate": 1.5582551794805801e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6885, + "epoch": 0.6647940074906367, + "grad_norm": 2.065754874579543, + "learning_rate": 7.787780400304974e-06, + "loss": 0.362, + "step": 12780, + "vit_learning_rate": 1.5575560800609947e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3804, + "epoch": 0.6653141905950894, + "grad_norm": 1.5421921190792232, + "learning_rate": 7.784282924960814e-06, + "loss": 0.3567, + "step": 12790, + "vit_learning_rate": 1.5568565849921624e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6253, + "epoch": 0.6658343736995422, + "grad_norm": 1.4414696410533367, + "learning_rate": 7.780783473852276e-06, + "loss": 0.3731, + "step": 12800, + "vit_learning_rate": 1.5561566947704549e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6775, + "epoch": 0.666354556803995, + "grad_norm": 1.3301048460146176, + "learning_rate": 7.77728204946262e-06, + "loss": 0.3711, + "step": 12810, + "vit_learning_rate": 1.5554564098925238e-06 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6787, + "epoch": 0.6668747399084478, + "grad_norm": 1.188867474168245, + "learning_rate": 7.7737786542765e-06, + "loss": 0.3523, + "step": 12820, + "vit_learning_rate": 1.5547557308552998e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6215, + "epoch": 0.6673949230129005, + "grad_norm": 2.0148247776938875, + "learning_rate": 7.770273290779977e-06, + "loss": 0.356, + "step": 12830, + "vit_learning_rate": 1.5540546581559952e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6266, + "epoch": 0.6679151061173533, + "grad_norm": 1.3274112336009165, + "learning_rate": 7.766765961460501e-06, + "loss": 0.3917, + "step": 12840, + "vit_learning_rate": 1.5533531922921e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6727, + "epoch": 0.6684352892218061, + "grad_norm": 1.4341062231242745, + "learning_rate": 7.763256668806919e-06, + "loss": 0.3445, + "step": 12850, + "vit_learning_rate": 1.5526513337613837e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7268, + "epoch": 0.6689554723262588, + "grad_norm": 1.27457084303424, + "learning_rate": 7.759745415309472e-06, + "loss": 0.3876, + "step": 12860, + "vit_learning_rate": 1.5519490830618942e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6571, + "epoch": 0.6694756554307116, + "grad_norm": 1.9447455787753787, + "learning_rate": 7.756232203459794e-06, + "loss": 0.3807, + "step": 12870, + "vit_learning_rate": 1.5512464406919586e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6642, + "epoch": 0.6699958385351644, + "grad_norm": 1.372841248908203, + "learning_rate": 7.752717035750907e-06, + "loss": 0.3724, + "step": 12880, + "vit_learning_rate": 1.5505434071501813e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3738, + "epoch": 0.6705160216396171, + "grad_norm": 1.511912219417926, + "learning_rate": 7.74919991467722e-06, + "loss": 0.3772, + "step": 12890, + "vit_learning_rate": 1.5498399829354437e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3944, + "epoch": 0.6710362047440699, + "grad_norm": 1.3529532512119555, + "learning_rate": 7.74568084273453e-06, + "loss": 0.381, + "step": 12900, + "vit_learning_rate": 1.5491361685469055e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.3882, + "epoch": 0.6715563878485227, + "grad_norm": 2.433263549881256, + "learning_rate": 7.742159822420013e-06, + "loss": 0.383, + "step": 12910, + "vit_learning_rate": 1.5484319644840024e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6752, + "epoch": 0.6720765709529755, + "grad_norm": 1.3252175329208877, + "learning_rate": 7.73863685623224e-06, + "loss": 0.3649, + "step": 12920, + "vit_learning_rate": 1.5477273712464477e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6243, + "epoch": 0.6725967540574282, + "grad_norm": 1.629302252059782, + "learning_rate": 7.735111946671149e-06, + "loss": 0.387, + "step": 12930, + "vit_learning_rate": 1.5470223893342296e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7353, + "epoch": 0.673116937161881, + "grad_norm": 1.5845373149310376, + "learning_rate": 7.731585096238065e-06, + "loss": 0.3659, + "step": 12940, + "vit_learning_rate": 1.5463170192476129e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6521, + "epoch": 0.6736371202663337, + "grad_norm": 1.5087131966157443, + "learning_rate": 7.72805630743569e-06, + "loss": 0.37, + "step": 12950, + "vit_learning_rate": 1.5456112614871377e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3755, + "epoch": 0.6741573033707865, + "grad_norm": 1.3165191374623406, + "learning_rate": 7.724525582768094e-06, + "loss": 0.3401, + "step": 12960, + "vit_learning_rate": 1.5449051165536186e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6699, + "epoch": 0.6746774864752393, + "grad_norm": 1.4689320382661066, + "learning_rate": 7.720992924740733e-06, + "loss": 0.3659, + "step": 12970, + "vit_learning_rate": 1.5441985849481466e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6804, + "epoch": 0.6751976695796921, + "grad_norm": 1.4030712072463158, + "learning_rate": 7.717458335860424e-06, + "loss": 0.3833, + "step": 12980, + "vit_learning_rate": 1.5434916671720846e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6564, + "epoch": 0.6757178526841449, + "grad_norm": 1.5020848111941911, + "learning_rate": 7.713921818635361e-06, + "loss": 0.3369, + "step": 12990, + "vit_learning_rate": 1.5427843637270721e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6411, + "epoch": 0.6762380357885975, + "grad_norm": 1.2724241034006394, + "learning_rate": 7.710383375575103e-06, + "loss": 0.3897, + "step": 13000, + "vit_learning_rate": 1.5420766751150205e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6628, + "epoch": 0.6767582188930503, + "grad_norm": 2.0120490267724187, + "learning_rate": 7.706843009190575e-06, + "loss": 0.3962, + "step": 13010, + "vit_learning_rate": 1.5413686018381149e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6994, + "epoch": 0.6772784019975031, + "grad_norm": 1.8670950941477085, + "learning_rate": 7.703300721994072e-06, + "loss": 0.3806, + "step": 13020, + "vit_learning_rate": 1.540660144398814e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6629, + "epoch": 0.6777985851019559, + "grad_norm": 1.3702387347995635, + "learning_rate": 7.699756516499244e-06, + "loss": 0.3604, + "step": 13030, + "vit_learning_rate": 1.5399513032998485e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4829, + "epoch": 0.6783187682064087, + "grad_norm": 1.2678939623497607, + "learning_rate": 7.69621039522111e-06, + "loss": 0.3726, + "step": 13040, + "vit_learning_rate": 1.5392420790442216e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6193, + "epoch": 0.6788389513108615, + "grad_norm": 1.4617321292031955, + "learning_rate": 7.69266236067604e-06, + "loss": 0.3806, + "step": 13050, + "vit_learning_rate": 1.538532472135208e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7394, + "epoch": 0.6793591344153141, + "grad_norm": 2.326804541001065, + "learning_rate": 7.689112415381774e-06, + "loss": 0.3658, + "step": 13060, + "vit_learning_rate": 1.5378224830763546e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.638, + "epoch": 0.6798793175197669, + "grad_norm": 1.5524911608946672, + "learning_rate": 7.685560561857397e-06, + "loss": 0.4059, + "step": 13070, + "vit_learning_rate": 1.5371121123714792e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6872, + "epoch": 0.6803995006242197, + "grad_norm": 1.4189312807362868, + "learning_rate": 7.68200680262335e-06, + "loss": 0.3717, + "step": 13080, + "vit_learning_rate": 1.5364013605246697e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6059, + "epoch": 0.6809196837286725, + "grad_norm": 2.183377291394751, + "learning_rate": 7.67845114020143e-06, + "loss": 0.4084, + "step": 13090, + "vit_learning_rate": 1.5356902280402858e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6357, + "epoch": 0.6814398668331253, + "grad_norm": 1.4669845361561484, + "learning_rate": 7.674893577114783e-06, + "loss": 0.3955, + "step": 13100, + "vit_learning_rate": 1.5349787154229565e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6215, + "epoch": 0.681960049937578, + "grad_norm": 1.6588222781042647, + "learning_rate": 7.671334115887902e-06, + "loss": 0.4072, + "step": 13110, + "vit_learning_rate": 1.5342668231775804e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.658, + "epoch": 0.6824802330420308, + "grad_norm": 1.5181547588800288, + "learning_rate": 7.66777275904663e-06, + "loss": 0.3833, + "step": 13120, + "vit_learning_rate": 1.533554551809326e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.4676, + "epoch": 0.6830004161464835, + "grad_norm": 2.4457167640079, + "learning_rate": 7.664209509118152e-06, + "loss": 0.3709, + "step": 13130, + "vit_learning_rate": 1.5328419018236303e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6707, + "epoch": 0.6835205992509363, + "grad_norm": 1.1476170580441476, + "learning_rate": 7.660644368630998e-06, + "loss": 0.3889, + "step": 13140, + "vit_learning_rate": 1.5321288737261994e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3927, + "epoch": 0.6840407823553891, + "grad_norm": 1.5643177497021123, + "learning_rate": 7.65707734011504e-06, + "loss": 0.3559, + "step": 13150, + "vit_learning_rate": 1.5314154680230078e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.649, + "epoch": 0.6845609654598419, + "grad_norm": 1.5953020747831779, + "learning_rate": 7.653508426101488e-06, + "loss": 0.3527, + "step": 13160, + "vit_learning_rate": 1.5307016852202975e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6376, + "epoch": 0.6850811485642946, + "grad_norm": 1.2480279256044573, + "learning_rate": 7.649937629122892e-06, + "loss": 0.3733, + "step": 13170, + "vit_learning_rate": 1.5299875258245782e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.644, + "epoch": 0.6856013316687474, + "grad_norm": 1.7045460208436367, + "learning_rate": 7.646364951713135e-06, + "loss": 0.3653, + "step": 13180, + "vit_learning_rate": 1.5292729903426267e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6432, + "epoch": 0.6861215147732002, + "grad_norm": 1.082584095955217, + "learning_rate": 7.642790396407442e-06, + "loss": 0.3984, + "step": 13190, + "vit_learning_rate": 1.528558079281488e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.619, + "epoch": 0.686641697877653, + "grad_norm": 1.964643596510439, + "learning_rate": 7.639213965742359e-06, + "loss": 0.3838, + "step": 13200, + "vit_learning_rate": 1.5278427931484715e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7326, + "epoch": 0.6871618809821057, + "grad_norm": 1.486617211526797, + "learning_rate": 7.635635662255772e-06, + "loss": 0.3696, + "step": 13210, + "vit_learning_rate": 1.527127132451154e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6879, + "epoch": 0.6876820640865585, + "grad_norm": 1.5888559181065625, + "learning_rate": 7.632055488486894e-06, + "loss": 0.3682, + "step": 13220, + "vit_learning_rate": 1.5264110976973786e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7077, + "epoch": 0.6882022471910112, + "grad_norm": 2.269077450025123, + "learning_rate": 7.628473446976261e-06, + "loss": 0.3718, + "step": 13230, + "vit_learning_rate": 1.525694689395252e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6545, + "epoch": 0.688722430295464, + "grad_norm": 1.6411484720242413, + "learning_rate": 7.6248895402657416e-06, + "loss": 0.3882, + "step": 13240, + "vit_learning_rate": 1.5249779080531483e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6778, + "epoch": 0.6892426133999168, + "grad_norm": 1.3551418942551063, + "learning_rate": 7.621303770898521e-06, + "loss": 0.3783, + "step": 13250, + "vit_learning_rate": 1.5242607541797039e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6475, + "epoch": 0.6897627965043696, + "grad_norm": 1.5291271523368242, + "learning_rate": 7.617716141419109e-06, + "loss": 0.3469, + "step": 13260, + "vit_learning_rate": 1.5235432282838217e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7798, + "epoch": 0.6902829796088223, + "grad_norm": 1.2203826814150986, + "learning_rate": 7.614126654373337e-06, + "loss": 0.3758, + "step": 13270, + "vit_learning_rate": 1.5228253308746672e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7028, + "epoch": 0.690803162713275, + "grad_norm": 1.4137006209511904, + "learning_rate": 7.610535312308352e-06, + "loss": 0.3619, + "step": 13280, + "vit_learning_rate": 1.52210706246167e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7027, + "epoch": 0.6913233458177278, + "grad_norm": 1.8015700013982296, + "learning_rate": 7.606942117772619e-06, + "loss": 0.3662, + "step": 13290, + "vit_learning_rate": 1.5213884235545237e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.641, + "epoch": 0.6918435289221806, + "grad_norm": 1.463971466709437, + "learning_rate": 7.603347073315917e-06, + "loss": 0.3328, + "step": 13300, + "vit_learning_rate": 1.520669414663183e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.712, + "epoch": 0.6923637120266334, + "grad_norm": 1.2432534398465547, + "learning_rate": 7.599750181489336e-06, + "loss": 0.3728, + "step": 13310, + "vit_learning_rate": 1.519950036297867e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6441, + "epoch": 0.6928838951310862, + "grad_norm": 1.427748852648158, + "learning_rate": 7.596151444845278e-06, + "loss": 0.3663, + "step": 13320, + "vit_learning_rate": 1.5192302889690554e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.68, + "epoch": 0.693404078235539, + "grad_norm": 1.657526208781812, + "learning_rate": 7.592550865937458e-06, + "loss": 0.3739, + "step": 13330, + "vit_learning_rate": 1.5185101731874915e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6251, + "epoch": 0.6939242613399916, + "grad_norm": 1.5436901005859676, + "learning_rate": 7.588948447320893e-06, + "loss": 0.3776, + "step": 13340, + "vit_learning_rate": 1.5177896894641784e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.643, + "epoch": 0.6944444444444444, + "grad_norm": 1.2954253895057488, + "learning_rate": 7.585344191551906e-06, + "loss": 0.3754, + "step": 13350, + "vit_learning_rate": 1.5170688383103811e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6645, + "epoch": 0.6949646275488972, + "grad_norm": 1.8810142197446795, + "learning_rate": 7.58173810118813e-06, + "loss": 0.3875, + "step": 13360, + "vit_learning_rate": 1.5163476202376257e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6678, + "epoch": 0.69548481065335, + "grad_norm": 1.735404472266144, + "learning_rate": 7.578130178788489e-06, + "loss": 0.3876, + "step": 13370, + "vit_learning_rate": 1.5156260357576975e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6438, + "epoch": 0.6960049937578028, + "grad_norm": 1.9994852512960621, + "learning_rate": 7.5745204269132144e-06, + "loss": 0.3897, + "step": 13380, + "vit_learning_rate": 1.5149040853826427e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6469, + "epoch": 0.6965251768622556, + "grad_norm": 2.0892742096578307, + "learning_rate": 7.570908848123837e-06, + "loss": 0.3718, + "step": 13390, + "vit_learning_rate": 1.5141817696247671e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7746, + "epoch": 0.6970453599667082, + "grad_norm": 1.4770329987917548, + "learning_rate": 7.56729544498318e-06, + "loss": 0.3795, + "step": 13400, + "vit_learning_rate": 1.513459088996636e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6702, + "epoch": 0.697565543071161, + "grad_norm": 1.4719903534675203, + "learning_rate": 7.56368022005536e-06, + "loss": 0.3738, + "step": 13410, + "vit_learning_rate": 1.5127360440110718e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6742, + "epoch": 0.6980857261756138, + "grad_norm": 1.6666991876719066, + "learning_rate": 7.560063175905794e-06, + "loss": 0.3932, + "step": 13420, + "vit_learning_rate": 1.5120126351811586e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7485, + "epoch": 0.6986059092800666, + "grad_norm": 4.429720974367968, + "learning_rate": 7.556444315101182e-06, + "loss": 0.3711, + "step": 13430, + "vit_learning_rate": 1.511288863020236e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7404, + "epoch": 0.6991260923845194, + "grad_norm": 1.374411781397774, + "learning_rate": 7.5528236402095146e-06, + "loss": 0.3969, + "step": 13440, + "vit_learning_rate": 1.5105647280419027e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6368, + "epoch": 0.6996462754889721, + "grad_norm": 1.4068749261854148, + "learning_rate": 7.549201153800073e-06, + "loss": 0.3703, + "step": 13450, + "vit_learning_rate": 1.5098402307600145e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6869, + "epoch": 0.7001664585934249, + "grad_norm": 1.5021555637651363, + "learning_rate": 7.545576858443421e-06, + "loss": 0.3909, + "step": 13460, + "vit_learning_rate": 1.509115371688684e-06 + }, + { + "avg_batch_load_time": 2.0543, + "avg_batch_processing_time": 0.6122, + "epoch": 0.7006866416978776, + "grad_norm": 1.753632986152713, + "learning_rate": 7.541950756711409e-06, + "loss": 0.3863, + "step": 13470, + "vit_learning_rate": 1.5083901513422816e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6151, + "epoch": 0.7012068248023304, + "grad_norm": 1.3783680805331073, + "learning_rate": 7.538322851177165e-06, + "loss": 0.3785, + "step": 13480, + "vit_learning_rate": 1.5076645702354326e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6345, + "epoch": 0.7017270079067832, + "grad_norm": 2.137852139402937, + "learning_rate": 7.534693144415099e-06, + "loss": 0.3721, + "step": 13490, + "vit_learning_rate": 1.5069386288830196e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6391, + "epoch": 0.702247191011236, + "grad_norm": 2.427798222387384, + "learning_rate": 7.531061639000904e-06, + "loss": 0.3955, + "step": 13500, + "vit_learning_rate": 1.5062123278001806e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6727, + "epoch": 0.7027673741156887, + "grad_norm": 1.4080498329943718, + "learning_rate": 7.527428337511538e-06, + "loss": 0.3726, + "step": 13510, + "vit_learning_rate": 1.5054856675023075e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6425, + "epoch": 0.7032875572201415, + "grad_norm": 1.413837815345843, + "learning_rate": 7.523793242525246e-06, + "loss": 0.3715, + "step": 13520, + "vit_learning_rate": 1.504758648505049e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6493, + "epoch": 0.7038077403245943, + "grad_norm": 1.2123315996385178, + "learning_rate": 7.520156356621535e-06, + "loss": 0.368, + "step": 13530, + "vit_learning_rate": 1.5040312713243068e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6756, + "epoch": 0.704327923429047, + "grad_norm": 1.5307494135640392, + "learning_rate": 7.516517682381192e-06, + "loss": 0.3787, + "step": 13540, + "vit_learning_rate": 1.5033035364762383e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6379, + "epoch": 0.7048481065334998, + "grad_norm": 1.7290252393462473, + "learning_rate": 7.512877222386268e-06, + "loss": 0.3569, + "step": 13550, + "vit_learning_rate": 1.5025754444772534e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6847, + "epoch": 0.7053682896379525, + "grad_norm": 1.2654830138981554, + "learning_rate": 7.50923497922008e-06, + "loss": 0.3835, + "step": 13560, + "vit_learning_rate": 1.5018469958440159e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7004, + "epoch": 0.7058884727424053, + "grad_norm": 1.5768482358367515, + "learning_rate": 7.505590955467216e-06, + "loss": 0.3996, + "step": 13570, + "vit_learning_rate": 1.501118191093443e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6708, + "epoch": 0.7064086558468581, + "grad_norm": 1.3105728729379749, + "learning_rate": 7.501945153713519e-06, + "loss": 0.3834, + "step": 13580, + "vit_learning_rate": 1.5003890307427036e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6634, + "epoch": 0.7069288389513109, + "grad_norm": 1.6271670602504722, + "learning_rate": 7.498297576546103e-06, + "loss": 0.3646, + "step": 13590, + "vit_learning_rate": 1.4996595153092204e-06 + }, + { + "avg_batch_load_time": 0.0231, + "avg_batch_processing_time": 0.6738, + "epoch": 0.7074490220557637, + "grad_norm": 1.5920007894858597, + "learning_rate": 7.494648226553335e-06, + "loss": 0.3737, + "step": 13600, + "vit_learning_rate": 1.4989296453106667e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7597, + "epoch": 0.7079692051602164, + "grad_norm": 2.404700848776553, + "learning_rate": 7.490997106324844e-06, + "loss": 0.3819, + "step": 13610, + "vit_learning_rate": 1.4981994212649687e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6412, + "epoch": 0.7084893882646691, + "grad_norm": 1.4613836764640746, + "learning_rate": 7.487344218451515e-06, + "loss": 0.3949, + "step": 13620, + "vit_learning_rate": 1.4974688436903026e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6838, + "epoch": 0.7090095713691219, + "grad_norm": 2.372519258602707, + "learning_rate": 7.483689565525481e-06, + "loss": 0.3807, + "step": 13630, + "vit_learning_rate": 1.4967379131050958e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6588, + "epoch": 0.7095297544735747, + "grad_norm": 2.6372631027274993, + "learning_rate": 7.480033150140138e-06, + "loss": 0.3731, + "step": 13640, + "vit_learning_rate": 1.4960066300280273e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7306, + "epoch": 0.7100499375780275, + "grad_norm": 1.503856811943579, + "learning_rate": 7.476374974890123e-06, + "loss": 0.371, + "step": 13650, + "vit_learning_rate": 1.4952749949780245e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7132, + "epoch": 0.7105701206824803, + "grad_norm": 1.728637510399613, + "learning_rate": 7.4727150423713305e-06, + "loss": 0.3623, + "step": 13660, + "vit_learning_rate": 1.4945430084742658e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6871, + "epoch": 0.711090303786933, + "grad_norm": 1.5065833987203785, + "learning_rate": 7.469053355180893e-06, + "loss": 0.3606, + "step": 13670, + "vit_learning_rate": 1.4938106710361783e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6446, + "epoch": 0.7116104868913857, + "grad_norm": 2.197104917278502, + "learning_rate": 7.465389915917197e-06, + "loss": 0.3698, + "step": 13680, + "vit_learning_rate": 1.4930779831834392e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6473, + "epoch": 0.7121306699958385, + "grad_norm": 1.8146985961239959, + "learning_rate": 7.4617247271798646e-06, + "loss": 0.3729, + "step": 13690, + "vit_learning_rate": 1.4923449454359727e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6826, + "epoch": 0.7126508531002913, + "grad_norm": 2.7776083495283195, + "learning_rate": 7.458057791569762e-06, + "loss": 0.3742, + "step": 13700, + "vit_learning_rate": 1.4916115583139524e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7312, + "epoch": 0.7131710362047441, + "grad_norm": 1.3202250039630066, + "learning_rate": 7.454389111689002e-06, + "loss": 0.3879, + "step": 13710, + "vit_learning_rate": 1.4908778223378001e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6204, + "epoch": 0.7136912193091969, + "grad_norm": 1.7511690565881814, + "learning_rate": 7.450718690140921e-06, + "loss": 0.3497, + "step": 13720, + "vit_learning_rate": 1.490143738028184e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6398, + "epoch": 0.7142114024136496, + "grad_norm": 1.6435458810893548, + "learning_rate": 7.4470465295301034e-06, + "loss": 0.3636, + "step": 13730, + "vit_learning_rate": 1.4894093059060204e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6267, + "epoch": 0.7147315855181023, + "grad_norm": 1.5211978956911882, + "learning_rate": 7.443372632462363e-06, + "loss": 0.3737, + "step": 13740, + "vit_learning_rate": 1.4886745264924723e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7158, + "epoch": 0.7152517686225551, + "grad_norm": 1.5967132490189617, + "learning_rate": 7.439697001544745e-06, + "loss": 0.3779, + "step": 13750, + "vit_learning_rate": 1.4879394003089489e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6418, + "epoch": 0.7157719517270079, + "grad_norm": 1.687361886522841, + "learning_rate": 7.436019639385529e-06, + "loss": 0.3915, + "step": 13760, + "vit_learning_rate": 1.4872039278771057e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7453, + "epoch": 0.7162921348314607, + "grad_norm": 1.6934809832550652, + "learning_rate": 7.432340548594217e-06, + "loss": 0.4049, + "step": 13770, + "vit_learning_rate": 1.486468109718843e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6506, + "epoch": 0.7168123179359135, + "grad_norm": 1.6944177746590843, + "learning_rate": 7.428659731781545e-06, + "loss": 0.3504, + "step": 13780, + "vit_learning_rate": 1.4857319463563087e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6696, + "epoch": 0.7173325010403662, + "grad_norm": 1.1110264229758153, + "learning_rate": 7.424977191559464e-06, + "loss": 0.3516, + "step": 13790, + "vit_learning_rate": 1.4849954383118925e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7279, + "epoch": 0.717852684144819, + "grad_norm": 1.7943940499142819, + "learning_rate": 7.4212929305411575e-06, + "loss": 0.382, + "step": 13800, + "vit_learning_rate": 1.4842585861082314e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7297, + "epoch": 0.7183728672492717, + "grad_norm": 1.2028888459595695, + "learning_rate": 7.417606951341028e-06, + "loss": 0.3618, + "step": 13810, + "vit_learning_rate": 1.4835213902682053e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6415, + "epoch": 0.7188930503537245, + "grad_norm": 1.564897892636387, + "learning_rate": 7.413919256574692e-06, + "loss": 0.3895, + "step": 13820, + "vit_learning_rate": 1.4827838513149383e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6742, + "epoch": 0.7194132334581773, + "grad_norm": 1.2488420052802887, + "learning_rate": 7.410229848858989e-06, + "loss": 0.3705, + "step": 13830, + "vit_learning_rate": 1.4820459697717977e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6602, + "epoch": 0.71993341656263, + "grad_norm": 1.3816464426471176, + "learning_rate": 7.406538730811971e-06, + "loss": 0.3908, + "step": 13840, + "vit_learning_rate": 1.481307746162394e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7435, + "epoch": 0.7204535996670828, + "grad_norm": 2.4641344657445865, + "learning_rate": 7.402845905052907e-06, + "loss": 0.3705, + "step": 13850, + "vit_learning_rate": 1.480569181010581e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7108, + "epoch": 0.7209737827715356, + "grad_norm": 1.877601219133894, + "learning_rate": 7.399151374202272e-06, + "loss": 0.3752, + "step": 13860, + "vit_learning_rate": 1.4798302748404542e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7116, + "epoch": 0.7214939658759884, + "grad_norm": 1.4555832845839554, + "learning_rate": 7.395455140881758e-06, + "loss": 0.3878, + "step": 13870, + "vit_learning_rate": 1.4790910281763514e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6618, + "epoch": 0.7220141489804411, + "grad_norm": 1.5964318751143647, + "learning_rate": 7.391757207714258e-06, + "loss": 0.3941, + "step": 13880, + "vit_learning_rate": 1.4783514415428516e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6563, + "epoch": 0.7225343320848939, + "grad_norm": 2.155796029907592, + "learning_rate": 7.388057577323877e-06, + "loss": 0.3684, + "step": 13890, + "vit_learning_rate": 1.4776115154647752e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6434, + "epoch": 0.7230545151893466, + "grad_norm": 1.4264935256790388, + "learning_rate": 7.384356252335922e-06, + "loss": 0.3641, + "step": 13900, + "vit_learning_rate": 1.4768712504671843e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7208, + "epoch": 0.7235746982937994, + "grad_norm": 1.3171156286834826, + "learning_rate": 7.380653235376902e-06, + "loss": 0.3831, + "step": 13910, + "vit_learning_rate": 1.4761306470753803e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6967, + "epoch": 0.7240948813982522, + "grad_norm": 1.8501438321227681, + "learning_rate": 7.37694852907453e-06, + "loss": 0.3813, + "step": 13920, + "vit_learning_rate": 1.4753897058149057e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6687, + "epoch": 0.724615064502705, + "grad_norm": 1.3120109112587657, + "learning_rate": 7.37324213605771e-06, + "loss": 0.3831, + "step": 13930, + "vit_learning_rate": 1.4746484272115419e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6381, + "epoch": 0.7251352476071578, + "grad_norm": 2.494281290293798, + "learning_rate": 7.3695340589565535e-06, + "loss": 0.3788, + "step": 13940, + "vit_learning_rate": 1.4739068117913105e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6492, + "epoch": 0.7256554307116105, + "grad_norm": 1.5320573178825347, + "learning_rate": 7.365824300402358e-06, + "loss": 0.3779, + "step": 13950, + "vit_learning_rate": 1.4731648600804715e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.638, + "epoch": 0.7261756138160632, + "grad_norm": 1.6189217224172354, + "learning_rate": 7.3621128630276175e-06, + "loss": 0.3675, + "step": 13960, + "vit_learning_rate": 1.4724225726055233e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6317, + "epoch": 0.726695796920516, + "grad_norm": 3.327994076828249, + "learning_rate": 7.358399749466021e-06, + "loss": 0.3885, + "step": 13970, + "vit_learning_rate": 1.471679949893204e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.66, + "epoch": 0.7272159800249688, + "grad_norm": 1.339360917658427, + "learning_rate": 7.354684962352439e-06, + "loss": 0.3577, + "step": 13980, + "vit_learning_rate": 1.4709369924704875e-06 + }, + { + "avg_batch_load_time": 0.04, + "avg_batch_processing_time": 0.6665, + "epoch": 0.7277361631294216, + "grad_norm": 1.5153194370177858, + "learning_rate": 7.350968504322939e-06, + "loss": 0.388, + "step": 13990, + "vit_learning_rate": 1.4701937008645874e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6576, + "epoch": 0.7282563462338744, + "grad_norm": 1.7996036722311854, + "learning_rate": 7.347250378014764e-06, + "loss": 0.35, + "step": 14000, + "vit_learning_rate": 1.4694500756029527e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6806, + "epoch": 0.728776529338327, + "grad_norm": 1.1715610715102418, + "learning_rate": 7.34353058606635e-06, + "loss": 0.3735, + "step": 14010, + "vit_learning_rate": 1.4687061172132698e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.72, + "epoch": 0.7292967124427798, + "grad_norm": 1.2697015003524916, + "learning_rate": 7.3398091311173105e-06, + "loss": 0.3807, + "step": 14020, + "vit_learning_rate": 1.4679618262234619e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6932, + "epoch": 0.7298168955472326, + "grad_norm": 1.3773600996708628, + "learning_rate": 7.336086015808439e-06, + "loss": 0.3448, + "step": 14030, + "vit_learning_rate": 1.4672172031616875e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6671, + "epoch": 0.7303370786516854, + "grad_norm": 1.440790946930812, + "learning_rate": 7.332361242781707e-06, + "loss": 0.3774, + "step": 14040, + "vit_learning_rate": 1.4664722485563412e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6811, + "epoch": 0.7308572617561382, + "grad_norm": 1.7298437537065074, + "learning_rate": 7.328634814680265e-06, + "loss": 0.3774, + "step": 14050, + "vit_learning_rate": 1.4657269629360527e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6889, + "epoch": 0.731377444860591, + "grad_norm": 2.9944335865629585, + "learning_rate": 7.324906734148436e-06, + "loss": 0.3524, + "step": 14060, + "vit_learning_rate": 1.464981346829687e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6394, + "epoch": 0.7318976279650437, + "grad_norm": 1.2510689249267546, + "learning_rate": 7.3211770038317135e-06, + "loss": 0.3635, + "step": 14070, + "vit_learning_rate": 1.4642354007663424e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6405, + "epoch": 0.7324178110694964, + "grad_norm": 1.6301351995946567, + "learning_rate": 7.317445626376767e-06, + "loss": 0.393, + "step": 14080, + "vit_learning_rate": 1.4634891252753531e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6943, + "epoch": 0.7329379941739492, + "grad_norm": 1.342082320596154, + "learning_rate": 7.313712604431429e-06, + "loss": 0.371, + "step": 14090, + "vit_learning_rate": 1.4627425208862856e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7011, + "epoch": 0.733458177278402, + "grad_norm": 1.58731469838389, + "learning_rate": 7.309977940644705e-06, + "loss": 0.384, + "step": 14100, + "vit_learning_rate": 1.4619955881289407e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.69, + "epoch": 0.7339783603828548, + "grad_norm": 1.566136567493984, + "learning_rate": 7.30624163766676e-06, + "loss": 0.3581, + "step": 14110, + "vit_learning_rate": 1.4612483275333518e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6929, + "epoch": 0.7344985434873076, + "grad_norm": 1.481925637102645, + "learning_rate": 7.302503698148924e-06, + "loss": 0.3892, + "step": 14120, + "vit_learning_rate": 1.4605007396297845e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6334, + "epoch": 0.7350187265917603, + "grad_norm": 1.0749919969106516, + "learning_rate": 7.298764124743692e-06, + "loss": 0.3724, + "step": 14130, + "vit_learning_rate": 1.459752824948738e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6529, + "epoch": 0.735538909696213, + "grad_norm": 1.524992587564812, + "learning_rate": 7.295022920104711e-06, + "loss": 0.3654, + "step": 14140, + "vit_learning_rate": 1.4590045840209419e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6751, + "epoch": 0.7360590928006658, + "grad_norm": 1.6486709168027618, + "learning_rate": 7.291280086886792e-06, + "loss": 0.3745, + "step": 14150, + "vit_learning_rate": 1.4582560173773581e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6477, + "epoch": 0.7365792759051186, + "grad_norm": 1.3636638308539564, + "learning_rate": 7.2875356277459005e-06, + "loss": 0.3671, + "step": 14160, + "vit_learning_rate": 1.4575071255491799e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6212, + "epoch": 0.7370994590095714, + "grad_norm": 1.4492116408032332, + "learning_rate": 7.283789545339154e-06, + "loss": 0.3989, + "step": 14170, + "vit_learning_rate": 1.4567579090678307e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.666, + "epoch": 0.7376196421140241, + "grad_norm": 1.4466348182422097, + "learning_rate": 7.2800418423248224e-06, + "loss": 0.3865, + "step": 14180, + "vit_learning_rate": 1.4560083684649643e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6458, + "epoch": 0.7381398252184769, + "grad_norm": 1.5709693063254022, + "learning_rate": 7.276292521362325e-06, + "loss": 0.3829, + "step": 14190, + "vit_learning_rate": 1.4552585042724648e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6572, + "epoch": 0.7386600083229297, + "grad_norm": 1.496086239795908, + "learning_rate": 7.272541585112232e-06, + "loss": 0.3785, + "step": 14200, + "vit_learning_rate": 1.4545083170224462e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6454, + "epoch": 0.7391801914273824, + "grad_norm": 1.4405757877137906, + "learning_rate": 7.268789036236255e-06, + "loss": 0.3953, + "step": 14210, + "vit_learning_rate": 1.4537578072472507e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6126, + "epoch": 0.7397003745318352, + "grad_norm": 1.3977024250130272, + "learning_rate": 7.265034877397254e-06, + "loss": 0.3721, + "step": 14220, + "vit_learning_rate": 1.4530069754794508e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6733, + "epoch": 0.740220557636288, + "grad_norm": 1.4434940084652659, + "learning_rate": 7.261279111259234e-06, + "loss": 0.3708, + "step": 14230, + "vit_learning_rate": 1.4522558222518467e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.635, + "epoch": 0.7407407407407407, + "grad_norm": 1.2497432730963245, + "learning_rate": 7.257521740487332e-06, + "loss": 0.3851, + "step": 14240, + "vit_learning_rate": 1.4515043480974663e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6893, + "epoch": 0.7412609238451935, + "grad_norm": 2.5547262785764335, + "learning_rate": 7.253762767747832e-06, + "loss": 0.3613, + "step": 14250, + "vit_learning_rate": 1.450752553549566e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6538, + "epoch": 0.7417811069496463, + "grad_norm": 1.7861040491655427, + "learning_rate": 7.250002195708148e-06, + "loss": 0.3971, + "step": 14260, + "vit_learning_rate": 1.4500004391416293e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6091, + "epoch": 0.7423012900540991, + "grad_norm": 2.0642082405187687, + "learning_rate": 7.246240027036834e-06, + "loss": 0.379, + "step": 14270, + "vit_learning_rate": 1.4492480054073665e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6472, + "epoch": 0.7428214731585518, + "grad_norm": 1.4231075931589572, + "learning_rate": 7.242476264403573e-06, + "loss": 0.3599, + "step": 14280, + "vit_learning_rate": 1.4484952528807145e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6858, + "epoch": 0.7433416562630045, + "grad_norm": 2.105187250845683, + "learning_rate": 7.238710910479185e-06, + "loss": 0.3809, + "step": 14290, + "vit_learning_rate": 1.447742182095837e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6828, + "epoch": 0.7438618393674573, + "grad_norm": 1.8017029856013331, + "learning_rate": 7.234943967935615e-06, + "loss": 0.3942, + "step": 14300, + "vit_learning_rate": 1.4469887935871228e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6384, + "epoch": 0.7443820224719101, + "grad_norm": 1.7777310109976305, + "learning_rate": 7.231175439445933e-06, + "loss": 0.3886, + "step": 14310, + "vit_learning_rate": 1.4462350878891865e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6617, + "epoch": 0.7449022055763629, + "grad_norm": 1.9568985341745482, + "learning_rate": 7.227405327684339e-06, + "loss": 0.3654, + "step": 14320, + "vit_learning_rate": 1.4454810655368677e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6103, + "epoch": 0.7454223886808157, + "grad_norm": 1.3033831950904469, + "learning_rate": 7.223633635326154e-06, + "loss": 0.3567, + "step": 14330, + "vit_learning_rate": 1.4447267270652306e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6406, + "epoch": 0.7459425717852685, + "grad_norm": 1.8864310724274613, + "learning_rate": 7.219860365047822e-06, + "loss": 0.3635, + "step": 14340, + "vit_learning_rate": 1.4439720730095641e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6976, + "epoch": 0.7464627548897211, + "grad_norm": 1.7165743489804217, + "learning_rate": 7.216085519526904e-06, + "loss": 0.3745, + "step": 14350, + "vit_learning_rate": 1.4432171039053807e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6972, + "epoch": 0.7469829379941739, + "grad_norm": 1.5357273238307658, + "learning_rate": 7.2123091014420845e-06, + "loss": 0.376, + "step": 14360, + "vit_learning_rate": 1.4424618202884166e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.641, + "epoch": 0.7475031210986267, + "grad_norm": 1.7815725603838084, + "learning_rate": 7.208531113473159e-06, + "loss": 0.3774, + "step": 14370, + "vit_learning_rate": 1.4417062226946314e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7117, + "epoch": 0.7480233042030795, + "grad_norm": 2.0051209134446695, + "learning_rate": 7.204751558301035e-06, + "loss": 0.3848, + "step": 14380, + "vit_learning_rate": 1.4409503116602068e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7752, + "epoch": 0.7485434873075323, + "grad_norm": 2.109051259909492, + "learning_rate": 7.200970438607739e-06, + "loss": 0.3721, + "step": 14390, + "vit_learning_rate": 1.4401940877215478e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6797, + "epoch": 0.7490636704119851, + "grad_norm": 1.5313972912460356, + "learning_rate": 7.197187757076402e-06, + "loss": 0.3745, + "step": 14400, + "vit_learning_rate": 1.4394375514152804e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.706, + "epoch": 0.7495838535164377, + "grad_norm": 1.4974803500812452, + "learning_rate": 7.193403516391268e-06, + "loss": 0.3605, + "step": 14410, + "vit_learning_rate": 1.4386807032782536e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6831, + "epoch": 0.7501040366208905, + "grad_norm": 1.605748100969173, + "learning_rate": 7.189617719237682e-06, + "loss": 0.3782, + "step": 14420, + "vit_learning_rate": 1.437923543847536e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6634, + "epoch": 0.7506242197253433, + "grad_norm": 1.784532919802023, + "learning_rate": 7.185830368302098e-06, + "loss": 0.3713, + "step": 14430, + "vit_learning_rate": 1.4371660736604194e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6284, + "epoch": 0.7511444028297961, + "grad_norm": 1.4278589658269771, + "learning_rate": 7.18204146627207e-06, + "loss": 0.3621, + "step": 14440, + "vit_learning_rate": 1.4364082932544138e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6542, + "epoch": 0.7516645859342489, + "grad_norm": 1.58489431735092, + "learning_rate": 7.178251015836253e-06, + "loss": 0.3845, + "step": 14450, + "vit_learning_rate": 1.4356502031672504e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.686, + "epoch": 0.7521847690387016, + "grad_norm": 1.6673236022800761, + "learning_rate": 7.174459019684402e-06, + "loss": 0.383, + "step": 14460, + "vit_learning_rate": 1.4348918039368803e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7353, + "epoch": 0.7527049521431544, + "grad_norm": 1.630114606387159, + "learning_rate": 7.1706654805073684e-06, + "loss": 0.4216, + "step": 14470, + "vit_learning_rate": 1.4341330961014734e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6949, + "epoch": 0.7532251352476071, + "grad_norm": 1.555841689354372, + "learning_rate": 7.166870400997099e-06, + "loss": 0.3676, + "step": 14480, + "vit_learning_rate": 1.4333740801994196e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6431, + "epoch": 0.7537453183520599, + "grad_norm": 1.4708853064542495, + "learning_rate": 7.16307378384663e-06, + "loss": 0.3706, + "step": 14490, + "vit_learning_rate": 1.432614756769326e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.687, + "epoch": 0.7542655014565127, + "grad_norm": 1.2120553821804758, + "learning_rate": 7.1592756317500955e-06, + "loss": 0.3782, + "step": 14500, + "vit_learning_rate": 1.4318551263500189e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6491, + "epoch": 0.7547856845609655, + "grad_norm": 1.4570373875176947, + "learning_rate": 7.155475947402712e-06, + "loss": 0.3552, + "step": 14510, + "vit_learning_rate": 1.4310951894805423e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7003, + "epoch": 0.7553058676654182, + "grad_norm": 1.6633240225384036, + "learning_rate": 7.151674733500787e-06, + "loss": 0.3698, + "step": 14520, + "vit_learning_rate": 1.4303349467001572e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6745, + "epoch": 0.755826050769871, + "grad_norm": 1.4615272545551994, + "learning_rate": 7.147871992741715e-06, + "loss": 0.3705, + "step": 14530, + "vit_learning_rate": 1.4295743985483428e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.633, + "epoch": 0.7563462338743238, + "grad_norm": 1.5415217012961786, + "learning_rate": 7.144067727823969e-06, + "loss": 0.3587, + "step": 14540, + "vit_learning_rate": 1.4288135455647936e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6485, + "epoch": 0.7568664169787765, + "grad_norm": 2.6871590545600976, + "learning_rate": 7.140261941447106e-06, + "loss": 0.3636, + "step": 14550, + "vit_learning_rate": 1.428052388289421e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6555, + "epoch": 0.7573866000832293, + "grad_norm": 2.2620025891573827, + "learning_rate": 7.136454636311764e-06, + "loss": 0.3824, + "step": 14560, + "vit_learning_rate": 1.4272909272623529e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6716, + "epoch": 0.757906783187682, + "grad_norm": 1.3519606870977177, + "learning_rate": 7.1326458151196595e-06, + "loss": 0.3731, + "step": 14570, + "vit_learning_rate": 1.4265291630239317e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6455, + "epoch": 0.7584269662921348, + "grad_norm": 1.6103580420974153, + "learning_rate": 7.12883548057358e-06, + "loss": 0.375, + "step": 14580, + "vit_learning_rate": 1.4257670961147156e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6275, + "epoch": 0.7589471493965876, + "grad_norm": 1.5137989509600656, + "learning_rate": 7.1250236353773894e-06, + "loss": 0.3631, + "step": 14590, + "vit_learning_rate": 1.4250047270754776e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6416, + "epoch": 0.7594673325010404, + "grad_norm": 1.853336892314415, + "learning_rate": 7.121210282236026e-06, + "loss": 0.3595, + "step": 14600, + "vit_learning_rate": 1.4242420564472052e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6507, + "epoch": 0.7599875156054932, + "grad_norm": 1.6699429834376167, + "learning_rate": 7.117395423855496e-06, + "loss": 0.3725, + "step": 14610, + "vit_learning_rate": 1.4234790847710991e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6676, + "epoch": 0.760507698709946, + "grad_norm": 1.4741630827585048, + "learning_rate": 7.1135790629428735e-06, + "loss": 0.3454, + "step": 14620, + "vit_learning_rate": 1.4227158125885745e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7237, + "epoch": 0.7610278818143986, + "grad_norm": 1.1829739962302492, + "learning_rate": 7.109761202206298e-06, + "loss": 0.388, + "step": 14630, + "vit_learning_rate": 1.4219522404412593e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6339, + "epoch": 0.7615480649188514, + "grad_norm": 1.2132861893497138, + "learning_rate": 7.105941844354978e-06, + "loss": 0.3988, + "step": 14640, + "vit_learning_rate": 1.4211883688709953e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.666, + "epoch": 0.7620682480233042, + "grad_norm": 1.3867644198378912, + "learning_rate": 7.102120992099177e-06, + "loss": 0.3769, + "step": 14650, + "vit_learning_rate": 1.4204241984198352e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6517, + "epoch": 0.762588431127757, + "grad_norm": 1.8829407232793005, + "learning_rate": 7.0982986481502235e-06, + "loss": 0.3859, + "step": 14660, + "vit_learning_rate": 1.4196597296300446e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6767, + "epoch": 0.7631086142322098, + "grad_norm": 1.8376659406391835, + "learning_rate": 7.094474815220506e-06, + "loss": 0.3983, + "step": 14670, + "vit_learning_rate": 1.418894963044101e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7124, + "epoch": 0.7636287973366626, + "grad_norm": 1.589307926715048, + "learning_rate": 7.090649496023465e-06, + "loss": 0.3582, + "step": 14680, + "vit_learning_rate": 1.4181298992046929e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6631, + "epoch": 0.7641489804411152, + "grad_norm": 1.8315008608816727, + "learning_rate": 7.086822693273601e-06, + "loss": 0.3603, + "step": 14690, + "vit_learning_rate": 1.41736453865472e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6405, + "epoch": 0.764669163545568, + "grad_norm": 1.6871459522475427, + "learning_rate": 7.08299440968646e-06, + "loss": 0.3743, + "step": 14700, + "vit_learning_rate": 1.416598881937292e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7268, + "epoch": 0.7651893466500208, + "grad_norm": 1.4120155117228965, + "learning_rate": 7.079164647978648e-06, + "loss": 0.3715, + "step": 14710, + "vit_learning_rate": 1.4158329295957295e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6514, + "epoch": 0.7657095297544736, + "grad_norm": 2.2854018421227575, + "learning_rate": 7.075333410867812e-06, + "loss": 0.3939, + "step": 14720, + "vit_learning_rate": 1.4150666821735622e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6341, + "epoch": 0.7662297128589264, + "grad_norm": 1.4852778452141142, + "learning_rate": 7.07150070107265e-06, + "loss": 0.3892, + "step": 14730, + "vit_learning_rate": 1.4143001402145298e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7135, + "epoch": 0.7667498959633791, + "grad_norm": 1.3740695606279587, + "learning_rate": 7.067666521312905e-06, + "loss": 0.3825, + "step": 14740, + "vit_learning_rate": 1.4135333042625808e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.618, + "epoch": 0.7672700790678318, + "grad_norm": 1.5513072653341011, + "learning_rate": 7.063830874309361e-06, + "loss": 0.4132, + "step": 14750, + "vit_learning_rate": 1.412766174861872e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6343, + "epoch": 0.7677902621722846, + "grad_norm": 2.069461578371292, + "learning_rate": 7.059993762783844e-06, + "loss": 0.3804, + "step": 14760, + "vit_learning_rate": 1.4119987525567684e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6871, + "epoch": 0.7683104452767374, + "grad_norm": 1.6232646379125608, + "learning_rate": 7.056155189459219e-06, + "loss": 0.3654, + "step": 14770, + "vit_learning_rate": 1.4112310378918437e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7095, + "epoch": 0.7688306283811902, + "grad_norm": 2.6300303920741275, + "learning_rate": 7.052315157059393e-06, + "loss": 0.3687, + "step": 14780, + "vit_learning_rate": 1.4104630314118784e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6865, + "epoch": 0.769350811485643, + "grad_norm": 1.558406133629891, + "learning_rate": 7.0484736683093e-06, + "loss": 0.3806, + "step": 14790, + "vit_learning_rate": 1.4096947336618598e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7097, + "epoch": 0.7698709945900957, + "grad_norm": 1.681751427526523, + "learning_rate": 7.044630725934913e-06, + "loss": 0.3915, + "step": 14800, + "vit_learning_rate": 1.4089261451869826e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6705, + "epoch": 0.7703911776945485, + "grad_norm": 1.8004003317008057, + "learning_rate": 7.040786332663237e-06, + "loss": 0.3705, + "step": 14810, + "vit_learning_rate": 1.4081572665326473e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6571, + "epoch": 0.7709113607990012, + "grad_norm": 1.9254657417267054, + "learning_rate": 7.036940491222304e-06, + "loss": 0.3859, + "step": 14820, + "vit_learning_rate": 1.4073880982444606e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6825, + "epoch": 0.771431543903454, + "grad_norm": 1.6353939724836302, + "learning_rate": 7.033093204341173e-06, + "loss": 0.367, + "step": 14830, + "vit_learning_rate": 1.4066186408682343e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6685, + "epoch": 0.7719517270079068, + "grad_norm": 1.3211942285584164, + "learning_rate": 7.029244474749929e-06, + "loss": 0.3863, + "step": 14840, + "vit_learning_rate": 1.4058488949499856e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.636, + "epoch": 0.7724719101123596, + "grad_norm": 1.531739429565257, + "learning_rate": 7.025394305179687e-06, + "loss": 0.3827, + "step": 14850, + "vit_learning_rate": 1.4050788610359373e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6544, + "epoch": 0.7729920932168123, + "grad_norm": 2.1206633856263664, + "learning_rate": 7.021542698362573e-06, + "loss": 0.388, + "step": 14860, + "vit_learning_rate": 1.4043085396725145e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6345, + "epoch": 0.7735122763212651, + "grad_norm": 1.1776861637402825, + "learning_rate": 7.017689657031743e-06, + "loss": 0.3924, + "step": 14870, + "vit_learning_rate": 1.4035379314063484e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6363, + "epoch": 0.7740324594257179, + "grad_norm": 1.644378781400518, + "learning_rate": 7.013835183921366e-06, + "loss": 0.3463, + "step": 14880, + "vit_learning_rate": 1.4027670367842729e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6496, + "epoch": 0.7745526425301706, + "grad_norm": 1.7922328008029307, + "learning_rate": 7.009979281766627e-06, + "loss": 0.3865, + "step": 14890, + "vit_learning_rate": 1.4019958563533253e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6413, + "epoch": 0.7750728256346234, + "grad_norm": 1.5091452440488458, + "learning_rate": 7.006121953303724e-06, + "loss": 0.3627, + "step": 14900, + "vit_learning_rate": 1.4012243906607446e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6733, + "epoch": 0.7755930087390761, + "grad_norm": 1.3742406970991254, + "learning_rate": 7.00226320126987e-06, + "loss": 0.374, + "step": 14910, + "vit_learning_rate": 1.4004526402539738e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7691, + "epoch": 0.7761131918435289, + "grad_norm": 1.5183858391212457, + "learning_rate": 6.998403028403289e-06, + "loss": 0.3785, + "step": 14920, + "vit_learning_rate": 1.3996806056806576e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6686, + "epoch": 0.7766333749479817, + "grad_norm": 1.47658917441444, + "learning_rate": 6.994541437443208e-06, + "loss": 0.3812, + "step": 14930, + "vit_learning_rate": 1.3989082874886416e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6414, + "epoch": 0.7771535580524345, + "grad_norm": 1.347413733063297, + "learning_rate": 6.990678431129867e-06, + "loss": 0.3586, + "step": 14940, + "vit_learning_rate": 1.3981356862259732e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6227, + "epoch": 0.7776737411568873, + "grad_norm": 1.5249578820908747, + "learning_rate": 6.986814012204504e-06, + "loss": 0.3447, + "step": 14950, + "vit_learning_rate": 1.3973628024409005e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6613, + "epoch": 0.77819392426134, + "grad_norm": 1.4635384722908265, + "learning_rate": 6.982948183409364e-06, + "loss": 0.3768, + "step": 14960, + "vit_learning_rate": 1.3965896366818725e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6812, + "epoch": 0.7787141073657927, + "grad_norm": 1.025926945532712, + "learning_rate": 6.979080947487689e-06, + "loss": 0.3723, + "step": 14970, + "vit_learning_rate": 1.3958161894975376e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6443, + "epoch": 0.7792342904702455, + "grad_norm": 1.2440808722012142, + "learning_rate": 6.975212307183723e-06, + "loss": 0.3256, + "step": 14980, + "vit_learning_rate": 1.3950424614367443e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6938, + "epoch": 0.7797544735746983, + "grad_norm": 1.2083646626239477, + "learning_rate": 6.9713422652427046e-06, + "loss": 0.3755, + "step": 14990, + "vit_learning_rate": 1.3942684530485407e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6829, + "epoch": 0.7802746566791511, + "grad_norm": 1.646737819499363, + "learning_rate": 6.967470824410867e-06, + "loss": 0.3546, + "step": 15000, + "vit_learning_rate": 1.3934941648821733e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6197, + "epoch": 0.7807948397836039, + "grad_norm": 1.685972863015154, + "learning_rate": 6.963597987435435e-06, + "loss": 0.3845, + "step": 15010, + "vit_learning_rate": 1.392719597487087e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.711, + "epoch": 0.7813150228880565, + "grad_norm": 1.5556751301367477, + "learning_rate": 6.95972375706463e-06, + "loss": 0.3727, + "step": 15020, + "vit_learning_rate": 1.3919447514129256e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6411, + "epoch": 0.7818352059925093, + "grad_norm": 1.597300762778971, + "learning_rate": 6.955848136047651e-06, + "loss": 0.403, + "step": 15030, + "vit_learning_rate": 1.39116962720953e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6586, + "epoch": 0.7823553890969621, + "grad_norm": 1.4953826821904483, + "learning_rate": 6.951971127134696e-06, + "loss": 0.3957, + "step": 15040, + "vit_learning_rate": 1.3903942254269389e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6368, + "epoch": 0.7828755722014149, + "grad_norm": 1.7359352512805368, + "learning_rate": 6.948092733076939e-06, + "loss": 0.3643, + "step": 15050, + "vit_learning_rate": 1.3896185466153876e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6828, + "epoch": 0.7833957553058677, + "grad_norm": 1.2717871991517178, + "learning_rate": 6.944212956626544e-06, + "loss": 0.365, + "step": 15060, + "vit_learning_rate": 1.3888425913253085e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6538, + "epoch": 0.7839159384103205, + "grad_norm": 1.617390978428005, + "learning_rate": 6.9403318005366475e-06, + "loss": 0.3751, + "step": 15070, + "vit_learning_rate": 1.3880663601073293e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6368, + "epoch": 0.7844361215147732, + "grad_norm": 1.7246563611384975, + "learning_rate": 6.936449267561371e-06, + "loss": 0.374, + "step": 15080, + "vit_learning_rate": 1.387289853512274e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6253, + "epoch": 0.7849563046192259, + "grad_norm": 1.681074490058111, + "learning_rate": 6.932565360455816e-06, + "loss": 0.3743, + "step": 15090, + "vit_learning_rate": 1.3865130720911632e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6822, + "epoch": 0.7854764877236787, + "grad_norm": 1.470576142073081, + "learning_rate": 6.92868008197605e-06, + "loss": 0.3768, + "step": 15100, + "vit_learning_rate": 1.3857360163952098e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7952, + "epoch": 0.7859966708281315, + "grad_norm": 1.6312874198899059, + "learning_rate": 6.924793434879121e-06, + "loss": 0.3621, + "step": 15110, + "vit_learning_rate": 1.384958686975824e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6546, + "epoch": 0.7865168539325843, + "grad_norm": 1.507321236053005, + "learning_rate": 6.920905421923043e-06, + "loss": 0.3818, + "step": 15120, + "vit_learning_rate": 1.3841810843846085e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6262, + "epoch": 0.7870370370370371, + "grad_norm": 1.3678113600626853, + "learning_rate": 6.917016045866807e-06, + "loss": 0.358, + "step": 15130, + "vit_learning_rate": 1.3834032091733612e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6548, + "epoch": 0.7875572201414898, + "grad_norm": 1.3673624378919578, + "learning_rate": 6.913125309470361e-06, + "loss": 0.365, + "step": 15140, + "vit_learning_rate": 1.382625061894072e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6496, + "epoch": 0.7880774032459426, + "grad_norm": 1.8969390864559348, + "learning_rate": 6.909233215494626e-06, + "loss": 0.3576, + "step": 15150, + "vit_learning_rate": 1.3818466430989251e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6425, + "epoch": 0.7885975863503953, + "grad_norm": 1.6772795303730772, + "learning_rate": 6.905339766701486e-06, + "loss": 0.3578, + "step": 15160, + "vit_learning_rate": 1.3810679533402969e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6514, + "epoch": 0.7891177694548481, + "grad_norm": 2.2003611462238126, + "learning_rate": 6.901444965853779e-06, + "loss": 0.3634, + "step": 15170, + "vit_learning_rate": 1.3802889931707556e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7022, + "epoch": 0.7896379525593009, + "grad_norm": 1.372300400587347, + "learning_rate": 6.897548815715311e-06, + "loss": 0.3525, + "step": 15180, + "vit_learning_rate": 1.3795097631430622e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6096, + "epoch": 0.7901581356637536, + "grad_norm": 2.8292327821685426, + "learning_rate": 6.893651319050842e-06, + "loss": 0.3603, + "step": 15190, + "vit_learning_rate": 1.3787302638101684e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7455, + "epoch": 0.7906783187682064, + "grad_norm": 1.240733402837639, + "learning_rate": 6.88975247862609e-06, + "loss": 0.3641, + "step": 15200, + "vit_learning_rate": 1.3779504957252176e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6731, + "epoch": 0.7911985018726592, + "grad_norm": 1.7820832684144974, + "learning_rate": 6.885852297207718e-06, + "loss": 0.3665, + "step": 15210, + "vit_learning_rate": 1.3771704594415435e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6413, + "epoch": 0.791718684977112, + "grad_norm": 1.484205247107778, + "learning_rate": 6.881950777563353e-06, + "loss": 0.3625, + "step": 15220, + "vit_learning_rate": 1.3763901555126705e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6642, + "epoch": 0.7922388680815647, + "grad_norm": 1.325004301294029, + "learning_rate": 6.8780479224615635e-06, + "loss": 0.359, + "step": 15230, + "vit_learning_rate": 1.3756095844923125e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7038, + "epoch": 0.7927590511860175, + "grad_norm": 1.749767034544886, + "learning_rate": 6.874143734671866e-06, + "loss": 0.3989, + "step": 15240, + "vit_learning_rate": 1.374828746934373e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7128, + "epoch": 0.7932792342904702, + "grad_norm": 1.8848420819558662, + "learning_rate": 6.870238216964728e-06, + "loss": 0.3804, + "step": 15250, + "vit_learning_rate": 1.3740476433929454e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6468, + "epoch": 0.793799417394923, + "grad_norm": 1.3790149500452726, + "learning_rate": 6.866331372111552e-06, + "loss": 0.3856, + "step": 15260, + "vit_learning_rate": 1.3732662744223102e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6619, + "epoch": 0.7943196004993758, + "grad_norm": 1.6142572562259447, + "learning_rate": 6.862423202884692e-06, + "loss": 0.3903, + "step": 15270, + "vit_learning_rate": 1.372484640576938e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6748, + "epoch": 0.7948397836038286, + "grad_norm": 1.936957278284747, + "learning_rate": 6.858513712057433e-06, + "loss": 0.3772, + "step": 15280, + "vit_learning_rate": 1.3717027424114864e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6817, + "epoch": 0.7953599667082814, + "grad_norm": 1.9934108772073016, + "learning_rate": 6.854602902404006e-06, + "loss": 0.3664, + "step": 15290, + "vit_learning_rate": 1.370920580480801e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.631, + "epoch": 0.795880149812734, + "grad_norm": 1.6543991321226494, + "learning_rate": 6.850690776699574e-06, + "loss": 0.3918, + "step": 15300, + "vit_learning_rate": 1.3701381553399146e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7, + "epoch": 0.7964003329171868, + "grad_norm": 1.3810002907258108, + "learning_rate": 6.8467773377202275e-06, + "loss": 0.3935, + "step": 15310, + "vit_learning_rate": 1.3693554675440453e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6416, + "epoch": 0.7969205160216396, + "grad_norm": 1.5693696538941715, + "learning_rate": 6.8428625882430014e-06, + "loss": 0.3738, + "step": 15320, + "vit_learning_rate": 1.3685725176486e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7487, + "epoch": 0.7974406991260924, + "grad_norm": 1.7686134821767703, + "learning_rate": 6.838946531045852e-06, + "loss": 0.3641, + "step": 15330, + "vit_learning_rate": 1.3677893062091701e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6536, + "epoch": 0.7979608822305452, + "grad_norm": 1.223344579475221, + "learning_rate": 6.835029168907665e-06, + "loss": 0.3702, + "step": 15340, + "vit_learning_rate": 1.3670058337815328e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.684, + "epoch": 0.798481065334998, + "grad_norm": 1.508759510410958, + "learning_rate": 6.831110504608255e-06, + "loss": 0.3582, + "step": 15350, + "vit_learning_rate": 1.3662221009216509e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6723, + "epoch": 0.7990012484394506, + "grad_norm": 2.763364968796962, + "learning_rate": 6.827190540928357e-06, + "loss": 0.3864, + "step": 15360, + "vit_learning_rate": 1.3654381081856712e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7802, + "epoch": 0.7995214315439034, + "grad_norm": 1.686594262232918, + "learning_rate": 6.82326928064963e-06, + "loss": 0.3767, + "step": 15370, + "vit_learning_rate": 1.364653856129926e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7069, + "epoch": 0.8000416146483562, + "grad_norm": 1.4869104173463912, + "learning_rate": 6.819346726554652e-06, + "loss": 0.3716, + "step": 15380, + "vit_learning_rate": 1.3638693453109302e-06 + }, + { + "avg_batch_load_time": 2.5202, + "avg_batch_processing_time": 0.6852, + "epoch": 0.800561797752809, + "grad_norm": 1.1960413133977918, + "learning_rate": 6.815422881426922e-06, + "loss": 0.3644, + "step": 15390, + "vit_learning_rate": 1.3630845762853841e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6495, + "epoch": 0.8010819808572618, + "grad_norm": 1.9705151014228228, + "learning_rate": 6.8114977480508506e-06, + "loss": 0.374, + "step": 15400, + "vit_learning_rate": 1.36229954961017e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6284, + "epoch": 0.8016021639617146, + "grad_norm": 1.417878878330881, + "learning_rate": 6.807571329211766e-06, + "loss": 0.3725, + "step": 15410, + "vit_learning_rate": 1.361514265842353e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6843, + "epoch": 0.8021223470661673, + "grad_norm": 2.228805144662131, + "learning_rate": 6.803643627695907e-06, + "loss": 0.3678, + "step": 15420, + "vit_learning_rate": 1.3607287255391812e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6692, + "epoch": 0.80264253017062, + "grad_norm": 1.1712689526241786, + "learning_rate": 6.7997146462904225e-06, + "loss": 0.3828, + "step": 15430, + "vit_learning_rate": 1.3599429292580845e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.671, + "epoch": 0.8031627132750728, + "grad_norm": 1.5857105345278577, + "learning_rate": 6.795784387783374e-06, + "loss": 0.391, + "step": 15440, + "vit_learning_rate": 1.3591568775566747e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.666, + "epoch": 0.8036828963795256, + "grad_norm": 1.9110131230403442, + "learning_rate": 6.79185285496372e-06, + "loss": 0.3859, + "step": 15450, + "vit_learning_rate": 1.3583705709927437e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7897, + "epoch": 0.8042030794839784, + "grad_norm": 1.6454672378446076, + "learning_rate": 6.787920050621331e-06, + "loss": 0.3841, + "step": 15460, + "vit_learning_rate": 1.3575840101242662e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7322, + "epoch": 0.8047232625884311, + "grad_norm": 1.4488753982401965, + "learning_rate": 6.78398597754698e-06, + "loss": 0.3612, + "step": 15470, + "vit_learning_rate": 1.3567971955093958e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7034, + "epoch": 0.8052434456928839, + "grad_norm": 1.6082527293724773, + "learning_rate": 6.7800506385323335e-06, + "loss": 0.3633, + "step": 15480, + "vit_learning_rate": 1.3560101277064666e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6742, + "epoch": 0.8057636287973367, + "grad_norm": 1.7170424077611297, + "learning_rate": 6.776114036369965e-06, + "loss": 0.3651, + "step": 15490, + "vit_learning_rate": 1.355222807273993e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7059, + "epoch": 0.8062838119017894, + "grad_norm": 1.1553190017418615, + "learning_rate": 6.772176173853336e-06, + "loss": 0.3732, + "step": 15500, + "vit_learning_rate": 1.3544352347706668e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6454, + "epoch": 0.8068039950062422, + "grad_norm": 2.1828743727901907, + "learning_rate": 6.768237053776809e-06, + "loss": 0.3933, + "step": 15510, + "vit_learning_rate": 1.3536474107553616e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6547, + "epoch": 0.807324178110695, + "grad_norm": 1.3370658737628152, + "learning_rate": 6.764296678935635e-06, + "loss": 0.3748, + "step": 15520, + "vit_learning_rate": 1.3528593357871267e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6776, + "epoch": 0.8078443612151477, + "grad_norm": 1.3374148359895617, + "learning_rate": 6.760355052125958e-06, + "loss": 0.3793, + "step": 15530, + "vit_learning_rate": 1.3520710104251913e-06 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7704, + "epoch": 0.8083645443196005, + "grad_norm": 1.4588060180642959, + "learning_rate": 6.7564121761448066e-06, + "loss": 0.3855, + "step": 15540, + "vit_learning_rate": 1.351282435228961e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6383, + "epoch": 0.8088847274240533, + "grad_norm": 1.618239998978144, + "learning_rate": 6.752468053790101e-06, + "loss": 0.3811, + "step": 15550, + "vit_learning_rate": 1.35049361075802e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6415, + "epoch": 0.809404910528506, + "grad_norm": 1.517617407020956, + "learning_rate": 6.748522687860643e-06, + "loss": 0.3744, + "step": 15560, + "vit_learning_rate": 1.3497045375721284e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6795, + "epoch": 0.8099250936329588, + "grad_norm": 1.519132414649884, + "learning_rate": 6.744576081156114e-06, + "loss": 0.3917, + "step": 15570, + "vit_learning_rate": 1.3489152162312225e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6847, + "epoch": 0.8104452767374116, + "grad_norm": 1.5153448803666605, + "learning_rate": 6.740628236477083e-06, + "loss": 0.3603, + "step": 15580, + "vit_learning_rate": 1.3481256472954163e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6596, + "epoch": 0.8109654598418643, + "grad_norm": 1.7014701977535103, + "learning_rate": 6.736679156624988e-06, + "loss": 0.3903, + "step": 15590, + "vit_learning_rate": 1.3473358313249975e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8128, + "epoch": 0.8114856429463171, + "grad_norm": 1.2208578457829797, + "learning_rate": 6.732728844402154e-06, + "loss": 0.3695, + "step": 15600, + "vit_learning_rate": 1.3465457688804308e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6414, + "epoch": 0.8120058260507699, + "grad_norm": 1.7528778540655225, + "learning_rate": 6.728777302611773e-06, + "loss": 0.3691, + "step": 15610, + "vit_learning_rate": 1.3457554605223545e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6711, + "epoch": 0.8125260091552227, + "grad_norm": 1.1010894397283022, + "learning_rate": 6.724824534057913e-06, + "loss": 0.3675, + "step": 15620, + "vit_learning_rate": 1.3449649068115825e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7237, + "epoch": 0.8130461922596754, + "grad_norm": 1.9810576356720544, + "learning_rate": 6.720870541545511e-06, + "loss": 0.3568, + "step": 15630, + "vit_learning_rate": 1.344174108309102e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.648, + "epoch": 0.8135663753641281, + "grad_norm": 1.6268187540035746, + "learning_rate": 6.716915327880372e-06, + "loss": 0.3648, + "step": 15640, + "vit_learning_rate": 1.3433830655760743e-06 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6658, + "epoch": 0.8140865584685809, + "grad_norm": 1.3486585235827275, + "learning_rate": 6.71295889586917e-06, + "loss": 0.3826, + "step": 15650, + "vit_learning_rate": 1.3425917791738336e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6903, + "epoch": 0.8146067415730337, + "grad_norm": 1.7790275582975168, + "learning_rate": 6.709001248319438e-06, + "loss": 0.3925, + "step": 15660, + "vit_learning_rate": 1.3418002496638874e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6714, + "epoch": 0.8151269246774865, + "grad_norm": 1.355484517505448, + "learning_rate": 6.705042388039581e-06, + "loss": 0.3817, + "step": 15670, + "vit_learning_rate": 1.341008477607916e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6944, + "epoch": 0.8156471077819393, + "grad_norm": 1.681020557752775, + "learning_rate": 6.701082317838858e-06, + "loss": 0.381, + "step": 15680, + "vit_learning_rate": 1.3402164635677713e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6501, + "epoch": 0.8161672908863921, + "grad_norm": 1.8152082291746297, + "learning_rate": 6.697121040527385e-06, + "loss": 0.3831, + "step": 15690, + "vit_learning_rate": 1.3394242081054768e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6604, + "epoch": 0.8166874739908447, + "grad_norm": 1.826132456070679, + "learning_rate": 6.6931585589161385e-06, + "loss": 0.3499, + "step": 15700, + "vit_learning_rate": 1.3386317117832276e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7764, + "epoch": 0.8172076570952975, + "grad_norm": 1.714456882807311, + "learning_rate": 6.68919487581695e-06, + "loss": 0.3715, + "step": 15710, + "vit_learning_rate": 1.3378389751633899e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6671, + "epoch": 0.8177278401997503, + "grad_norm": 1.5133116042736834, + "learning_rate": 6.685229994042502e-06, + "loss": 0.3671, + "step": 15720, + "vit_learning_rate": 1.3370459988085002e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7536, + "epoch": 0.8182480233042031, + "grad_norm": 1.923714006510877, + "learning_rate": 6.681263916406323e-06, + "loss": 0.3889, + "step": 15730, + "vit_learning_rate": 1.3362527832812644e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6999, + "epoch": 0.8187682064086559, + "grad_norm": 1.706670502319743, + "learning_rate": 6.6772966457227995e-06, + "loss": 0.3909, + "step": 15740, + "vit_learning_rate": 1.3354593291445596e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6978, + "epoch": 0.8192883895131086, + "grad_norm": 1.3154129823846685, + "learning_rate": 6.67332818480716e-06, + "loss": 0.3828, + "step": 15750, + "vit_learning_rate": 1.3346656369614317e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3388, + "epoch": 0.8198085726175613, + "grad_norm": 1.694789657251368, + "learning_rate": 6.669358536475474e-06, + "loss": 0.3873, + "step": 15760, + "vit_learning_rate": 1.3338717072950946e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6734, + "epoch": 0.8203287557220141, + "grad_norm": 2.1738583728849346, + "learning_rate": 6.665387703544661e-06, + "loss": 0.3744, + "step": 15770, + "vit_learning_rate": 1.3330775407089321e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6629, + "epoch": 0.8208489388264669, + "grad_norm": 1.1447743668521637, + "learning_rate": 6.661415688832473e-06, + "loss": 0.3743, + "step": 15780, + "vit_learning_rate": 1.3322831377664946e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6396, + "epoch": 0.8213691219309197, + "grad_norm": 1.7106839291065075, + "learning_rate": 6.657442495157509e-06, + "loss": 0.3543, + "step": 15790, + "vit_learning_rate": 1.3314884990315016e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.705, + "epoch": 0.8218893050353725, + "grad_norm": 1.4049026815106571, + "learning_rate": 6.653468125339195e-06, + "loss": 0.3922, + "step": 15800, + "vit_learning_rate": 1.330693625067839e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6909, + "epoch": 0.8224094881398252, + "grad_norm": 1.566154549787463, + "learning_rate": 6.649492582197802e-06, + "loss": 0.3844, + "step": 15810, + "vit_learning_rate": 1.3298985164395602e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6532, + "epoch": 0.822929671244278, + "grad_norm": 1.5350624985101922, + "learning_rate": 6.645515868554426e-06, + "loss": 0.3873, + "step": 15820, + "vit_learning_rate": 1.3291031737108852e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7199, + "epoch": 0.8234498543487307, + "grad_norm": 1.2931353456222707, + "learning_rate": 6.6415379872309974e-06, + "loss": 0.3593, + "step": 15830, + "vit_learning_rate": 1.3283075974461991e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6575, + "epoch": 0.8239700374531835, + "grad_norm": 1.839116276245444, + "learning_rate": 6.637558941050273e-06, + "loss": 0.3619, + "step": 15840, + "vit_learning_rate": 1.3275117882100544e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6972, + "epoch": 0.8244902205576363, + "grad_norm": 1.7265346527921799, + "learning_rate": 6.633578732835837e-06, + "loss": 0.3649, + "step": 15850, + "vit_learning_rate": 1.3267157465671672e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6688, + "epoch": 0.8250104036620891, + "grad_norm": 2.449911506221966, + "learning_rate": 6.629597365412099e-06, + "loss": 0.3601, + "step": 15860, + "vit_learning_rate": 1.3259194730824198e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6858, + "epoch": 0.8255305867665418, + "grad_norm": 1.6570314210206745, + "learning_rate": 6.625614841604289e-06, + "loss": 0.357, + "step": 15870, + "vit_learning_rate": 1.3251229683208575e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6303, + "epoch": 0.8260507698709946, + "grad_norm": 1.8400807059025213, + "learning_rate": 6.621631164238462e-06, + "loss": 0.3745, + "step": 15880, + "vit_learning_rate": 1.3243262328476922e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6634, + "epoch": 0.8265709529754474, + "grad_norm": 2.054959098669832, + "learning_rate": 6.617646336141487e-06, + "loss": 0.3898, + "step": 15890, + "vit_learning_rate": 1.323529267228297e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6271, + "epoch": 0.8270911360799001, + "grad_norm": 1.3063144769488046, + "learning_rate": 6.6136603601410506e-06, + "loss": 0.3731, + "step": 15900, + "vit_learning_rate": 1.32273207202821e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6659, + "epoch": 0.8276113191843529, + "grad_norm": 1.5583918007775992, + "learning_rate": 6.609673239065656e-06, + "loss": 0.3605, + "step": 15910, + "vit_learning_rate": 1.321934647813131e-06 + }, + { + "avg_batch_load_time": 0.0087, + "avg_batch_processing_time": 2.4831, + "epoch": 0.8281315022888056, + "grad_norm": 1.2097844833211677, + "learning_rate": 6.605684975744618e-06, + "loss": 0.3729, + "step": 15920, + "vit_learning_rate": 1.3211369951489232e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.761, + "epoch": 0.8286516853932584, + "grad_norm": 1.620224396078577, + "learning_rate": 6.601695573008059e-06, + "loss": 0.3453, + "step": 15930, + "vit_learning_rate": 1.3203391146016116e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6643, + "epoch": 0.8291718684977112, + "grad_norm": 1.5014960899823435, + "learning_rate": 6.597705033686914e-06, + "loss": 0.3787, + "step": 15940, + "vit_learning_rate": 1.3195410067373828e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6854, + "epoch": 0.829692051602164, + "grad_norm": 1.8302367295617958, + "learning_rate": 6.5937133606129235e-06, + "loss": 0.3741, + "step": 15950, + "vit_learning_rate": 1.3187426721225845e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7157, + "epoch": 0.8302122347066168, + "grad_norm": 1.3948966099996951, + "learning_rate": 6.589720556618633e-06, + "loss": 0.3604, + "step": 15960, + "vit_learning_rate": 1.3179441113237264e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6295, + "epoch": 0.8307324178110695, + "grad_norm": 3.346390299186068, + "learning_rate": 6.585726624537386e-06, + "loss": 0.3599, + "step": 15970, + "vit_learning_rate": 1.3171453249074772e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6533, + "epoch": 0.8312526009155222, + "grad_norm": 2.0625638115882277, + "learning_rate": 6.581731567203334e-06, + "loss": 0.3635, + "step": 15980, + "vit_learning_rate": 1.3163463134406667e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6861, + "epoch": 0.831772784019975, + "grad_norm": 1.5910957589379735, + "learning_rate": 6.577735387451421e-06, + "loss": 0.3694, + "step": 15990, + "vit_learning_rate": 1.3155470774902842e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.673, + "epoch": 0.8322929671244278, + "grad_norm": 1.3115807788439517, + "learning_rate": 6.573738088117391e-06, + "loss": 0.3921, + "step": 16000, + "vit_learning_rate": 1.314747617623478e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6797, + "epoch": 0.8328131502288806, + "grad_norm": 2.557966796588566, + "learning_rate": 6.569739672037778e-06, + "loss": 0.3598, + "step": 16010, + "vit_learning_rate": 1.3139479344075555e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6719, + "epoch": 0.8333333333333334, + "grad_norm": 1.5392238515931924, + "learning_rate": 6.5657401420499155e-06, + "loss": 0.3718, + "step": 16020, + "vit_learning_rate": 1.313148028409983e-06 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6322, + "epoch": 0.833853516437786, + "grad_norm": 1.9513402943480433, + "learning_rate": 6.5617395009919196e-06, + "loss": 0.3772, + "step": 16030, + "vit_learning_rate": 1.3123479001983836e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6552, + "epoch": 0.8343736995422388, + "grad_norm": 1.5632891045945683, + "learning_rate": 6.557737751702702e-06, + "loss": 0.3734, + "step": 16040, + "vit_learning_rate": 1.3115475503405402e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6671, + "epoch": 0.8348938826466916, + "grad_norm": 1.471090390535308, + "learning_rate": 6.553734897021956e-06, + "loss": 0.3796, + "step": 16050, + "vit_learning_rate": 1.310746979404391e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6723, + "epoch": 0.8354140657511444, + "grad_norm": 2.120880133334099, + "learning_rate": 6.54973093979016e-06, + "loss": 0.3643, + "step": 16060, + "vit_learning_rate": 1.3099461879580318e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3864, + "epoch": 0.8359342488555972, + "grad_norm": 1.6675699344576782, + "learning_rate": 6.545725882848577e-06, + "loss": 0.3565, + "step": 16070, + "vit_learning_rate": 1.3091451765697153e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6504, + "epoch": 0.83645443196005, + "grad_norm": 1.8441821368283762, + "learning_rate": 6.541719729039247e-06, + "loss": 0.36, + "step": 16080, + "vit_learning_rate": 1.308343945807849e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.659, + "epoch": 0.8369746150645027, + "grad_norm": 1.6824270482749193, + "learning_rate": 6.537712481204994e-06, + "loss": 0.3764, + "step": 16090, + "vit_learning_rate": 1.3075424962409986e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6634, + "epoch": 0.8374947981689554, + "grad_norm": 1.594193093223912, + "learning_rate": 6.53370414218941e-06, + "loss": 0.397, + "step": 16100, + "vit_learning_rate": 1.3067408284378817e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.675, + "epoch": 0.8380149812734082, + "grad_norm": 1.3234044822267388, + "learning_rate": 6.529694714836868e-06, + "loss": 0.3737, + "step": 16110, + "vit_learning_rate": 1.3059389429673735e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.657, + "epoch": 0.838535164377861, + "grad_norm": 2.23647103540149, + "learning_rate": 6.525684201992512e-06, + "loss": 0.355, + "step": 16120, + "vit_learning_rate": 1.3051368403985023e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7343, + "epoch": 0.8390553474823138, + "grad_norm": 1.430256608012849, + "learning_rate": 6.521672606502256e-06, + "loss": 0.3816, + "step": 16130, + "vit_learning_rate": 1.304334521300451e-06 + }, + { + "avg_batch_load_time": 0.086, + "avg_batch_processing_time": 2.3639, + "epoch": 0.8395755305867666, + "grad_norm": 2.493332297505908, + "learning_rate": 6.5176599312127805e-06, + "loss": 0.3755, + "step": 16140, + "vit_learning_rate": 1.303531986242556e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6684, + "epoch": 0.8400957136912193, + "grad_norm": 2.140470238395991, + "learning_rate": 6.513646178971532e-06, + "loss": 0.4198, + "step": 16150, + "vit_learning_rate": 1.3027292357943063e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7268, + "epoch": 0.8406158967956721, + "grad_norm": 2.4096179730872587, + "learning_rate": 6.509631352626726e-06, + "loss": 0.3654, + "step": 16160, + "vit_learning_rate": 1.301926270525345e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4854, + "epoch": 0.8411360799001248, + "grad_norm": 1.472176651509935, + "learning_rate": 6.5056154550273345e-06, + "loss": 0.3817, + "step": 16170, + "vit_learning_rate": 1.3011230910054667e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6521, + "epoch": 0.8416562630045776, + "grad_norm": 1.6740635026501025, + "learning_rate": 6.501598489023093e-06, + "loss": 0.377, + "step": 16180, + "vit_learning_rate": 1.3003196978046185e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6629, + "epoch": 0.8421764461090304, + "grad_norm": 1.551937115376693, + "learning_rate": 6.497580457464495e-06, + "loss": 0.3895, + "step": 16190, + "vit_learning_rate": 1.2995160914928989e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6739, + "epoch": 0.8426966292134831, + "grad_norm": 1.4293563115774446, + "learning_rate": 6.493561363202789e-06, + "loss": 0.3594, + "step": 16200, + "vit_learning_rate": 1.2987122726405574e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6592, + "epoch": 0.8432168123179359, + "grad_norm": 1.3306440377117963, + "learning_rate": 6.489541209089975e-06, + "loss": 0.3932, + "step": 16210, + "vit_learning_rate": 1.297908241817995e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.641, + "epoch": 0.8437369954223887, + "grad_norm": 1.592841182544158, + "learning_rate": 6.485519997978813e-06, + "loss": 0.3663, + "step": 16220, + "vit_learning_rate": 1.2971039995957624e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.634, + "epoch": 0.8442571785268415, + "grad_norm": 1.4841083864006281, + "learning_rate": 6.481497732722806e-06, + "loss": 0.385, + "step": 16230, + "vit_learning_rate": 1.296299546544561e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7093, + "epoch": 0.8447773616312942, + "grad_norm": 1.4032592108922564, + "learning_rate": 6.477474416176206e-06, + "loss": 0.3585, + "step": 16240, + "vit_learning_rate": 1.295494883235241e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6874, + "epoch": 0.845297544735747, + "grad_norm": 1.2137772009432322, + "learning_rate": 6.473450051194014e-06, + "loss": 0.372, + "step": 16250, + "vit_learning_rate": 1.2946900102388026e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6949, + "epoch": 0.8458177278401997, + "grad_norm": 2.3686887032564976, + "learning_rate": 6.469424640631972e-06, + "loss": 0.3547, + "step": 16260, + "vit_learning_rate": 1.2938849281263942e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6465, + "epoch": 0.8463379109446525, + "grad_norm": 1.5144705207132247, + "learning_rate": 6.4653981873465655e-06, + "loss": 0.3664, + "step": 16270, + "vit_learning_rate": 1.2930796374693129e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7507, + "epoch": 0.8468580940491053, + "grad_norm": 1.8774377552100523, + "learning_rate": 6.4613706941950195e-06, + "loss": 0.3738, + "step": 16280, + "vit_learning_rate": 1.2922741388390038e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7004, + "epoch": 0.8473782771535581, + "grad_norm": 1.986567604898173, + "learning_rate": 6.457342164035296e-06, + "loss": 0.3562, + "step": 16290, + "vit_learning_rate": 1.291468432807059e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7358, + "epoch": 0.8478984602580109, + "grad_norm": 1.487182298963341, + "learning_rate": 6.453312599726098e-06, + "loss": 0.364, + "step": 16300, + "vit_learning_rate": 1.2906625199452194e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6564, + "epoch": 0.8484186433624636, + "grad_norm": 1.5724575766402797, + "learning_rate": 6.449282004126853e-06, + "loss": 0.3428, + "step": 16310, + "vit_learning_rate": 1.2898564008253705e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6745, + "epoch": 0.8489388264669163, + "grad_norm": 1.7032405057749982, + "learning_rate": 6.445250380097728e-06, + "loss": 0.3718, + "step": 16320, + "vit_learning_rate": 1.2890500760195455e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6625, + "epoch": 0.8494590095713691, + "grad_norm": 1.4882531012423101, + "learning_rate": 6.441217730499617e-06, + "loss": 0.3463, + "step": 16330, + "vit_learning_rate": 1.2882435460999232e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6308, + "epoch": 0.8499791926758219, + "grad_norm": 1.6755642596569569, + "learning_rate": 6.437184058194143e-06, + "loss": 0.3671, + "step": 16340, + "vit_learning_rate": 1.2874368116388284e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6702, + "epoch": 0.8504993757802747, + "grad_norm": 2.4625630753177457, + "learning_rate": 6.433149366043652e-06, + "loss": 0.3493, + "step": 16350, + "vit_learning_rate": 1.2866298732087302e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6345, + "epoch": 0.8510195588847275, + "grad_norm": 1.6321427817156215, + "learning_rate": 6.429113656911215e-06, + "loss": 0.3596, + "step": 16360, + "vit_learning_rate": 1.2858227313822427e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6425, + "epoch": 0.8515397419891801, + "grad_norm": 1.3333612433284407, + "learning_rate": 6.425076933660628e-06, + "loss": 0.356, + "step": 16370, + "vit_learning_rate": 1.2850153867321254e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6479, + "epoch": 0.8520599250936329, + "grad_norm": 1.7673456056039922, + "learning_rate": 6.4210391991564e-06, + "loss": 0.3881, + "step": 16380, + "vit_learning_rate": 1.28420783983128e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7072, + "epoch": 0.8525801081980857, + "grad_norm": 2.2103245392489717, + "learning_rate": 6.417000456263767e-06, + "loss": 0.3776, + "step": 16390, + "vit_learning_rate": 1.2834000912527532e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6592, + "epoch": 0.8531002913025385, + "grad_norm": 1.3660736276153045, + "learning_rate": 6.412960707848669e-06, + "loss": 0.3522, + "step": 16400, + "vit_learning_rate": 1.2825921415697336e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.4665, + "epoch": 0.8536204744069913, + "grad_norm": 1.8137229247265092, + "learning_rate": 6.408919956777768e-06, + "loss": 0.3741, + "step": 16410, + "vit_learning_rate": 1.2817839913555535e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.726, + "epoch": 0.8541406575114441, + "grad_norm": 1.204859360969457, + "learning_rate": 6.404878205918437e-06, + "loss": 0.3658, + "step": 16420, + "vit_learning_rate": 1.280975641183687e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4533, + "epoch": 0.8546608406158968, + "grad_norm": 1.7715699403544705, + "learning_rate": 6.400835458138751e-06, + "loss": 0.386, + "step": 16430, + "vit_learning_rate": 1.28016709162775e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6371, + "epoch": 0.8551810237203495, + "grad_norm": 1.5811720454814968, + "learning_rate": 6.396791716307505e-06, + "loss": 0.372, + "step": 16440, + "vit_learning_rate": 1.2793583432615006e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.374, + "epoch": 0.8557012068248023, + "grad_norm": 1.5361077011193636, + "learning_rate": 6.392746983294184e-06, + "loss": 0.3765, + "step": 16450, + "vit_learning_rate": 1.2785493966588368e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6546, + "epoch": 0.8562213899292551, + "grad_norm": 1.9732939400933949, + "learning_rate": 6.388701261968993e-06, + "loss": 0.3445, + "step": 16460, + "vit_learning_rate": 1.2777402523937983e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7019, + "epoch": 0.8567415730337079, + "grad_norm": 1.4122664757009034, + "learning_rate": 6.384654555202824e-06, + "loss": 0.3626, + "step": 16470, + "vit_learning_rate": 1.2769309110405646e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6969, + "epoch": 0.8572617561381606, + "grad_norm": 1.8511142722999123, + "learning_rate": 6.380606865867276e-06, + "loss": 0.3863, + "step": 16480, + "vit_learning_rate": 1.276121373173455e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6557, + "epoch": 0.8577819392426134, + "grad_norm": 1.4148982889583817, + "learning_rate": 6.376558196834645e-06, + "loss": 0.3794, + "step": 16490, + "vit_learning_rate": 1.2753116393669288e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3816, + "epoch": 0.8583021223470662, + "grad_norm": 1.6997072449431687, + "learning_rate": 6.372508550977918e-06, + "loss": 0.3822, + "step": 16500, + "vit_learning_rate": 1.2745017101955832e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6409, + "epoch": 0.8588223054515189, + "grad_norm": 1.3108913208202284, + "learning_rate": 6.36845793117078e-06, + "loss": 0.3865, + "step": 16510, + "vit_learning_rate": 1.2736915862341556e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.507, + "epoch": 0.8593424885559717, + "grad_norm": 1.319318671364116, + "learning_rate": 6.3644063402876025e-06, + "loss": 0.4043, + "step": 16520, + "vit_learning_rate": 1.2728812680575203e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6625, + "epoch": 0.8598626716604245, + "grad_norm": 1.8633286656257413, + "learning_rate": 6.3603537812034515e-06, + "loss": 0.3545, + "step": 16530, + "vit_learning_rate": 1.27207075624069e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3691, + "epoch": 0.8603828547648772, + "grad_norm": 1.7242079888612771, + "learning_rate": 6.3563002567940744e-06, + "loss": 0.371, + "step": 16540, + "vit_learning_rate": 1.2712600513588148e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6621, + "epoch": 0.86090303786933, + "grad_norm": 1.5004795920149199, + "learning_rate": 6.35224576993591e-06, + "loss": 0.3619, + "step": 16550, + "vit_learning_rate": 1.2704491539871818e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6544, + "epoch": 0.8614232209737828, + "grad_norm": 1.2337393905647747, + "learning_rate": 6.348190323506073e-06, + "loss": 0.3922, + "step": 16560, + "vit_learning_rate": 1.2696380647012144e-06 + }, + { + "avg_batch_load_time": 0.0125, + "avg_batch_processing_time": 0.6363, + "epoch": 0.8619434040782356, + "grad_norm": 1.6749315652605579, + "learning_rate": 6.344133920382362e-06, + "loss": 0.3663, + "step": 16570, + "vit_learning_rate": 1.2688267840764722e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.8297, + "epoch": 0.8624635871826883, + "grad_norm": 1.7272363553548011, + "learning_rate": 6.3400765634432586e-06, + "loss": 0.3733, + "step": 16580, + "vit_learning_rate": 1.2680153126886514e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6776, + "epoch": 0.8629837702871411, + "grad_norm": 1.4867939333233264, + "learning_rate": 6.336018255567912e-06, + "loss": 0.3675, + "step": 16590, + "vit_learning_rate": 1.2672036511135822e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6526, + "epoch": 0.8635039533915938, + "grad_norm": 1.5003435528397655, + "learning_rate": 6.331958999636156e-06, + "loss": 0.372, + "step": 16600, + "vit_learning_rate": 1.2663917999272312e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6543, + "epoch": 0.8640241364960466, + "grad_norm": 1.2708338435007955, + "learning_rate": 6.327898798528492e-06, + "loss": 0.3766, + "step": 16610, + "vit_learning_rate": 1.2655797597056983e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7024, + "epoch": 0.8645443196004994, + "grad_norm": 3.44030298846486, + "learning_rate": 6.323837655126094e-06, + "loss": 0.3781, + "step": 16620, + "vit_learning_rate": 1.2647675310252185e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6647, + "epoch": 0.8650645027049522, + "grad_norm": 1.6907822667129317, + "learning_rate": 6.3197755723108e-06, + "loss": 0.3768, + "step": 16630, + "vit_learning_rate": 1.26395511446216e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7273, + "epoch": 0.865584685809405, + "grad_norm": 1.6149964249925477, + "learning_rate": 6.3157125529651205e-06, + "loss": 0.374, + "step": 16640, + "vit_learning_rate": 1.263142510593024e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6778, + "epoch": 0.8661048689138576, + "grad_norm": 1.830757330186087, + "learning_rate": 6.3116485999722325e-06, + "loss": 0.3753, + "step": 16650, + "vit_learning_rate": 1.2623297199944462e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6706, + "epoch": 0.8666250520183104, + "grad_norm": 1.4250900993120745, + "learning_rate": 6.307583716215962e-06, + "loss": 0.3978, + "step": 16660, + "vit_learning_rate": 1.2615167432431925e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6085, + "epoch": 0.8671452351227632, + "grad_norm": 1.4419143668752774, + "learning_rate": 6.3035179045808135e-06, + "loss": 0.3721, + "step": 16670, + "vit_learning_rate": 1.2607035809161626e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4068, + "epoch": 0.867665418227216, + "grad_norm": 1.359507535754973, + "learning_rate": 6.299451167951936e-06, + "loss": 0.3611, + "step": 16680, + "vit_learning_rate": 1.259890233590387e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6198, + "epoch": 0.8681856013316688, + "grad_norm": 1.7062461431166267, + "learning_rate": 6.295383509215144e-06, + "loss": 0.3839, + "step": 16690, + "vit_learning_rate": 1.2590767018430286e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.4702, + "epoch": 0.8687057844361216, + "grad_norm": 1.6767430235920664, + "learning_rate": 6.2913149312569e-06, + "loss": 0.3781, + "step": 16700, + "vit_learning_rate": 1.2582629862513798e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.4002, + "epoch": 0.8692259675405742, + "grad_norm": 1.3200099597462662, + "learning_rate": 6.28724543696432e-06, + "loss": 0.3662, + "step": 16710, + "vit_learning_rate": 1.2574490873928637e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6434, + "epoch": 0.869746150645027, + "grad_norm": 2.0867970258712587, + "learning_rate": 6.2831750292251754e-06, + "loss": 0.3651, + "step": 16720, + "vit_learning_rate": 1.256635005845035e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6909, + "epoch": 0.8702663337494798, + "grad_norm": 1.62310228650554, + "learning_rate": 6.279103710927879e-06, + "loss": 0.3772, + "step": 16730, + "vit_learning_rate": 1.2558207421855756e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6865, + "epoch": 0.8707865168539326, + "grad_norm": 1.9308139712857988, + "learning_rate": 6.275031484961494e-06, + "loss": 0.3705, + "step": 16740, + "vit_learning_rate": 1.2550062969922985e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 2.3447, + "epoch": 0.8713066999583854, + "grad_norm": 1.6951490536786196, + "learning_rate": 6.270958354215725e-06, + "loss": 0.3703, + "step": 16750, + "vit_learning_rate": 1.254191670843145e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6412, + "epoch": 0.8718268830628381, + "grad_norm": 1.7098106160886664, + "learning_rate": 6.266884321580922e-06, + "loss": 0.3831, + "step": 16760, + "vit_learning_rate": 1.2533768643161841e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.3919, + "epoch": 0.8723470661672909, + "grad_norm": 1.211586563124873, + "learning_rate": 6.262809389948073e-06, + "loss": 0.325, + "step": 16770, + "vit_learning_rate": 1.2525618779896144e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 4.1193, + "epoch": 0.8728672492717436, + "grad_norm": 1.6511751453731511, + "learning_rate": 6.258733562208801e-06, + "loss": 0.3669, + "step": 16780, + "vit_learning_rate": 1.25174671244176e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6339, + "epoch": 0.8733874323761964, + "grad_norm": 1.597464846271481, + "learning_rate": 6.2546568412553695e-06, + "loss": 0.3872, + "step": 16790, + "vit_learning_rate": 1.2509313682510737e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7032, + "epoch": 0.8739076154806492, + "grad_norm": 1.1915322291763724, + "learning_rate": 6.250579229980676e-06, + "loss": 0.3684, + "step": 16800, + "vit_learning_rate": 1.250115845996135e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6863, + "epoch": 0.874427798585102, + "grad_norm": 1.2710911602385122, + "learning_rate": 6.246500731278246e-06, + "loss": 0.3833, + "step": 16810, + "vit_learning_rate": 1.2493001462556489e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7081, + "epoch": 0.8749479816895547, + "grad_norm": 1.6805566607456, + "learning_rate": 6.242421348042237e-06, + "loss": 0.3706, + "step": 16820, + "vit_learning_rate": 1.2484842696084473e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6655, + "epoch": 0.8754681647940075, + "grad_norm": 1.8052283545864316, + "learning_rate": 6.238341083167433e-06, + "loss": 0.3521, + "step": 16830, + "vit_learning_rate": 1.2476682166334865e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.612, + "epoch": 0.8759883478984603, + "grad_norm": 2.145137111879399, + "learning_rate": 6.234259939549247e-06, + "loss": 0.3656, + "step": 16840, + "vit_learning_rate": 1.2468519879098492e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.8176, + "epoch": 0.876508531002913, + "grad_norm": 1.7291825625064234, + "learning_rate": 6.230177920083711e-06, + "loss": 0.376, + "step": 16850, + "vit_learning_rate": 1.246035584016742e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7078, + "epoch": 0.8770287141073658, + "grad_norm": 1.7862382197314872, + "learning_rate": 6.226095027667482e-06, + "loss": 0.3625, + "step": 16860, + "vit_learning_rate": 1.245219005533496e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6323, + "epoch": 0.8775488972118186, + "grad_norm": 1.274820426484616, + "learning_rate": 6.222011265197832e-06, + "loss": 0.3545, + "step": 16870, + "vit_learning_rate": 1.244402253039566e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7021, + "epoch": 0.8780690803162713, + "grad_norm": 1.9157514656778838, + "learning_rate": 6.2179266355726565e-06, + "loss": 0.392, + "step": 16880, + "vit_learning_rate": 1.2435853271145312e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6474, + "epoch": 0.8785892634207241, + "grad_norm": 1.3567928668141034, + "learning_rate": 6.213841141690463e-06, + "loss": 0.3591, + "step": 16890, + "vit_learning_rate": 1.2427682283380925e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7468, + "epoch": 0.8791094465251769, + "grad_norm": 1.6173649010834246, + "learning_rate": 6.20975478645037e-06, + "loss": 0.3913, + "step": 16900, + "vit_learning_rate": 1.2419509572900738e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6406, + "epoch": 0.8796296296296297, + "grad_norm": 1.7531141799097452, + "learning_rate": 6.205667572752115e-06, + "loss": 0.3591, + "step": 16910, + "vit_learning_rate": 1.2411335145504226e-06 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6595, + "epoch": 0.8801498127340824, + "grad_norm": 1.4683163443104885, + "learning_rate": 6.2015795034960334e-06, + "loss": 0.3492, + "step": 16920, + "vit_learning_rate": 1.2403159006992065e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6388, + "epoch": 0.8806699958385351, + "grad_norm": 1.334462611749996, + "learning_rate": 6.197490581583078e-06, + "loss": 0.3625, + "step": 16930, + "vit_learning_rate": 1.2394981163166153e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7771, + "epoch": 0.8811901789429879, + "grad_norm": 2.005171987342989, + "learning_rate": 6.193400809914799e-06, + "loss": 0.3869, + "step": 16940, + "vit_learning_rate": 1.2386801619829597e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7477, + "epoch": 0.8817103620474407, + "grad_norm": 1.4061561700939211, + "learning_rate": 6.189310191393357e-06, + "loss": 0.378, + "step": 16950, + "vit_learning_rate": 1.237862038278671e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6392, + "epoch": 0.8822305451518935, + "grad_norm": 1.7488291123346191, + "learning_rate": 6.185218728921507e-06, + "loss": 0.3894, + "step": 16960, + "vit_learning_rate": 1.2370437457843012e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6825, + "epoch": 0.8827507282563463, + "grad_norm": 1.5823737490178171, + "learning_rate": 6.1811264254026045e-06, + "loss": 0.3647, + "step": 16970, + "vit_learning_rate": 1.236225285080521e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6895, + "epoch": 0.883270911360799, + "grad_norm": 1.8815708969513254, + "learning_rate": 6.177033283740607e-06, + "loss": 0.3812, + "step": 16980, + "vit_learning_rate": 1.2354066567481212e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6217, + "epoch": 0.8837910944652517, + "grad_norm": 1.2408320356982843, + "learning_rate": 6.1729393068400575e-06, + "loss": 0.4017, + "step": 16990, + "vit_learning_rate": 1.2345878613680114e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6319, + "epoch": 0.8843112775697045, + "grad_norm": 1.8201750639601455, + "learning_rate": 6.168844497606101e-06, + "loss": 0.3527, + "step": 17000, + "vit_learning_rate": 1.2337688995212201e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.396, + "epoch": 0.8848314606741573, + "grad_norm": 1.4619053423582598, + "learning_rate": 6.164748858944465e-06, + "loss": 0.3796, + "step": 17010, + "vit_learning_rate": 1.2329497717888927e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6644, + "epoch": 0.8853516437786101, + "grad_norm": 1.5199850166095614, + "learning_rate": 6.160652393761473e-06, + "loss": 0.3746, + "step": 17020, + "vit_learning_rate": 1.2321304787522943e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3948, + "epoch": 0.8858718268830629, + "grad_norm": 2.0508182342918246, + "learning_rate": 6.1565551049640305e-06, + "loss": 0.363, + "step": 17030, + "vit_learning_rate": 1.231311020992806e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6455, + "epoch": 0.8863920099875156, + "grad_norm": 1.9108713391281773, + "learning_rate": 6.152456995459625e-06, + "loss": 0.3704, + "step": 17040, + "vit_learning_rate": 1.230491399091925e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6349, + "epoch": 0.8869121930919683, + "grad_norm": 1.6574202524204378, + "learning_rate": 6.1483580681563346e-06, + "loss": 0.3689, + "step": 17050, + "vit_learning_rate": 1.2296716136312669e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6314, + "epoch": 0.8874323761964211, + "grad_norm": 1.6442873857776887, + "learning_rate": 6.144258325962811e-06, + "loss": 0.3707, + "step": 17060, + "vit_learning_rate": 1.228851665192562e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6556, + "epoch": 0.8879525593008739, + "grad_norm": 1.606624162839537, + "learning_rate": 6.140157771788284e-06, + "loss": 0.3743, + "step": 17070, + "vit_learning_rate": 1.2280315543576567e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6223, + "epoch": 0.8884727424053267, + "grad_norm": 1.3577382768684336, + "learning_rate": 6.136056408542563e-06, + "loss": 0.3667, + "step": 17080, + "vit_learning_rate": 1.2272112817085126e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6453, + "epoch": 0.8889929255097795, + "grad_norm": 2.1096183861339446, + "learning_rate": 6.131954239136033e-06, + "loss": 0.3819, + "step": 17090, + "vit_learning_rate": 1.2263908478272064e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7029, + "epoch": 0.8895131086142322, + "grad_norm": 1.558863184913366, + "learning_rate": 6.127851266479646e-06, + "loss": 0.3707, + "step": 17100, + "vit_learning_rate": 1.2255702532959291e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.3552, + "epoch": 0.890033291718685, + "grad_norm": 1.5726317740288012, + "learning_rate": 6.1237474934849235e-06, + "loss": 0.3831, + "step": 17110, + "vit_learning_rate": 1.2247494986969845e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6368, + "epoch": 0.8905534748231377, + "grad_norm": 1.5252818699301711, + "learning_rate": 6.1196429230639635e-06, + "loss": 0.4025, + "step": 17120, + "vit_learning_rate": 1.2239285846127925e-06 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 2.4291, + "epoch": 0.8910736579275905, + "grad_norm": 1.560403234954303, + "learning_rate": 6.115537558129418e-06, + "loss": 0.3576, + "step": 17130, + "vit_learning_rate": 1.2231075116258835e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6371, + "epoch": 0.8915938410320433, + "grad_norm": 1.9721813800888912, + "learning_rate": 6.111431401594514e-06, + "loss": 0.3401, + "step": 17140, + "vit_learning_rate": 1.2222862803189027e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6795, + "epoch": 0.8921140241364961, + "grad_norm": 1.681266023622129, + "learning_rate": 6.1073244563730305e-06, + "loss": 0.3756, + "step": 17150, + "vit_learning_rate": 1.2214648912746058e-06 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 2.415, + "epoch": 0.8926342072409488, + "grad_norm": 1.7321904551608194, + "learning_rate": 6.103216725379315e-06, + "loss": 0.383, + "step": 17160, + "vit_learning_rate": 1.2206433450758628e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7036, + "epoch": 0.8931543903454016, + "grad_norm": 1.3170317217968077, + "learning_rate": 6.0991082115282665e-06, + "loss": 0.3564, + "step": 17170, + "vit_learning_rate": 1.219821642305653e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.3628, + "epoch": 0.8936745734498543, + "grad_norm": 1.2956576660155845, + "learning_rate": 6.0949989177353375e-06, + "loss": 0.3756, + "step": 17180, + "vit_learning_rate": 1.2189997835470673e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6966, + "epoch": 0.8941947565543071, + "grad_norm": 1.3629486336146648, + "learning_rate": 6.090888846916545e-06, + "loss": 0.376, + "step": 17190, + "vit_learning_rate": 1.2181777693833089e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6528, + "epoch": 0.8947149396587599, + "grad_norm": 1.3783128182610302, + "learning_rate": 6.086778001988445e-06, + "loss": 0.3651, + "step": 17200, + "vit_learning_rate": 1.2173556003976888e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7063, + "epoch": 0.8952351227632126, + "grad_norm": 2.012338726462087, + "learning_rate": 6.08266638586815e-06, + "loss": 0.3976, + "step": 17210, + "vit_learning_rate": 1.21653327717363e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6441, + "epoch": 0.8957553058676654, + "grad_norm": 1.6134339591269797, + "learning_rate": 6.078554001473317e-06, + "loss": 0.3523, + "step": 17220, + "vit_learning_rate": 1.2157108002946631e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6254, + "epoch": 0.8962754889721182, + "grad_norm": 1.5572427335738739, + "learning_rate": 6.07444085172215e-06, + "loss": 0.3534, + "step": 17230, + "vit_learning_rate": 1.2148881703444297e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6382, + "epoch": 0.896795672076571, + "grad_norm": 1.8113545767964423, + "learning_rate": 6.070326939533393e-06, + "loss": 0.3779, + "step": 17240, + "vit_learning_rate": 1.2140653879066785e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6985, + "epoch": 0.8973158551810237, + "grad_norm": 1.6385839666338446, + "learning_rate": 6.066212267826335e-06, + "loss": 0.3799, + "step": 17250, + "vit_learning_rate": 1.2132424535652668e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 7.6094, + "epoch": 0.8978360382854765, + "grad_norm": 1.0191988962799914, + "learning_rate": 6.062096839520802e-06, + "loss": 0.3513, + "step": 17260, + "vit_learning_rate": 1.2124193679041601e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6495, + "epoch": 0.8983562213899292, + "grad_norm": 1.5052018942530307, + "learning_rate": 6.057980657537156e-06, + "loss": 0.3642, + "step": 17270, + "vit_learning_rate": 1.211596131507431e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6716, + "epoch": 0.898876404494382, + "grad_norm": 1.426711654554919, + "learning_rate": 6.053863724796294e-06, + "loss": 0.3491, + "step": 17280, + "vit_learning_rate": 1.2107727449592586e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6566, + "epoch": 0.8993965875988348, + "grad_norm": 1.5662356943979212, + "learning_rate": 6.049746044219649e-06, + "loss": 0.3763, + "step": 17290, + "vit_learning_rate": 1.2099492088439297e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7711, + "epoch": 0.8999167707032876, + "grad_norm": 1.472775394588557, + "learning_rate": 6.04562761872918e-06, + "loss": 0.3712, + "step": 17300, + "vit_learning_rate": 1.209125523745836e-06 + }, + { + "avg_batch_load_time": 2.7946, + "avg_batch_processing_time": 0.6109, + "epoch": 0.9004369538077404, + "grad_norm": 1.2857824083704068, + "learning_rate": 6.0415084512473785e-06, + "loss": 0.3746, + "step": 17310, + "vit_learning_rate": 1.2083016902494757e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6544, + "epoch": 0.9009571369121931, + "grad_norm": 1.2960848816273323, + "learning_rate": 6.037388544697258e-06, + "loss": 0.3618, + "step": 17320, + "vit_learning_rate": 1.2074777089394513e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6348, + "epoch": 0.9014773200166458, + "grad_norm": 1.327632306376761, + "learning_rate": 6.0332679020023625e-06, + "loss": 0.3818, + "step": 17330, + "vit_learning_rate": 1.2066535804004723e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6875, + "epoch": 0.9019975031210986, + "grad_norm": 1.8233057430692257, + "learning_rate": 6.029146526086753e-06, + "loss": 0.3791, + "step": 17340, + "vit_learning_rate": 1.2058293052173504e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6458, + "epoch": 0.9025176862255514, + "grad_norm": 1.5737678138994469, + "learning_rate": 6.025024419875014e-06, + "loss": 0.3601, + "step": 17350, + "vit_learning_rate": 1.2050048839750026e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6056, + "epoch": 0.9030378693300042, + "grad_norm": 1.4194733069470582, + "learning_rate": 6.020901586292247e-06, + "loss": 0.3599, + "step": 17360, + "vit_learning_rate": 1.2041803172584494e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6171, + "epoch": 0.903558052434457, + "grad_norm": 1.7839124724397868, + "learning_rate": 6.016778028264069e-06, + "loss": 0.39, + "step": 17370, + "vit_learning_rate": 1.2033556056528138e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6319, + "epoch": 0.9040782355389096, + "grad_norm": 1.1100657898126909, + "learning_rate": 6.012653748716614e-06, + "loss": 0.4036, + "step": 17380, + "vit_learning_rate": 1.2025307497433226e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6308, + "epoch": 0.9045984186433624, + "grad_norm": 1.6500528842750912, + "learning_rate": 6.0085287505765224e-06, + "loss": 0.3797, + "step": 17390, + "vit_learning_rate": 1.2017057501153042e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7097, + "epoch": 0.9051186017478152, + "grad_norm": 1.8162155473298116, + "learning_rate": 6.004403036770951e-06, + "loss": 0.3748, + "step": 17400, + "vit_learning_rate": 1.2008806073541899e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 3.0083, + "epoch": 0.905638784852268, + "grad_norm": 1.8596214127523578, + "learning_rate": 6.00027661022756e-06, + "loss": 0.3669, + "step": 17410, + "vit_learning_rate": 1.2000553220455118e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6706, + "epoch": 0.9061589679567208, + "grad_norm": 1.4075539440451101, + "learning_rate": 5.996149473874519e-06, + "loss": 0.3681, + "step": 17420, + "vit_learning_rate": 1.1992298947749036e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7501, + "epoch": 0.9066791510611736, + "grad_norm": 1.500259002894385, + "learning_rate": 5.992021630640495e-06, + "loss": 0.3532, + "step": 17430, + "vit_learning_rate": 1.198404326128099e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7541, + "epoch": 0.9071993341656263, + "grad_norm": 2.527372433685828, + "learning_rate": 5.987893083454665e-06, + "loss": 0.3535, + "step": 17440, + "vit_learning_rate": 1.1975786166909327e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6294, + "epoch": 0.907719517270079, + "grad_norm": 1.599605910831061, + "learning_rate": 5.983763835246699e-06, + "loss": 0.3612, + "step": 17450, + "vit_learning_rate": 1.1967527670493397e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6789, + "epoch": 0.9082397003745318, + "grad_norm": 1.4806458810658774, + "learning_rate": 5.9796338889467656e-06, + "loss": 0.3751, + "step": 17460, + "vit_learning_rate": 1.1959267777893529e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.8, + "epoch": 0.9087598834789846, + "grad_norm": 2.124538242336279, + "learning_rate": 5.975503247485532e-06, + "loss": 0.3762, + "step": 17470, + "vit_learning_rate": 1.1951006494971064e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6976, + "epoch": 0.9092800665834374, + "grad_norm": 1.7999245037489677, + "learning_rate": 5.971371913794157e-06, + "loss": 0.3732, + "step": 17480, + "vit_learning_rate": 1.1942743827588314e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6837, + "epoch": 0.9098002496878901, + "grad_norm": 1.321995064426508, + "learning_rate": 5.967239890804288e-06, + "loss": 0.3492, + "step": 17490, + "vit_learning_rate": 1.1934479781608575e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7138, + "epoch": 0.9103204327923429, + "grad_norm": 1.9477743247532768, + "learning_rate": 5.963107181448067e-06, + "loss": 0.3641, + "step": 17500, + "vit_learning_rate": 1.1926214362896131e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7418, + "epoch": 0.9108406158967957, + "grad_norm": 1.3250311546547138, + "learning_rate": 5.958973788658115e-06, + "loss": 0.3558, + "step": 17510, + "vit_learning_rate": 1.191794757731623e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.3879, + "epoch": 0.9113607990012484, + "grad_norm": 1.5818126189269042, + "learning_rate": 5.954839715367548e-06, + "loss": 0.3789, + "step": 17520, + "vit_learning_rate": 1.1909679430735095e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7225, + "epoch": 0.9118809821057012, + "grad_norm": 1.2015698815761076, + "learning_rate": 5.950704964509956e-06, + "loss": 0.3604, + "step": 17530, + "vit_learning_rate": 1.190140992901991e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.64, + "epoch": 0.912401165210154, + "grad_norm": 1.252380405946156, + "learning_rate": 5.946569539019415e-06, + "loss": 0.3717, + "step": 17540, + "vit_learning_rate": 1.189313907803883e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.667, + "epoch": 0.9129213483146067, + "grad_norm": 1.9183791041567324, + "learning_rate": 5.9424334418304766e-06, + "loss": 0.3562, + "step": 17550, + "vit_learning_rate": 1.1884866883660952e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6687, + "epoch": 0.9134415314190595, + "grad_norm": 1.6351670967663046, + "learning_rate": 5.9382966758781736e-06, + "loss": 0.3957, + "step": 17560, + "vit_learning_rate": 1.1876593351756345e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6605, + "epoch": 0.9139617145235123, + "grad_norm": 1.455351444150899, + "learning_rate": 5.9341592440980075e-06, + "loss": 0.3577, + "step": 17570, + "vit_learning_rate": 1.1868318488196015e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6704, + "epoch": 0.9144818976279651, + "grad_norm": 1.7550999962468665, + "learning_rate": 5.930021149425957e-06, + "loss": 0.3742, + "step": 17580, + "vit_learning_rate": 1.1860042298851913e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4042, + "epoch": 0.9150020807324178, + "grad_norm": 1.868313044444874, + "learning_rate": 5.925882394798469e-06, + "loss": 0.3665, + "step": 17590, + "vit_learning_rate": 1.1851764789596937e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6492, + "epoch": 0.9155222638368706, + "grad_norm": 1.6287358632126288, + "learning_rate": 5.921742983152458e-06, + "loss": 0.3792, + "step": 17600, + "vit_learning_rate": 1.1843485966304914e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6534, + "epoch": 0.9160424469413233, + "grad_norm": 1.5528832049201113, + "learning_rate": 5.917602917425307e-06, + "loss": 0.3431, + "step": 17610, + "vit_learning_rate": 1.1835205834850612e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6084, + "epoch": 0.9165626300457761, + "grad_norm": 1.645234668772245, + "learning_rate": 5.913462200554861e-06, + "loss": 0.3501, + "step": 17620, + "vit_learning_rate": 1.1826924401109721e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.648, + "epoch": 0.9170828131502289, + "grad_norm": 1.3005977179421724, + "learning_rate": 5.90932083547943e-06, + "loss": 0.3725, + "step": 17630, + "vit_learning_rate": 1.1818641670958858e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6483, + "epoch": 0.9176029962546817, + "grad_norm": 1.4378682508732674, + "learning_rate": 5.9051788251377805e-06, + "loss": 0.3993, + "step": 17640, + "vit_learning_rate": 1.181035765027556e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7319, + "epoch": 0.9181231793591345, + "grad_norm": 1.4937647591406906, + "learning_rate": 5.901036172469138e-06, + "loss": 0.352, + "step": 17650, + "vit_learning_rate": 1.1802072344938273e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7943, + "epoch": 0.9186433624635871, + "grad_norm": 1.7025997330764828, + "learning_rate": 5.896892880413185e-06, + "loss": 0.3556, + "step": 17660, + "vit_learning_rate": 1.1793785760826368e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6925, + "epoch": 0.9191635455680399, + "grad_norm": 2.035786488622821, + "learning_rate": 5.892748951910057e-06, + "loss": 0.3909, + "step": 17670, + "vit_learning_rate": 1.1785497903820112e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6863, + "epoch": 0.9196837286724927, + "grad_norm": 1.4560403322441895, + "learning_rate": 5.8886043899003405e-06, + "loss": 0.371, + "step": 17680, + "vit_learning_rate": 1.177720877980068e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6373, + "epoch": 0.9202039117769455, + "grad_norm": 2.80465678899522, + "learning_rate": 5.884459197325072e-06, + "loss": 0.346, + "step": 17690, + "vit_learning_rate": 1.1768918394650142e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6542, + "epoch": 0.9207240948813983, + "grad_norm": 1.8172226648264571, + "learning_rate": 5.880313377125737e-06, + "loss": 0.3982, + "step": 17700, + "vit_learning_rate": 1.1760626754251473e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6367, + "epoch": 0.9212442779858511, + "grad_norm": 1.2463204960001515, + "learning_rate": 5.876166932244263e-06, + "loss": 0.4002, + "step": 17710, + "vit_learning_rate": 1.1752333864488525e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.641, + "epoch": 0.9217644610903037, + "grad_norm": 1.6028885187752526, + "learning_rate": 5.872019865623024e-06, + "loss": 0.3886, + "step": 17720, + "vit_learning_rate": 1.1744039731246047e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.688, + "epoch": 0.9222846441947565, + "grad_norm": 1.5309703067400797, + "learning_rate": 5.867872180204833e-06, + "loss": 0.3612, + "step": 17730, + "vit_learning_rate": 1.1735744360409664e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6933, + "epoch": 0.9228048272992093, + "grad_norm": 1.7121418237616621, + "learning_rate": 5.863723878932943e-06, + "loss": 0.3724, + "step": 17740, + "vit_learning_rate": 1.1727447757865886e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6454, + "epoch": 0.9233250104036621, + "grad_norm": 1.5997492323857712, + "learning_rate": 5.859574964751047e-06, + "loss": 0.3841, + "step": 17750, + "vit_learning_rate": 1.1719149929502093e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6986, + "epoch": 0.9238451935081149, + "grad_norm": 1.3908893991514624, + "learning_rate": 5.855425440603265e-06, + "loss": 0.3799, + "step": 17760, + "vit_learning_rate": 1.1710850881206529e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6743, + "epoch": 0.9243653766125676, + "grad_norm": 1.5424220355391456, + "learning_rate": 5.851275309434158e-06, + "loss": 0.3904, + "step": 17770, + "vit_learning_rate": 1.1702550618868314e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7146, + "epoch": 0.9248855597170204, + "grad_norm": 1.527835125545137, + "learning_rate": 5.847124574188714e-06, + "loss": 0.3689, + "step": 17780, + "vit_learning_rate": 1.1694249148377426e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7332, + "epoch": 0.9254057428214731, + "grad_norm": 3.125092583268322, + "learning_rate": 5.8429732378123495e-06, + "loss": 0.3719, + "step": 17790, + "vit_learning_rate": 1.1685946475624698e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6806, + "epoch": 0.9259259259259259, + "grad_norm": 1.4768268027709242, + "learning_rate": 5.83882130325091e-06, + "loss": 0.3579, + "step": 17800, + "vit_learning_rate": 1.1677642606501819e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6988, + "epoch": 0.9264461090303787, + "grad_norm": 1.6198283493865036, + "learning_rate": 5.83466877345066e-06, + "loss": 0.3839, + "step": 17810, + "vit_learning_rate": 1.1669337546901318e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7615, + "epoch": 0.9269662921348315, + "grad_norm": 1.4618729663314267, + "learning_rate": 5.830515651358296e-06, + "loss": 0.3634, + "step": 17820, + "vit_learning_rate": 1.166103130271659e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6825, + "epoch": 0.9274864752392842, + "grad_norm": 1.3518451375403089, + "learning_rate": 5.826361939920923e-06, + "loss": 0.3888, + "step": 17830, + "vit_learning_rate": 1.1652723879841843e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7851, + "epoch": 0.928006658343737, + "grad_norm": 2.1887961517641368, + "learning_rate": 5.822207642086073e-06, + "loss": 0.3464, + "step": 17840, + "vit_learning_rate": 1.1644415284172144e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7007, + "epoch": 0.9285268414481898, + "grad_norm": 1.814370062189683, + "learning_rate": 5.818052760801692e-06, + "loss": 0.3725, + "step": 17850, + "vit_learning_rate": 1.1636105521603384e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6464, + "epoch": 0.9290470245526425, + "grad_norm": 1.2910121901312437, + "learning_rate": 5.813897299016138e-06, + "loss": 0.3811, + "step": 17860, + "vit_learning_rate": 1.1627794598032274e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.722, + "epoch": 0.9295672076570953, + "grad_norm": 1.6850049783161103, + "learning_rate": 5.809741259678183e-06, + "loss": 0.365, + "step": 17870, + "vit_learning_rate": 1.1619482519356365e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6523, + "epoch": 0.9300873907615481, + "grad_norm": 1.537516014850808, + "learning_rate": 5.805584645737007e-06, + "loss": 0.3641, + "step": 17880, + "vit_learning_rate": 1.1611169291474012e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6734, + "epoch": 0.9306075738660008, + "grad_norm": 1.4394574178725643, + "learning_rate": 5.801427460142201e-06, + "loss": 0.361, + "step": 17890, + "vit_learning_rate": 1.16028549202844e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6649, + "epoch": 0.9311277569704536, + "grad_norm": 1.4955483433886758, + "learning_rate": 5.797269705843756e-06, + "loss": 0.3692, + "step": 17900, + "vit_learning_rate": 1.159453941168751e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6641, + "epoch": 0.9316479400749064, + "grad_norm": 1.172823435640942, + "learning_rate": 5.793111385792072e-06, + "loss": 0.3462, + "step": 17910, + "vit_learning_rate": 1.1586222771584143e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7003, + "epoch": 0.9321681231793592, + "grad_norm": 1.4798508990990706, + "learning_rate": 5.788952502937949e-06, + "loss": 0.3954, + "step": 17920, + "vit_learning_rate": 1.1577905005875897e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6504, + "epoch": 0.9326883062838119, + "grad_norm": 1.3093285891895396, + "learning_rate": 5.784793060232585e-06, + "loss": 0.3938, + "step": 17930, + "vit_learning_rate": 1.1569586120465168e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6777, + "epoch": 0.9332084893882646, + "grad_norm": 1.1755309213258063, + "learning_rate": 5.780633060627576e-06, + "loss": 0.3739, + "step": 17940, + "vit_learning_rate": 1.156126612125515e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7048, + "epoch": 0.9337286724927174, + "grad_norm": 1.7118969624438358, + "learning_rate": 5.776472507074914e-06, + "loss": 0.3737, + "step": 17950, + "vit_learning_rate": 1.1552945014149827e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6335, + "epoch": 0.9342488555971702, + "grad_norm": 1.4960717346076342, + "learning_rate": 5.772311402526984e-06, + "loss": 0.3674, + "step": 17960, + "vit_learning_rate": 1.1544622805053967e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7211, + "epoch": 0.934769038701623, + "grad_norm": 1.4866577364438816, + "learning_rate": 5.768149749936559e-06, + "loss": 0.3872, + "step": 17970, + "vit_learning_rate": 1.1536299499873117e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6436, + "epoch": 0.9352892218060758, + "grad_norm": 1.114111967915645, + "learning_rate": 5.763987552256806e-06, + "loss": 0.3687, + "step": 17980, + "vit_learning_rate": 1.152797510451361e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.666, + "epoch": 0.9358094049105286, + "grad_norm": 1.6481809003944174, + "learning_rate": 5.759824812441275e-06, + "loss": 0.374, + "step": 17990, + "vit_learning_rate": 1.1519649624882549e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6193, + "epoch": 0.9363295880149812, + "grad_norm": 1.272965517600929, + "learning_rate": 5.755661533443901e-06, + "loss": 0.3723, + "step": 18000, + "vit_learning_rate": 1.15113230668878e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.644, + "epoch": 0.936849771119434, + "grad_norm": 1.4136875450994337, + "learning_rate": 5.751497718219003e-06, + "loss": 0.3706, + "step": 18010, + "vit_learning_rate": 1.1502995436438005e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6783, + "epoch": 0.9373699542238868, + "grad_norm": 1.9778155004853002, + "learning_rate": 5.747333369721279e-06, + "loss": 0.3809, + "step": 18020, + "vit_learning_rate": 1.1494666739442557e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7296, + "epoch": 0.9378901373283396, + "grad_norm": 1.2103230380725751, + "learning_rate": 5.743168490905811e-06, + "loss": 0.3774, + "step": 18030, + "vit_learning_rate": 1.148633698181162e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6793, + "epoch": 0.9384103204327924, + "grad_norm": 2.249017062330341, + "learning_rate": 5.739003084728046e-06, + "loss": 0.3716, + "step": 18040, + "vit_learning_rate": 1.1478006169456091e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6516, + "epoch": 0.9389305035372452, + "grad_norm": 1.3598083881808092, + "learning_rate": 5.734837154143817e-06, + "loss": 0.3685, + "step": 18050, + "vit_learning_rate": 1.1469674308287633e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.621, + "epoch": 0.9394506866416978, + "grad_norm": 2.2761007383352294, + "learning_rate": 5.730670702109321e-06, + "loss": 0.3718, + "step": 18060, + "vit_learning_rate": 1.146134140421864e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7138, + "epoch": 0.9399708697461506, + "grad_norm": 1.5438322504510251, + "learning_rate": 5.72650373158113e-06, + "loss": 0.3589, + "step": 18070, + "vit_learning_rate": 1.1453007463162258e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6303, + "epoch": 0.9404910528506034, + "grad_norm": 1.6435606655286532, + "learning_rate": 5.7223362455161805e-06, + "loss": 0.3399, + "step": 18080, + "vit_learning_rate": 1.1444672491032358e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6683, + "epoch": 0.9410112359550562, + "grad_norm": 1.794520060222255, + "learning_rate": 5.718168246871775e-06, + "loss": 0.3746, + "step": 18090, + "vit_learning_rate": 1.1436336493743548e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7877, + "epoch": 0.941531419059509, + "grad_norm": 1.5767871251273444, + "learning_rate": 5.713999738605586e-06, + "loss": 0.3837, + "step": 18100, + "vit_learning_rate": 1.142799947721117e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6738, + "epoch": 0.9420516021639617, + "grad_norm": 1.5092801647671308, + "learning_rate": 5.709830723675636e-06, + "loss": 0.3857, + "step": 18110, + "vit_learning_rate": 1.1419661447351272e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.672, + "epoch": 0.9425717852684145, + "grad_norm": 1.4943652307931512, + "learning_rate": 5.7056612050403205e-06, + "loss": 0.3857, + "step": 18120, + "vit_learning_rate": 1.1411322410080639e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.722, + "epoch": 0.9430919683728672, + "grad_norm": 1.5071558535466847, + "learning_rate": 5.701491185658382e-06, + "loss": 0.3908, + "step": 18130, + "vit_learning_rate": 1.1402982371316763e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.619, + "epoch": 0.94361215147732, + "grad_norm": 1.5726367133276347, + "learning_rate": 5.6973206684889215e-06, + "loss": 0.3716, + "step": 18140, + "vit_learning_rate": 1.1394641336977841e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6228, + "epoch": 0.9441323345817728, + "grad_norm": 1.593603182322556, + "learning_rate": 5.6931496564913955e-06, + "loss": 0.3766, + "step": 18150, + "vit_learning_rate": 1.138629931298279e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7446, + "epoch": 0.9446525176862256, + "grad_norm": 1.3021862237096795, + "learning_rate": 5.688978152625608e-06, + "loss": 0.3675, + "step": 18160, + "vit_learning_rate": 1.1377956305251214e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6448, + "epoch": 0.9451727007906783, + "grad_norm": 1.3681959119414229, + "learning_rate": 5.684806159851716e-06, + "loss": 0.3829, + "step": 18170, + "vit_learning_rate": 1.1369612319703433e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.709, + "epoch": 0.9456928838951311, + "grad_norm": 1.5667194732258862, + "learning_rate": 5.680633681130219e-06, + "loss": 0.3964, + "step": 18180, + "vit_learning_rate": 1.1361267362260437e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7053, + "epoch": 0.9462130669995839, + "grad_norm": 1.43452439734166, + "learning_rate": 5.676460719421966e-06, + "loss": 0.3847, + "step": 18190, + "vit_learning_rate": 1.135292143884393e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6583, + "epoch": 0.9467332501040366, + "grad_norm": 1.4989680159651346, + "learning_rate": 5.672287277688146e-06, + "loss": 0.3747, + "step": 18200, + "vit_learning_rate": 1.134457455537629e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8011, + "epoch": 0.9472534332084894, + "grad_norm": 1.7871805028280952, + "learning_rate": 5.668113358890287e-06, + "loss": 0.3561, + "step": 18210, + "vit_learning_rate": 1.1336226717780572e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.649, + "epoch": 0.9477736163129421, + "grad_norm": 1.9082019980631286, + "learning_rate": 5.6639389659902585e-06, + "loss": 0.3827, + "step": 18220, + "vit_learning_rate": 1.1327877931980516e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6419, + "epoch": 0.9482937994173949, + "grad_norm": 1.3048133724687931, + "learning_rate": 5.659764101950268e-06, + "loss": 0.3504, + "step": 18230, + "vit_learning_rate": 1.1319528203900535e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6713, + "epoch": 0.9488139825218477, + "grad_norm": 1.2817287488991633, + "learning_rate": 5.655588769732851e-06, + "loss": 0.3615, + "step": 18240, + "vit_learning_rate": 1.13111775394657e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7309, + "epoch": 0.9493341656263005, + "grad_norm": 1.5786263288795872, + "learning_rate": 5.65141297230088e-06, + "loss": 0.3573, + "step": 18250, + "vit_learning_rate": 1.130282594460176e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6405, + "epoch": 0.9498543487307533, + "grad_norm": 1.201565452568936, + "learning_rate": 5.64723671261756e-06, + "loss": 0.3495, + "step": 18260, + "vit_learning_rate": 1.1294473425235118e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6543, + "epoch": 0.950374531835206, + "grad_norm": 2.011343852711305, + "learning_rate": 5.643059993646417e-06, + "loss": 0.373, + "step": 18270, + "vit_learning_rate": 1.1286119987292833e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6422, + "epoch": 0.9508947149396587, + "grad_norm": 1.8457033433217092, + "learning_rate": 5.638882818351307e-06, + "loss": 0.368, + "step": 18280, + "vit_learning_rate": 1.1277765636702614e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6155, + "epoch": 0.9514148980441115, + "grad_norm": 1.3809945122031404, + "learning_rate": 5.634705189696412e-06, + "loss": 0.3976, + "step": 18290, + "vit_learning_rate": 1.1269410379392823e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6776, + "epoch": 0.9519350811485643, + "grad_norm": 1.5210415704956353, + "learning_rate": 5.63052711064623e-06, + "loss": 0.3603, + "step": 18300, + "vit_learning_rate": 1.1261054221292459e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6854, + "epoch": 0.9524552642530171, + "grad_norm": 1.5702816027267468, + "learning_rate": 5.626348584165583e-06, + "loss": 0.3543, + "step": 18310, + "vit_learning_rate": 1.1252697168331165e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7439, + "epoch": 0.9529754473574699, + "grad_norm": 1.6468333576088148, + "learning_rate": 5.622169613219609e-06, + "loss": 0.371, + "step": 18320, + "vit_learning_rate": 1.1244339226439217e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6976, + "epoch": 0.9534956304619226, + "grad_norm": 1.5714789080133575, + "learning_rate": 5.6179902007737615e-06, + "loss": 0.3639, + "step": 18330, + "vit_learning_rate": 1.1235980401547523e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6583, + "epoch": 0.9540158135663753, + "grad_norm": 1.2093250562592563, + "learning_rate": 5.61381034979381e-06, + "loss": 0.3866, + "step": 18340, + "vit_learning_rate": 1.122762069958762e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6744, + "epoch": 0.9545359966708281, + "grad_norm": 1.4251286185382916, + "learning_rate": 5.609630063245829e-06, + "loss": 0.3469, + "step": 18350, + "vit_learning_rate": 1.1219260126491655e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6788, + "epoch": 0.9550561797752809, + "grad_norm": 1.2849568540819234, + "learning_rate": 5.605449344096208e-06, + "loss": 0.3837, + "step": 18360, + "vit_learning_rate": 1.1210898688192414e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6685, + "epoch": 0.9555763628797337, + "grad_norm": 1.6239469626892729, + "learning_rate": 5.6012681953116395e-06, + "loss": 0.3757, + "step": 18370, + "vit_learning_rate": 1.1202536390623278e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6313, + "epoch": 0.9560965459841865, + "grad_norm": 1.5529041556870036, + "learning_rate": 5.5970866198591235e-06, + "loss": 0.3875, + "step": 18380, + "vit_learning_rate": 1.1194173239718245e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6552, + "epoch": 0.9566167290886392, + "grad_norm": 1.8096323044181994, + "learning_rate": 5.592904620705961e-06, + "loss": 0.3573, + "step": 18390, + "vit_learning_rate": 1.1185809241411922e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6377, + "epoch": 0.9571369121930919, + "grad_norm": 1.6334967961211195, + "learning_rate": 5.588722200819756e-06, + "loss": 0.3715, + "step": 18400, + "vit_learning_rate": 1.117744440163951e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6655, + "epoch": 0.9576570952975447, + "grad_norm": 1.7393415648947004, + "learning_rate": 5.58453936316841e-06, + "loss": 0.3522, + "step": 18410, + "vit_learning_rate": 1.1169078726336819e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6105, + "epoch": 0.9581772784019975, + "grad_norm": 1.5891008554930506, + "learning_rate": 5.580356110720118e-06, + "loss": 0.3642, + "step": 18420, + "vit_learning_rate": 1.1160712221440233e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6958, + "epoch": 0.9586974615064503, + "grad_norm": 1.446504865682666, + "learning_rate": 5.576172446443373e-06, + "loss": 0.3639, + "step": 18430, + "vit_learning_rate": 1.1152344892886745e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6757, + "epoch": 0.9592176446109031, + "grad_norm": 1.804245037367662, + "learning_rate": 5.57198837330696e-06, + "loss": 0.3688, + "step": 18440, + "vit_learning_rate": 1.1143976746613918e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7275, + "epoch": 0.9597378277153558, + "grad_norm": 1.7423926813342205, + "learning_rate": 5.567803894279953e-06, + "loss": 0.3964, + "step": 18450, + "vit_learning_rate": 1.1135607788559904e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6365, + "epoch": 0.9602580108198085, + "grad_norm": 1.7267801177659199, + "learning_rate": 5.563619012331711e-06, + "loss": 0.3735, + "step": 18460, + "vit_learning_rate": 1.1127238024663421e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.649, + "epoch": 0.9607781939242613, + "grad_norm": 1.800091039715485, + "learning_rate": 5.559433730431887e-06, + "loss": 0.3296, + "step": 18470, + "vit_learning_rate": 1.1118867460863771e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6809, + "epoch": 0.9612983770287141, + "grad_norm": 1.533954615613317, + "learning_rate": 5.55524805155041e-06, + "loss": 0.3675, + "step": 18480, + "vit_learning_rate": 1.111049610310082e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6828, + "epoch": 0.9618185601331669, + "grad_norm": 1.6079559205626586, + "learning_rate": 5.551061978657496e-06, + "loss": 0.3935, + "step": 18490, + "vit_learning_rate": 1.110212395731499e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6798, + "epoch": 0.9623387432376196, + "grad_norm": 2.0188810832341684, + "learning_rate": 5.546875514723636e-06, + "loss": 0.3535, + "step": 18500, + "vit_learning_rate": 1.109375102944727e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6805, + "epoch": 0.9628589263420724, + "grad_norm": 1.854094472958175, + "learning_rate": 5.542688662719601e-06, + "loss": 0.3744, + "step": 18510, + "vit_learning_rate": 1.10853773254392e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7103, + "epoch": 0.9633791094465252, + "grad_norm": 2.9119964800953757, + "learning_rate": 5.538501425616438e-06, + "loss": 0.3755, + "step": 18520, + "vit_learning_rate": 1.1077002851232874e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6362, + "epoch": 0.963899292550978, + "grad_norm": 1.1609465106184884, + "learning_rate": 5.534313806385465e-06, + "loss": 0.374, + "step": 18530, + "vit_learning_rate": 1.106862761277093e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7001, + "epoch": 0.9644194756554307, + "grad_norm": 1.3016493664087254, + "learning_rate": 5.530125807998276e-06, + "loss": 0.3409, + "step": 18540, + "vit_learning_rate": 1.106025161599655e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6376, + "epoch": 0.9649396587598835, + "grad_norm": 1.264666189341613, + "learning_rate": 5.525937433426727e-06, + "loss": 0.3732, + "step": 18550, + "vit_learning_rate": 1.1051874866853453e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.727, + "epoch": 0.9654598418643362, + "grad_norm": 1.9688872992126274, + "learning_rate": 5.521748685642946e-06, + "loss": 0.3568, + "step": 18560, + "vit_learning_rate": 1.104349737128589e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6644, + "epoch": 0.965980024968789, + "grad_norm": 1.6324000211269896, + "learning_rate": 5.517559567619324e-06, + "loss": 0.3509, + "step": 18570, + "vit_learning_rate": 1.1035119135238647e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6778, + "epoch": 0.9665002080732418, + "grad_norm": 1.994429998390384, + "learning_rate": 5.513370082328516e-06, + "loss": 0.385, + "step": 18580, + "vit_learning_rate": 1.1026740164657032e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6078, + "epoch": 0.9670203911776946, + "grad_norm": 1.5552177874537756, + "learning_rate": 5.5091802327434366e-06, + "loss": 0.3738, + "step": 18590, + "vit_learning_rate": 1.101836046548687e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7766, + "epoch": 0.9675405742821473, + "grad_norm": 1.2334962043085653, + "learning_rate": 5.504990021837256e-06, + "loss": 0.3678, + "step": 18600, + "vit_learning_rate": 1.1009980043674512e-06 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6398, + "epoch": 0.9680607573866001, + "grad_norm": 1.7036749108596063, + "learning_rate": 5.500799452583409e-06, + "loss": 0.3949, + "step": 18610, + "vit_learning_rate": 1.1001598905166819e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6625, + "epoch": 0.9685809404910528, + "grad_norm": 1.4170834171331355, + "learning_rate": 5.496608527955576e-06, + "loss": 0.3655, + "step": 18620, + "vit_learning_rate": 1.099321705591115e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.676, + "epoch": 0.9691011235955056, + "grad_norm": 2.016657935669629, + "learning_rate": 5.492417250927695e-06, + "loss": 0.3982, + "step": 18630, + "vit_learning_rate": 1.0984834501855388e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6657, + "epoch": 0.9696213066999584, + "grad_norm": 1.6577616481325397, + "learning_rate": 5.488225624473951e-06, + "loss": 0.3788, + "step": 18640, + "vit_learning_rate": 1.0976451248947902e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6599, + "epoch": 0.9701414898044112, + "grad_norm": 1.4883270509279258, + "learning_rate": 5.484033651568779e-06, + "loss": 0.3911, + "step": 18650, + "vit_learning_rate": 1.0968067303137555e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7275, + "epoch": 0.970661672908864, + "grad_norm": 1.6240969029088193, + "learning_rate": 5.479841335186858e-06, + "loss": 0.3799, + "step": 18660, + "vit_learning_rate": 1.0959682670373715e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6154, + "epoch": 0.9711818560133166, + "grad_norm": 1.8380908278554093, + "learning_rate": 5.475648678303112e-06, + "loss": 0.3634, + "step": 18670, + "vit_learning_rate": 1.0951297356606223e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6745, + "epoch": 0.9717020391177694, + "grad_norm": 1.394061494710852, + "learning_rate": 5.471455683892708e-06, + "loss": 0.3608, + "step": 18680, + "vit_learning_rate": 1.0942911367785415e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.713, + "epoch": 0.9722222222222222, + "grad_norm": 1.3160435602377012, + "learning_rate": 5.467262354931049e-06, + "loss": 0.3672, + "step": 18690, + "vit_learning_rate": 1.0934524709862097e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6433, + "epoch": 0.972742405326675, + "grad_norm": 1.868191500999166, + "learning_rate": 5.463068694393776e-06, + "loss": 0.3403, + "step": 18700, + "vit_learning_rate": 1.0926137388787552e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6941, + "epoch": 0.9732625884311278, + "grad_norm": 1.2796105934047148, + "learning_rate": 5.45887470525677e-06, + "loss": 0.3572, + "step": 18710, + "vit_learning_rate": 1.0917749410513538e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6885, + "epoch": 0.9737827715355806, + "grad_norm": 1.4181792310958317, + "learning_rate": 5.45468039049614e-06, + "loss": 0.3693, + "step": 18720, + "vit_learning_rate": 1.0909360780992278e-06 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7037, + "epoch": 0.9743029546400332, + "grad_norm": 1.7185378822584392, + "learning_rate": 5.450485753088228e-06, + "loss": 0.3731, + "step": 18730, + "vit_learning_rate": 1.0900971506176456e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6448, + "epoch": 0.974823137744486, + "grad_norm": 1.74191955972518, + "learning_rate": 5.4462907960096036e-06, + "loss": 0.3658, + "step": 18740, + "vit_learning_rate": 1.0892581592019206e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6543, + "epoch": 0.9753433208489388, + "grad_norm": 1.2432538930409598, + "learning_rate": 5.442095522237067e-06, + "loss": 0.3707, + "step": 18750, + "vit_learning_rate": 1.0884191044474133e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6618, + "epoch": 0.9758635039533916, + "grad_norm": 1.641690717866113, + "learning_rate": 5.437899934747639e-06, + "loss": 0.3871, + "step": 18760, + "vit_learning_rate": 1.0875799869495277e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6822, + "epoch": 0.9763836870578444, + "grad_norm": 1.570778043710672, + "learning_rate": 5.433704036518564e-06, + "loss": 0.3559, + "step": 18770, + "vit_learning_rate": 1.0867408073037126e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7077, + "epoch": 0.9769038701622972, + "grad_norm": 1.2516733028775495, + "learning_rate": 5.429507830527308e-06, + "loss": 0.3672, + "step": 18780, + "vit_learning_rate": 1.0859015661054613e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7127, + "epoch": 0.9774240532667499, + "grad_norm": 1.8716130452739395, + "learning_rate": 5.425311319751554e-06, + "loss": 0.3695, + "step": 18790, + "vit_learning_rate": 1.0850622639503106e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7912, + "epoch": 0.9779442363712026, + "grad_norm": 1.556270566558989, + "learning_rate": 5.421114507169205e-06, + "loss": 0.3535, + "step": 18800, + "vit_learning_rate": 1.0842229014338408e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6744, + "epoch": 0.9784644194756554, + "grad_norm": 1.3105864682806576, + "learning_rate": 5.416917395758371e-06, + "loss": 0.3793, + "step": 18810, + "vit_learning_rate": 1.0833834791516741e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6592, + "epoch": 0.9789846025801082, + "grad_norm": 1.8594961606733234, + "learning_rate": 5.412719988497384e-06, + "loss": 0.3578, + "step": 18820, + "vit_learning_rate": 1.0825439976994765e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6379, + "epoch": 0.979504785684561, + "grad_norm": 1.7211465998830442, + "learning_rate": 5.408522288364775e-06, + "loss": 0.3704, + "step": 18830, + "vit_learning_rate": 1.0817044576729549e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6928, + "epoch": 0.9800249687890137, + "grad_norm": 1.1876253465355746, + "learning_rate": 5.404324298339288e-06, + "loss": 0.3833, + "step": 18840, + "vit_learning_rate": 1.0808648596678575e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7105, + "epoch": 0.9805451518934665, + "grad_norm": 1.3528791173994696, + "learning_rate": 5.400126021399879e-06, + "loss": 0.3641, + "step": 18850, + "vit_learning_rate": 1.0800252042799755e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6685, + "epoch": 0.9810653349979193, + "grad_norm": 1.567747796555024, + "learning_rate": 5.395927460525695e-06, + "loss": 0.3694, + "step": 18860, + "vit_learning_rate": 1.0791854921051389e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6736, + "epoch": 0.981585518102372, + "grad_norm": 1.321963734303241, + "learning_rate": 5.391728618696094e-06, + "loss": 0.352, + "step": 18870, + "vit_learning_rate": 1.0783457237392186e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.674, + "epoch": 0.9821057012068248, + "grad_norm": 1.3447217592747132, + "learning_rate": 5.387529498890628e-06, + "loss": 0.3484, + "step": 18880, + "vit_learning_rate": 1.0775058997781255e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6735, + "epoch": 0.9826258843112776, + "grad_norm": 1.4173146114426782, + "learning_rate": 5.3833301040890525e-06, + "loss": 0.3485, + "step": 18890, + "vit_learning_rate": 1.0766660208178105e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7092, + "epoch": 0.9831460674157303, + "grad_norm": 1.5915083388558655, + "learning_rate": 5.379130437271311e-06, + "loss": 0.3795, + "step": 18900, + "vit_learning_rate": 1.0758260874542619e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6343, + "epoch": 0.9836662505201831, + "grad_norm": 2.745632546912253, + "learning_rate": 5.374930501417543e-06, + "loss": 0.3734, + "step": 18910, + "vit_learning_rate": 1.0749861002835083e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6124, + "epoch": 0.9841864336246359, + "grad_norm": 1.2700194046961102, + "learning_rate": 5.37073029950808e-06, + "loss": 0.3922, + "step": 18920, + "vit_learning_rate": 1.0741460599016159e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6508, + "epoch": 0.9847066167290887, + "grad_norm": 1.9186526096976728, + "learning_rate": 5.366529834523443e-06, + "loss": 0.3574, + "step": 18930, + "vit_learning_rate": 1.0733059669046884e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6726, + "epoch": 0.9852267998335414, + "grad_norm": 1.6704587987686592, + "learning_rate": 5.3623291094443365e-06, + "loss": 0.3575, + "step": 18940, + "vit_learning_rate": 1.0724658218888672e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6562, + "epoch": 0.9857469829379941, + "grad_norm": 1.2805215755107968, + "learning_rate": 5.35812812725165e-06, + "loss": 0.353, + "step": 18950, + "vit_learning_rate": 1.0716256254503297e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.655, + "epoch": 0.9862671660424469, + "grad_norm": 1.9842346167900056, + "learning_rate": 5.35392689092646e-06, + "loss": 0.3747, + "step": 18960, + "vit_learning_rate": 1.0707853781852917e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6827, + "epoch": 0.9867873491468997, + "grad_norm": 1.424701982401203, + "learning_rate": 5.349725403450016e-06, + "loss": 0.3621, + "step": 18970, + "vit_learning_rate": 1.069945080690003e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.661, + "epoch": 0.9873075322513525, + "grad_norm": 1.6679104632640696, + "learning_rate": 5.345523667803751e-06, + "loss": 0.3651, + "step": 18980, + "vit_learning_rate": 1.06910473356075e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6355, + "epoch": 0.9878277153558053, + "grad_norm": 1.4292870294964415, + "learning_rate": 5.341321686969275e-06, + "loss": 0.3767, + "step": 18990, + "vit_learning_rate": 1.068264337393855e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.721, + "epoch": 0.9883478984602581, + "grad_norm": 1.3510723073933424, + "learning_rate": 5.337119463928366e-06, + "loss": 0.3813, + "step": 19000, + "vit_learning_rate": 1.067423892785673e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6925, + "epoch": 0.9888680815647107, + "grad_norm": 1.4754126036000874, + "learning_rate": 5.3329170016629815e-06, + "loss": 0.3866, + "step": 19010, + "vit_learning_rate": 1.0665834003325962e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6361, + "epoch": 0.9893882646691635, + "grad_norm": 1.3928006741373895, + "learning_rate": 5.328714303155243e-06, + "loss": 0.3585, + "step": 19020, + "vit_learning_rate": 1.0657428606310483e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7512, + "epoch": 0.9899084477736163, + "grad_norm": 2.1026523514088105, + "learning_rate": 5.324511371387441e-06, + "loss": 0.3692, + "step": 19030, + "vit_learning_rate": 1.064902274277488e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6633, + "epoch": 0.9904286308780691, + "grad_norm": 1.4466886174536773, + "learning_rate": 5.320308209342032e-06, + "loss": 0.3714, + "step": 19040, + "vit_learning_rate": 1.0640616418684063e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7059, + "epoch": 0.9909488139825219, + "grad_norm": 3.602952285219492, + "learning_rate": 5.316104820001636e-06, + "loss": 0.368, + "step": 19050, + "vit_learning_rate": 1.063220964000327e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6555, + "epoch": 0.9914689970869747, + "grad_norm": 1.729548029049253, + "learning_rate": 5.311901206349038e-06, + "loss": 0.3833, + "step": 19060, + "vit_learning_rate": 1.0623802412698075e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6969, + "epoch": 0.9919891801914273, + "grad_norm": 1.5278560455993053, + "learning_rate": 5.307697371367172e-06, + "loss": 0.3587, + "step": 19070, + "vit_learning_rate": 1.0615394742734343e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6755, + "epoch": 0.9925093632958801, + "grad_norm": 1.7442198304990821, + "learning_rate": 5.30349331803914e-06, + "loss": 0.3709, + "step": 19080, + "vit_learning_rate": 1.0606986636078279e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6516, + "epoch": 0.9930295464003329, + "grad_norm": 1.7933477454613937, + "learning_rate": 5.299289049348194e-06, + "loss": 0.3822, + "step": 19090, + "vit_learning_rate": 1.0598578098696387e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6433, + "epoch": 0.9935497295047857, + "grad_norm": 1.7991755849642588, + "learning_rate": 5.295084568277738e-06, + "loss": 0.374, + "step": 19100, + "vit_learning_rate": 1.0590169136555476e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6786, + "epoch": 0.9940699126092385, + "grad_norm": 1.970146065444181, + "learning_rate": 5.290879877811328e-06, + "loss": 0.3663, + "step": 19110, + "vit_learning_rate": 1.0581759755622654e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6679, + "epoch": 0.9945900957136912, + "grad_norm": 1.9704688460351754, + "learning_rate": 5.286674980932669e-06, + "loss": 0.3744, + "step": 19120, + "vit_learning_rate": 1.0573349961865337e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6989, + "epoch": 0.995110278818144, + "grad_norm": 1.4497261238079573, + "learning_rate": 5.282469880625613e-06, + "loss": 0.3529, + "step": 19130, + "vit_learning_rate": 1.0564939761251226e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7367, + "epoch": 0.9956304619225967, + "grad_norm": 1.985511766900686, + "learning_rate": 5.278264579874153e-06, + "loss": 0.3673, + "step": 19140, + "vit_learning_rate": 1.0556529159748304e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6248, + "epoch": 0.9961506450270495, + "grad_norm": 1.6331306853358767, + "learning_rate": 5.274059081662428e-06, + "loss": 0.3767, + "step": 19150, + "vit_learning_rate": 1.0548118163324856e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6412, + "epoch": 0.9966708281315023, + "grad_norm": 1.655607825922444, + "learning_rate": 5.269853388974717e-06, + "loss": 0.3642, + "step": 19160, + "vit_learning_rate": 1.0539706777949434e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6427, + "epoch": 0.9971910112359551, + "grad_norm": 1.6975081193255679, + "learning_rate": 5.265647504795435e-06, + "loss": 0.3719, + "step": 19170, + "vit_learning_rate": 1.0531295009590867e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6463, + "epoch": 0.9977111943404078, + "grad_norm": 1.1059531764477482, + "learning_rate": 5.261441432109131e-06, + "loss": 0.3832, + "step": 19180, + "vit_learning_rate": 1.0522882864218262e-06 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7024, + "epoch": 0.9982313774448606, + "grad_norm": 1.084463996413312, + "learning_rate": 5.257235173900492e-06, + "loss": 0.3666, + "step": 19190, + "vit_learning_rate": 1.0514470347800982e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6378, + "epoch": 0.9987515605493134, + "grad_norm": 1.50084605318352, + "learning_rate": 5.25302873315434e-06, + "loss": 0.3546, + "step": 19200, + "vit_learning_rate": 1.0506057466308678e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6675, + "epoch": 0.9992717436537661, + "grad_norm": 1.419787656377706, + "learning_rate": 5.248822112855613e-06, + "loss": 0.3706, + "step": 19210, + "vit_learning_rate": 1.0497644225711223e-06 + }, + { + "avg_batch_load_time": 0.0143, + "avg_batch_processing_time": 0.6222, + "epoch": 0.9997919267582189, + "grad_norm": 1.5245261978613287, + "learning_rate": 5.24461531598939e-06, + "loss": 0.3597, + "step": 19220, + "vit_learning_rate": 1.0489230631978778e-06 + }, + { + "avg_batch_load_time": 0.2572, + "avg_batch_processing_time": 0.8319, + "epoch": 1.0003121098626717, + "grad_norm": 1.4432549246466078, + "learning_rate": 5.2404083455408694e-06, + "loss": 0.3578, + "step": 19230, + "vit_learning_rate": 1.0480816691081738e-06 + }, + { + "avg_batch_load_time": 2.4726, + "avg_batch_processing_time": 0.6284, + "epoch": 1.0008322929671245, + "grad_norm": 1.4062653248708545, + "learning_rate": 5.236201204495375e-06, + "loss": 0.3256, + "step": 19240, + "vit_learning_rate": 1.0472402408990748e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6569, + "epoch": 1.0013524760715773, + "grad_norm": 1.4439797355681225, + "learning_rate": 5.231993895838348e-06, + "loss": 0.3258, + "step": 19250, + "vit_learning_rate": 1.0463987791676693e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6359, + "epoch": 1.0018726591760299, + "grad_norm": 1.27288451211382, + "learning_rate": 5.22778642255535e-06, + "loss": 0.3469, + "step": 19260, + "vit_learning_rate": 1.0455572845110699e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7008, + "epoch": 1.0023928422804826, + "grad_norm": 1.2637700708849418, + "learning_rate": 5.223578787632066e-06, + "loss": 0.3271, + "step": 19270, + "vit_learning_rate": 1.044715757526413e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6426, + "epoch": 1.0029130253849354, + "grad_norm": 1.3468378851871863, + "learning_rate": 5.219370994054282e-06, + "loss": 0.3305, + "step": 19280, + "vit_learning_rate": 1.0438741988108565e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6797, + "epoch": 1.0034332084893882, + "grad_norm": 1.536865968065867, + "learning_rate": 5.215163044807911e-06, + "loss": 0.3403, + "step": 19290, + "vit_learning_rate": 1.043032608961582e-06 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6311, + "epoch": 1.003953391593841, + "grad_norm": 1.4417179944550198, + "learning_rate": 5.210954942878968e-06, + "loss": 0.3504, + "step": 19300, + "vit_learning_rate": 1.0421909885757934e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6548, + "epoch": 1.0044735746982938, + "grad_norm": 1.070879362973045, + "learning_rate": 5.206746691253577e-06, + "loss": 0.3086, + "step": 19310, + "vit_learning_rate": 1.0413493382507153e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6718, + "epoch": 1.0049937578027466, + "grad_norm": 1.4081328433712774, + "learning_rate": 5.20253829291797e-06, + "loss": 0.3485, + "step": 19320, + "vit_learning_rate": 1.040507658583594e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6787, + "epoch": 1.0055139409071994, + "grad_norm": 1.246630207681816, + "learning_rate": 5.198329750858484e-06, + "loss": 0.3444, + "step": 19330, + "vit_learning_rate": 1.0396659501716967e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6757, + "epoch": 1.0060341240116522, + "grad_norm": 1.3487192249858295, + "learning_rate": 5.194121068061557e-06, + "loss": 0.3558, + "step": 19340, + "vit_learning_rate": 1.0388242136123114e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.671, + "epoch": 1.006554307116105, + "grad_norm": 1.3217740558682722, + "learning_rate": 5.189912247513724e-06, + "loss": 0.3004, + "step": 19350, + "vit_learning_rate": 1.0379824495027448e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6498, + "epoch": 1.0070744902205577, + "grad_norm": 1.72724761190427, + "learning_rate": 5.1857032922016225e-06, + "loss": 0.3274, + "step": 19360, + "vit_learning_rate": 1.0371406584403245e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6622, + "epoch": 1.0075946733250105, + "grad_norm": 1.7280688607411072, + "learning_rate": 5.181494205111983e-06, + "loss": 0.3426, + "step": 19370, + "vit_learning_rate": 1.0362988410223965e-06 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6708, + "epoch": 1.008114856429463, + "grad_norm": 1.4815923055850275, + "learning_rate": 5.1772849892316305e-06, + "loss": 0.341, + "step": 19380, + "vit_learning_rate": 1.035456997846326e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6535, + "epoch": 1.0086350395339159, + "grad_norm": 1.6980176910382292, + "learning_rate": 5.17307564754748e-06, + "loss": 0.3385, + "step": 19390, + "vit_learning_rate": 1.0346151295094958e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6576, + "epoch": 1.0091552226383687, + "grad_norm": 1.5099879430937893, + "learning_rate": 5.168866183046535e-06, + "loss": 0.3408, + "step": 19400, + "vit_learning_rate": 1.0337732366093069e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6822, + "epoch": 1.0096754057428214, + "grad_norm": 2.118412438869858, + "learning_rate": 5.164656598715893e-06, + "loss": 0.3481, + "step": 19410, + "vit_learning_rate": 1.0329313197431783e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6228, + "epoch": 1.0101955888472742, + "grad_norm": 1.5794187668337414, + "learning_rate": 5.1604468975427244e-06, + "loss": 0.3378, + "step": 19420, + "vit_learning_rate": 1.0320893795085448e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6417, + "epoch": 1.010715771951727, + "grad_norm": 1.6040331863789299, + "learning_rate": 5.156237082514294e-06, + "loss": 0.3253, + "step": 19430, + "vit_learning_rate": 1.0312474165028587e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6776, + "epoch": 1.0112359550561798, + "grad_norm": 2.0352104659373103, + "learning_rate": 5.1520271566179425e-06, + "loss": 0.3357, + "step": 19440, + "vit_learning_rate": 1.0304054313235884e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6591, + "epoch": 1.0117561381606326, + "grad_norm": 1.5251738187514468, + "learning_rate": 5.147817122841088e-06, + "loss": 0.3433, + "step": 19450, + "vit_learning_rate": 1.0295634245682174e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7461, + "epoch": 1.0122763212650854, + "grad_norm": 2.4407791955998324, + "learning_rate": 5.143606984171228e-06, + "loss": 0.3524, + "step": 19460, + "vit_learning_rate": 1.0287213968342454e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6598, + "epoch": 1.0127965043695382, + "grad_norm": 1.7194480745648562, + "learning_rate": 5.139396743595932e-06, + "loss": 0.3487, + "step": 19470, + "vit_learning_rate": 1.0278793487191862e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.672, + "epoch": 1.013316687473991, + "grad_norm": 1.6164007918488443, + "learning_rate": 5.135186404102844e-06, + "loss": 0.3235, + "step": 19480, + "vit_learning_rate": 1.0270372808205687e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6349, + "epoch": 1.0138368705784435, + "grad_norm": 2.345492949954082, + "learning_rate": 5.130975968679677e-06, + "loss": 0.3131, + "step": 19490, + "vit_learning_rate": 1.0261951937359353e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6467, + "epoch": 1.0143570536828963, + "grad_norm": 1.3197782402205573, + "learning_rate": 5.126765440314211e-06, + "loss": 0.3352, + "step": 19500, + "vit_learning_rate": 1.025353088062842e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.616, + "epoch": 1.014877236787349, + "grad_norm": 1.7145071506913738, + "learning_rate": 5.122554821994296e-06, + "loss": 0.3232, + "step": 19510, + "vit_learning_rate": 1.024510964398859e-06 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7256, + "epoch": 1.0153974198918019, + "grad_norm": 2.0919658643857018, + "learning_rate": 5.118344116707841e-06, + "loss": 0.3469, + "step": 19520, + "vit_learning_rate": 1.023668823341568e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6744, + "epoch": 1.0159176029962547, + "grad_norm": 1.3800787742914802, + "learning_rate": 5.114133327442817e-06, + "loss": 0.3435, + "step": 19530, + "vit_learning_rate": 1.0228266654885633e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6349, + "epoch": 1.0164377861007075, + "grad_norm": 1.8884476231573821, + "learning_rate": 5.10992245718726e-06, + "loss": 0.3099, + "step": 19540, + "vit_learning_rate": 1.0219844914374517e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6909, + "epoch": 1.0169579692051602, + "grad_norm": 1.9413430380202614, + "learning_rate": 5.1057115089292594e-06, + "loss": 0.3297, + "step": 19550, + "vit_learning_rate": 1.0211423017858518e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6475, + "epoch": 1.017478152309613, + "grad_norm": 1.2863945604134013, + "learning_rate": 5.101500485656957e-06, + "loss": 0.3368, + "step": 19560, + "vit_learning_rate": 1.0203000971313914e-06 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6718, + "epoch": 1.0179983354140658, + "grad_norm": 1.8073820228146371, + "learning_rate": 5.097289390358554e-06, + "loss": 0.3308, + "step": 19570, + "vit_learning_rate": 1.0194578780717107e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6217, + "epoch": 1.0185185185185186, + "grad_norm": 1.3937915450009581, + "learning_rate": 5.0930782260223e-06, + "loss": 0.371, + "step": 19580, + "vit_learning_rate": 1.01861564520446e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.611, + "epoch": 1.0190387016229714, + "grad_norm": 1.4948606386024832, + "learning_rate": 5.088866995636492e-06, + "loss": 0.3422, + "step": 19590, + "vit_learning_rate": 1.0177733991272982e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6301, + "epoch": 1.019558884727424, + "grad_norm": 1.8734077344914792, + "learning_rate": 5.084655702189477e-06, + "loss": 0.3379, + "step": 19600, + "vit_learning_rate": 1.0169311404378953e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6241, + "epoch": 1.0200790678318767, + "grad_norm": 2.1812658110895704, + "learning_rate": 5.080444348669643e-06, + "loss": 0.3377, + "step": 19610, + "vit_learning_rate": 1.0160888697339285e-06 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6927, + "epoch": 1.0205992509363295, + "grad_norm": 1.4003786421134539, + "learning_rate": 5.076232938065426e-06, + "loss": 0.3417, + "step": 19620, + "vit_learning_rate": 1.0152465876130851e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.652, + "epoch": 1.0211194340407823, + "grad_norm": 2.2962465219062085, + "learning_rate": 5.072021473365296e-06, + "loss": 0.3336, + "step": 19630, + "vit_learning_rate": 1.014404294673059e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6613, + "epoch": 1.021639617145235, + "grad_norm": 2.190083833993943, + "learning_rate": 5.067809957557766e-06, + "loss": 0.3231, + "step": 19640, + "vit_learning_rate": 1.013561991511553e-06 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6733, + "epoch": 1.0221598002496879, + "grad_norm": 1.5777898210336636, + "learning_rate": 5.063598393631384e-06, + "loss": 0.3381, + "step": 19650, + "vit_learning_rate": 1.0127196787262766e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6847, + "epoch": 1.0226799833541407, + "grad_norm": 1.6718119976129544, + "learning_rate": 5.059386784574731e-06, + "loss": 0.3263, + "step": 19660, + "vit_learning_rate": 1.0118773569149462e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6894, + "epoch": 1.0232001664585935, + "grad_norm": 2.2528332381702256, + "learning_rate": 5.055175133376423e-06, + "loss": 0.3517, + "step": 19670, + "vit_learning_rate": 1.0110350266752845e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6663, + "epoch": 1.0237203495630463, + "grad_norm": 1.3437531612516385, + "learning_rate": 5.050963443025102e-06, + "loss": 0.3019, + "step": 19680, + "vit_learning_rate": 1.0101926886050202e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6512, + "epoch": 1.024240532667499, + "grad_norm": 1.9737336269201176, + "learning_rate": 5.046751716509442e-06, + "loss": 0.3241, + "step": 19690, + "vit_learning_rate": 1.0093503433018885e-06 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6774, + "epoch": 1.0247607157719518, + "grad_norm": 1.4206625080082356, + "learning_rate": 5.042539956818137e-06, + "loss": 0.3425, + "step": 19700, + "vit_learning_rate": 1.0085079913636275e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6093, + "epoch": 1.0252808988764044, + "grad_norm": 1.3554165808949459, + "learning_rate": 5.038328166939912e-06, + "loss": 0.3392, + "step": 19710, + "vit_learning_rate": 1.0076656333879822e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6275, + "epoch": 1.0258010819808572, + "grad_norm": 1.7040160990123097, + "learning_rate": 5.0341163498635074e-06, + "loss": 0.3425, + "step": 19720, + "vit_learning_rate": 1.0068232699727014e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6654, + "epoch": 1.02632126508531, + "grad_norm": 1.4995614198542597, + "learning_rate": 5.029904508577685e-06, + "loss": 0.3392, + "step": 19730, + "vit_learning_rate": 1.0059809017155366e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6385, + "epoch": 1.0268414481897628, + "grad_norm": 2.3651376509815716, + "learning_rate": 5.0256926460712226e-06, + "loss": 0.33, + "step": 19740, + "vit_learning_rate": 1.0051385292142445e-06 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6387, + "epoch": 1.0273616312942155, + "grad_norm": 1.6519720045287491, + "learning_rate": 5.021480765332916e-06, + "loss": 0.3089, + "step": 19750, + "vit_learning_rate": 1.004296153066583e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6197, + "epoch": 1.0278818143986683, + "grad_norm": 1.4550249545852392, + "learning_rate": 5.017268869351571e-06, + "loss": 0.3383, + "step": 19760, + "vit_learning_rate": 1.0034537738703142e-06 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6602, + "epoch": 1.0284019975031211, + "grad_norm": 1.3688408982529952, + "learning_rate": 5.013056961116004e-06, + "loss": 0.3503, + "step": 19770, + "vit_learning_rate": 1.0026113922232008e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6698, + "epoch": 1.028922180607574, + "grad_norm": 1.2543111785252075, + "learning_rate": 5.008845043615045e-06, + "loss": 0.3253, + "step": 19780, + "vit_learning_rate": 1.0017690087230088e-06 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6647, + "epoch": 1.0294423637120267, + "grad_norm": 1.8413095804587132, + "learning_rate": 5.004633119837524e-06, + "loss": 0.3222, + "step": 19790, + "vit_learning_rate": 1.0009266239675047e-06 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.633, + "epoch": 1.0299625468164795, + "grad_norm": 1.4000470931992877, + "learning_rate": 5.00042119277228e-06, + "loss": 0.3515, + "step": 19800, + "vit_learning_rate": 1.0000842385544559e-06 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6968, + "epoch": 1.0304827299209323, + "grad_norm": 1.6584297470140135, + "learning_rate": 4.996209265408151e-06, + "loss": 0.3477, + "step": 19810, + "vit_learning_rate": 9.9924185308163e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6543, + "epoch": 1.031002913025385, + "grad_norm": 1.4861391299177764, + "learning_rate": 4.991997340733977e-06, + "loss": 0.3226, + "step": 19820, + "vit_learning_rate": 9.983994681467953e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6329, + "epoch": 1.0315230961298376, + "grad_norm": 1.544593137080601, + "learning_rate": 4.987785421738599e-06, + "loss": 0.3075, + "step": 19830, + "vit_learning_rate": 9.975570843477195e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6299, + "epoch": 1.0320432792342904, + "grad_norm": 1.5144601352314075, + "learning_rate": 4.983573511410849e-06, + "loss": 0.3347, + "step": 19840, + "vit_learning_rate": 9.967147022821696e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7136, + "epoch": 1.0325634623387432, + "grad_norm": 1.898222522005609, + "learning_rate": 4.979361612739555e-06, + "loss": 0.3304, + "step": 19850, + "vit_learning_rate": 9.958723225479107e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6402, + "epoch": 1.033083645443196, + "grad_norm": 1.3822458366935735, + "learning_rate": 4.975149728713534e-06, + "loss": 0.2932, + "step": 19860, + "vit_learning_rate": 9.950299457427065e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6193, + "epoch": 1.0336038285476488, + "grad_norm": 1.7965622146526936, + "learning_rate": 4.970937862321599e-06, + "loss": 0.322, + "step": 19870, + "vit_learning_rate": 9.941875724643197e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6806, + "epoch": 1.0341240116521015, + "grad_norm": 1.5017819354904907, + "learning_rate": 4.966726016552546e-06, + "loss": 0.3149, + "step": 19880, + "vit_learning_rate": 9.93345203310509e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6651, + "epoch": 1.0346441947565543, + "grad_norm": 1.4985668491358528, + "learning_rate": 4.962514194395154e-06, + "loss": 0.3297, + "step": 19890, + "vit_learning_rate": 9.925028388790308e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6931, + "epoch": 1.0351643778610071, + "grad_norm": 1.4409514860353885, + "learning_rate": 4.958302398838193e-06, + "loss": 0.3244, + "step": 19900, + "vit_learning_rate": 9.916604797676385e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6433, + "epoch": 1.03568456096546, + "grad_norm": 1.179540435627769, + "learning_rate": 4.954090632870408e-06, + "loss": 0.3022, + "step": 19910, + "vit_learning_rate": 9.908181265740815e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6854, + "epoch": 1.0362047440699127, + "grad_norm": 1.9420862362530762, + "learning_rate": 4.949878899480522e-06, + "loss": 0.3338, + "step": 19920, + "vit_learning_rate": 9.899757798961043e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6262, + "epoch": 1.0367249271743653, + "grad_norm": 1.720303475510658, + "learning_rate": 4.945667201657238e-06, + "loss": 0.324, + "step": 19930, + "vit_learning_rate": 9.891334403314475e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6543, + "epoch": 1.037245110278818, + "grad_norm": 1.53647318705261, + "learning_rate": 4.941455542389235e-06, + "loss": 0.3315, + "step": 19940, + "vit_learning_rate": 9.88291108477847e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.628, + "epoch": 1.0377652933832708, + "grad_norm": 1.4072190695847069, + "learning_rate": 4.937243924665162e-06, + "loss": 0.3337, + "step": 19950, + "vit_learning_rate": 9.874487849330322e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.688, + "epoch": 1.0382854764877236, + "grad_norm": 1.765221588546564, + "learning_rate": 4.9330323514736364e-06, + "loss": 0.3458, + "step": 19960, + "vit_learning_rate": 9.866064702947272e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6662, + "epoch": 1.0388056595921764, + "grad_norm": 1.7848703876008496, + "learning_rate": 4.928820825803251e-06, + "loss": 0.3272, + "step": 19970, + "vit_learning_rate": 9.857641651606503e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6777, + "epoch": 1.0393258426966292, + "grad_norm": 1.5893567639098771, + "learning_rate": 4.9246093506425605e-06, + "loss": 0.325, + "step": 19980, + "vit_learning_rate": 9.84921870128512e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7524, + "epoch": 1.039846025801082, + "grad_norm": 1.5059770462639464, + "learning_rate": 4.920397928980081e-06, + "loss": 0.3401, + "step": 19990, + "vit_learning_rate": 9.840795857960161e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6479, + "epoch": 1.0403662089055348, + "grad_norm": 1.4577009910576124, + "learning_rate": 4.916186563804294e-06, + "loss": 0.3156, + "step": 20000, + "vit_learning_rate": 9.832373127608588e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.644, + "epoch": 1.0408863920099876, + "grad_norm": 1.4623830465741576, + "learning_rate": 4.9119752581036425e-06, + "loss": 0.3434, + "step": 20010, + "vit_learning_rate": 9.823950516207285e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.635, + "epoch": 1.0414065751144403, + "grad_norm": 1.601315015186529, + "learning_rate": 4.907764014866525e-06, + "loss": 0.3223, + "step": 20020, + "vit_learning_rate": 9.81552802973305e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6601, + "epoch": 1.0419267582188931, + "grad_norm": 1.2671567211987524, + "learning_rate": 4.903552837081294e-06, + "loss": 0.3467, + "step": 20030, + "vit_learning_rate": 9.807105674162586e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6116, + "epoch": 1.042446941323346, + "grad_norm": 1.5263744065759406, + "learning_rate": 4.899341727736259e-06, + "loss": 0.3121, + "step": 20040, + "vit_learning_rate": 9.798683455472517e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6504, + "epoch": 1.0429671244277985, + "grad_norm": 1.860564447441257, + "learning_rate": 4.895130689819681e-06, + "loss": 0.3275, + "step": 20050, + "vit_learning_rate": 9.790261379639361e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.6023, + "epoch": 1.0434873075322513, + "grad_norm": 1.7448195796136154, + "learning_rate": 4.890919726319766e-06, + "loss": 0.3282, + "step": 20060, + "vit_learning_rate": 9.781839452639532e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6731, + "epoch": 1.044007490636704, + "grad_norm": 1.4689992587501386, + "learning_rate": 4.8867088402246704e-06, + "loss": 0.323, + "step": 20070, + "vit_learning_rate": 9.773417680449339e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6808, + "epoch": 1.0445276737411568, + "grad_norm": 1.3488440197812297, + "learning_rate": 4.882498034522496e-06, + "loss": 0.3234, + "step": 20080, + "vit_learning_rate": 9.764996069044992e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6768, + "epoch": 1.0450478568456096, + "grad_norm": 1.6394416026825946, + "learning_rate": 4.878287312201288e-06, + "loss": 0.3063, + "step": 20090, + "vit_learning_rate": 9.756574624402575e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6197, + "epoch": 1.0455680399500624, + "grad_norm": 1.5919661733195816, + "learning_rate": 4.874076676249028e-06, + "loss": 0.3407, + "step": 20100, + "vit_learning_rate": 9.748153352498055e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7106, + "epoch": 1.0460882230545152, + "grad_norm": 1.6129033921113634, + "learning_rate": 4.8698661296536435e-06, + "loss": 0.3568, + "step": 20110, + "vit_learning_rate": 9.739732259307286e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6169, + "epoch": 1.046608406158968, + "grad_norm": 1.4663505129902898, + "learning_rate": 4.865655675402993e-06, + "loss": 0.3318, + "step": 20120, + "vit_learning_rate": 9.731311350805986e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6575, + "epoch": 1.0471285892634208, + "grad_norm": 1.939525197231169, + "learning_rate": 4.86144531648487e-06, + "loss": 0.3568, + "step": 20130, + "vit_learning_rate": 9.72289063296974e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6167, + "epoch": 1.0476487723678736, + "grad_norm": 1.1928357445977205, + "learning_rate": 4.8572350558870015e-06, + "loss": 0.3379, + "step": 20140, + "vit_learning_rate": 9.714470111774001e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 3.4861, + "epoch": 1.0481689554723264, + "grad_norm": 1.4533375171883618, + "learning_rate": 4.853024896597046e-06, + "loss": 0.2873, + "step": 20150, + "vit_learning_rate": 9.706049793194092e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6702, + "epoch": 1.048689138576779, + "grad_norm": 1.2693374605206564, + "learning_rate": 4.8488148416025886e-06, + "loss": 0.3324, + "step": 20160, + "vit_learning_rate": 9.697629683205176e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.705, + "epoch": 1.0492093216812317, + "grad_norm": 1.899597665596513, + "learning_rate": 4.844604893891139e-06, + "loss": 0.3363, + "step": 20170, + "vit_learning_rate": 9.689209787782276e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6311, + "epoch": 1.0497295047856845, + "grad_norm": 1.9614326629777319, + "learning_rate": 4.8403950564501355e-06, + "loss": 0.3414, + "step": 20180, + "vit_learning_rate": 9.68079011290027e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 2.6643, + "epoch": 1.0502496878901373, + "grad_norm": 1.6533540828776412, + "learning_rate": 4.836185332266932e-06, + "loss": 0.3233, + "step": 20190, + "vit_learning_rate": 9.672370664533865e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6366, + "epoch": 1.05076987099459, + "grad_norm": 1.4555698767917877, + "learning_rate": 4.831975724328807e-06, + "loss": 0.3309, + "step": 20200, + "vit_learning_rate": 9.663951448657614e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6917, + "epoch": 1.0512900540990429, + "grad_norm": 1.4868972724129033, + "learning_rate": 4.827766235622952e-06, + "loss": 0.3342, + "step": 20210, + "vit_learning_rate": 9.655532471245903e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 3.5315, + "epoch": 1.0518102372034956, + "grad_norm": 1.8706843651757075, + "learning_rate": 4.82355686913648e-06, + "loss": 0.3373, + "step": 20220, + "vit_learning_rate": 9.64711373827296e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6538, + "epoch": 1.0523304203079484, + "grad_norm": 1.1580452747488283, + "learning_rate": 4.819347627856411e-06, + "loss": 0.3541, + "step": 20230, + "vit_learning_rate": 9.63869525571282e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7215, + "epoch": 1.0528506034124012, + "grad_norm": 2.06893129570204, + "learning_rate": 4.81513851476968e-06, + "loss": 0.3648, + "step": 20240, + "vit_learning_rate": 9.630277029539357e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6483, + "epoch": 1.053370786516854, + "grad_norm": 1.8489274089292587, + "learning_rate": 4.810929532863131e-06, + "loss": 0.32, + "step": 20250, + "vit_learning_rate": 9.62185906572626e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.675, + "epoch": 1.0538909696213068, + "grad_norm": 1.966303735425203, + "learning_rate": 4.8067206851235106e-06, + "loss": 0.3276, + "step": 20260, + "vit_learning_rate": 9.61344137024702e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6983, + "epoch": 1.0544111527257596, + "grad_norm": 1.4442130755604181, + "learning_rate": 4.802511974537476e-06, + "loss": 0.355, + "step": 20270, + "vit_learning_rate": 9.605023949074952e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6548, + "epoch": 1.0549313358302121, + "grad_norm": 1.429434739621525, + "learning_rate": 4.798303404091582e-06, + "loss": 0.3389, + "step": 20280, + "vit_learning_rate": 9.596606808183164e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7229, + "epoch": 1.055451518934665, + "grad_norm": 1.3871008623563452, + "learning_rate": 4.794094976772291e-06, + "loss": 0.3474, + "step": 20290, + "vit_learning_rate": 9.588189953544579e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6927, + "epoch": 1.0559717020391177, + "grad_norm": 1.8380683701795042, + "learning_rate": 4.789886695565955e-06, + "loss": 0.3383, + "step": 20300, + "vit_learning_rate": 9.579773391131907e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6626, + "epoch": 1.0564918851435705, + "grad_norm": 1.4820770325638997, + "learning_rate": 4.785678563458827e-06, + "loss": 0.3356, + "step": 20310, + "vit_learning_rate": 9.571357126917653e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6676, + "epoch": 1.0570120682480233, + "grad_norm": 1.2109229958859153, + "learning_rate": 4.781470583437058e-06, + "loss": 0.3312, + "step": 20320, + "vit_learning_rate": 9.562941166874116e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6664, + "epoch": 1.057532251352476, + "grad_norm": 1.3283464605189732, + "learning_rate": 4.777262758486681e-06, + "loss": 0.3416, + "step": 20330, + "vit_learning_rate": 9.554525516973361e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6973, + "epoch": 1.0580524344569289, + "grad_norm": 1.5683808130780068, + "learning_rate": 4.773055091593629e-06, + "loss": 0.3443, + "step": 20340, + "vit_learning_rate": 9.546110183187257e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6617, + "epoch": 1.0585726175613817, + "grad_norm": 1.635729055974689, + "learning_rate": 4.768847585743716e-06, + "loss": 0.3219, + "step": 20350, + "vit_learning_rate": 9.537695171487432e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6419, + "epoch": 1.0590928006658344, + "grad_norm": 1.6249417109670956, + "learning_rate": 4.764640243922646e-06, + "loss": 0.344, + "step": 20360, + "vit_learning_rate": 9.529280487845291e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6265, + "epoch": 1.0596129837702872, + "grad_norm": 1.777839438433593, + "learning_rate": 4.760433069116005e-06, + "loss": 0.345, + "step": 20370, + "vit_learning_rate": 9.52086613823201e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.6348, + "epoch": 1.0601331668747398, + "grad_norm": 1.2870785022049953, + "learning_rate": 4.756226064309259e-06, + "loss": 0.3333, + "step": 20380, + "vit_learning_rate": 9.512452128618518e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6509, + "epoch": 1.0606533499791926, + "grad_norm": 1.5097669907977933, + "learning_rate": 4.7520192324877596e-06, + "loss": 0.3323, + "step": 20390, + "vit_learning_rate": 9.504038464975518e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6579, + "epoch": 1.0611735330836454, + "grad_norm": 2.283349784070149, + "learning_rate": 4.7478125766367235e-06, + "loss": 0.3144, + "step": 20400, + "vit_learning_rate": 9.495625153273445e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6844, + "epoch": 1.0616937161880982, + "grad_norm": 1.3501107730883626, + "learning_rate": 4.743606099741255e-06, + "loss": 0.3319, + "step": 20410, + "vit_learning_rate": 9.487212199482509e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 2.5305, + "epoch": 1.062213899292551, + "grad_norm": 1.4193569736121296, + "learning_rate": 4.739399804786323e-06, + "loss": 0.3371, + "step": 20420, + "vit_learning_rate": 9.478799609572645e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6248, + "epoch": 1.0627340823970037, + "grad_norm": 1.932516823065932, + "learning_rate": 4.735193694756774e-06, + "loss": 0.3288, + "step": 20430, + "vit_learning_rate": 9.470387389513548e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6707, + "epoch": 1.0632542655014565, + "grad_norm": 1.9341593818153366, + "learning_rate": 4.730987772637319e-06, + "loss": 0.3181, + "step": 20440, + "vit_learning_rate": 9.461975545274637e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6794, + "epoch": 1.0637744486059093, + "grad_norm": 1.7044149139143328, + "learning_rate": 4.7267820414125345e-06, + "loss": 0.3409, + "step": 20450, + "vit_learning_rate": 9.453564082825067e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6317, + "epoch": 1.064294631710362, + "grad_norm": 1.8304316235800775, + "learning_rate": 4.722576504066868e-06, + "loss": 0.3308, + "step": 20460, + "vit_learning_rate": 9.445153008133735e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6357, + "epoch": 1.0648148148148149, + "grad_norm": 1.9217489761421396, + "learning_rate": 4.718371163584619e-06, + "loss": 0.323, + "step": 20470, + "vit_learning_rate": 9.436742327169237e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6669, + "epoch": 1.0653349979192677, + "grad_norm": 1.3931419166448005, + "learning_rate": 4.714166022949958e-06, + "loss": 0.3404, + "step": 20480, + "vit_learning_rate": 9.428332045899915e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.642, + "epoch": 1.0658551810237205, + "grad_norm": 1.4104494051479042, + "learning_rate": 4.709961085146907e-06, + "loss": 0.3158, + "step": 20490, + "vit_learning_rate": 9.419922170293812e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6517, + "epoch": 1.066375364128173, + "grad_norm": 1.6436645283900997, + "learning_rate": 4.705756353159345e-06, + "loss": 0.3142, + "step": 20500, + "vit_learning_rate": 9.411512706318689e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.633, + "epoch": 1.0668955472326258, + "grad_norm": 1.9080824085982702, + "learning_rate": 4.7015518299710094e-06, + "loss": 0.3385, + "step": 20510, + "vit_learning_rate": 9.403103659942018e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6539, + "epoch": 1.0674157303370786, + "grad_norm": 1.6116394459463341, + "learning_rate": 4.697347518565482e-06, + "loss": 0.3442, + "step": 20520, + "vit_learning_rate": 9.394695037130964e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.5917, + "epoch": 1.0679359134415314, + "grad_norm": 1.3429009625935513, + "learning_rate": 4.693143421926205e-06, + "loss": 0.326, + "step": 20530, + "vit_learning_rate": 9.386286843852409e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6971, + "epoch": 1.0684560965459842, + "grad_norm": 1.482566425668165, + "learning_rate": 4.688939543036453e-06, + "loss": 0.3295, + "step": 20540, + "vit_learning_rate": 9.377879086072906e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6409, + "epoch": 1.068976279650437, + "grad_norm": 1.5225498634654135, + "learning_rate": 4.68473588487936e-06, + "loss": 0.3206, + "step": 20550, + "vit_learning_rate": 9.369471769758719e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6783, + "epoch": 1.0694964627548897, + "grad_norm": 3.0465064798109354, + "learning_rate": 4.680532450437897e-06, + "loss": 0.3176, + "step": 20560, + "vit_learning_rate": 9.361064900875792e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6855, + "epoch": 1.0700166458593425, + "grad_norm": 1.5092836853064273, + "learning_rate": 4.676329242694876e-06, + "loss": 0.3202, + "step": 20570, + "vit_learning_rate": 9.352658485389751e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6954, + "epoch": 1.0705368289637953, + "grad_norm": 1.7562692070221126, + "learning_rate": 4.6721262646329506e-06, + "loss": 0.3429, + "step": 20580, + "vit_learning_rate": 9.3442525292659e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6964, + "epoch": 1.071057012068248, + "grad_norm": 1.3724509072835103, + "learning_rate": 4.66792351923461e-06, + "loss": 0.3262, + "step": 20590, + "vit_learning_rate": 9.335847038469218e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 4.5634, + "epoch": 1.071577195172701, + "grad_norm": 1.566378417839507, + "learning_rate": 4.6637210094821805e-06, + "loss": 0.3234, + "step": 20600, + "vit_learning_rate": 9.32744201896436e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6762, + "epoch": 1.0720973782771535, + "grad_norm": 2.2999312825791396, + "learning_rate": 4.659518738357815e-06, + "loss": 0.3341, + "step": 20610, + "vit_learning_rate": 9.319037476715627e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6421, + "epoch": 1.0726175613816062, + "grad_norm": 2.2763752677122335, + "learning_rate": 4.6553167088435035e-06, + "loss": 0.3281, + "step": 20620, + "vit_learning_rate": 9.310633417687006e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6393, + "epoch": 1.073137744486059, + "grad_norm": 1.5764018637184551, + "learning_rate": 4.651114923921063e-06, + "loss": 0.3214, + "step": 20630, + "vit_learning_rate": 9.302229847842125e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6473, + "epoch": 1.0736579275905118, + "grad_norm": 1.589467420696674, + "learning_rate": 4.646913386572134e-06, + "loss": 0.319, + "step": 20640, + "vit_learning_rate": 9.293826773144267e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6328, + "epoch": 1.0741781106949646, + "grad_norm": 1.535671318765138, + "learning_rate": 4.642712099778186e-06, + "loss": 0.3227, + "step": 20650, + "vit_learning_rate": 9.285424199556371e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.616, + "epoch": 1.0746982937994174, + "grad_norm": 1.244085208932254, + "learning_rate": 4.638511066520507e-06, + "loss": 0.3028, + "step": 20660, + "vit_learning_rate": 9.277022133041012e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6537, + "epoch": 1.0752184769038702, + "grad_norm": 1.4595174302971705, + "learning_rate": 4.634310289780209e-06, + "loss": 0.3282, + "step": 20670, + "vit_learning_rate": 9.268620579560416e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.679, + "epoch": 1.075738660008323, + "grad_norm": 2.4491048933322195, + "learning_rate": 4.630109772538214e-06, + "loss": 0.3484, + "step": 20680, + "vit_learning_rate": 9.260219545076428e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6857, + "epoch": 1.0762588431127758, + "grad_norm": 1.6251285118233467, + "learning_rate": 4.6259095177752695e-06, + "loss": 0.3415, + "step": 20690, + "vit_learning_rate": 9.251819035550538e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6654, + "epoch": 1.0767790262172285, + "grad_norm": 2.3679166765507387, + "learning_rate": 4.621709528471931e-06, + "loss": 0.3481, + "step": 20700, + "vit_learning_rate": 9.243419056943861e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6284, + "epoch": 1.0772992093216813, + "grad_norm": 2.0100670078713128, + "learning_rate": 4.617509807608566e-06, + "loss": 0.3382, + "step": 20710, + "vit_learning_rate": 9.235019615217131e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.651, + "epoch": 1.0778193924261341, + "grad_norm": 1.6563363372214572, + "learning_rate": 4.613310358165354e-06, + "loss": 0.3263, + "step": 20720, + "vit_learning_rate": 9.226620716330708e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5745, + "epoch": 1.0783395755305867, + "grad_norm": 1.986698578157598, + "learning_rate": 4.60911118312228e-06, + "loss": 0.338, + "step": 20730, + "vit_learning_rate": 9.218222366244559e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.697, + "epoch": 1.0788597586350395, + "grad_norm": 1.5033274837370927, + "learning_rate": 4.604912285459138e-06, + "loss": 0.3349, + "step": 20740, + "vit_learning_rate": 9.209824570918274e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6607, + "epoch": 1.0793799417394923, + "grad_norm": 1.2750949646991552, + "learning_rate": 4.600713668155515e-06, + "loss": 0.328, + "step": 20750, + "vit_learning_rate": 9.201427336311029e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.6174, + "epoch": 1.079900124843945, + "grad_norm": 1.3083663685944806, + "learning_rate": 4.596515334190811e-06, + "loss": 0.3321, + "step": 20760, + "vit_learning_rate": 9.19303066838162e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6846, + "epoch": 1.0804203079483978, + "grad_norm": 1.4018524006956483, + "learning_rate": 4.592317286544218e-06, + "loss": 0.3373, + "step": 20770, + "vit_learning_rate": 9.184634573088435e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6683, + "epoch": 1.0809404910528506, + "grad_norm": 1.8498218473754817, + "learning_rate": 4.588119528194727e-06, + "loss": 0.3265, + "step": 20780, + "vit_learning_rate": 9.176239056389453e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5327, + "epoch": 1.0814606741573034, + "grad_norm": 1.387543591635333, + "learning_rate": 4.583922062121125e-06, + "loss": 0.3418, + "step": 20790, + "vit_learning_rate": 9.167844124242249e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6023, + "epoch": 1.0819808572617562, + "grad_norm": 1.4078079970731647, + "learning_rate": 4.579724891301987e-06, + "loss": 0.3267, + "step": 20800, + "vit_learning_rate": 9.159449782603973e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6682, + "epoch": 1.082501040366209, + "grad_norm": 1.4459949799079492, + "learning_rate": 4.575528018715687e-06, + "loss": 0.3351, + "step": 20810, + "vit_learning_rate": 9.151056037431373e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6869, + "epoch": 1.0830212234706618, + "grad_norm": 2.384111653583658, + "learning_rate": 4.571331447340375e-06, + "loss": 0.3474, + "step": 20820, + "vit_learning_rate": 9.142662894680748e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.5901, + "epoch": 1.0835414065751143, + "grad_norm": 1.700402464249538, + "learning_rate": 4.567135180153997e-06, + "loss": 0.3247, + "step": 20830, + "vit_learning_rate": 9.134270360307994e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7037, + "epoch": 1.0840615896795671, + "grad_norm": 1.4891154472906265, + "learning_rate": 4.562939220134282e-06, + "loss": 0.3385, + "step": 20840, + "vit_learning_rate": 9.125878440268564e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6326, + "epoch": 1.08458177278402, + "grad_norm": 1.69355900136847, + "learning_rate": 4.558743570258737e-06, + "loss": 0.3398, + "step": 20850, + "vit_learning_rate": 9.117487140517472e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6351, + "epoch": 1.0851019558884727, + "grad_norm": 1.6236274328933402, + "learning_rate": 4.554548233504652e-06, + "loss": 0.332, + "step": 20860, + "vit_learning_rate": 9.109096467009304e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6732, + "epoch": 1.0856221389929255, + "grad_norm": 1.3205760433646168, + "learning_rate": 4.550353212849094e-06, + "loss": 0.333, + "step": 20870, + "vit_learning_rate": 9.100706425698188e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7643, + "epoch": 1.0861423220973783, + "grad_norm": 1.587365209612798, + "learning_rate": 4.546158511268909e-06, + "loss": 0.3742, + "step": 20880, + "vit_learning_rate": 9.092317022537818e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6344, + "epoch": 1.086662505201831, + "grad_norm": 1.9178947223642928, + "learning_rate": 4.541964131740707e-06, + "loss": 0.3228, + "step": 20890, + "vit_learning_rate": 9.083928263481413e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7105, + "epoch": 1.0871826883062838, + "grad_norm": 1.4480902564348006, + "learning_rate": 4.53777007724088e-06, + "loss": 0.3321, + "step": 20900, + "vit_learning_rate": 9.075540154481759e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5803, + "epoch": 1.0877028714107366, + "grad_norm": 1.5187197943146262, + "learning_rate": 4.533576350745584e-06, + "loss": 0.303, + "step": 20910, + "vit_learning_rate": 9.067152701491166e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6436, + "epoch": 1.0882230545151894, + "grad_norm": 1.8042573200592467, + "learning_rate": 4.529382955230742e-06, + "loss": 0.3257, + "step": 20920, + "vit_learning_rate": 9.058765910461483e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6254, + "epoch": 1.0887432376196422, + "grad_norm": 1.8873375879248295, + "learning_rate": 4.525189893672046e-06, + "loss": 0.3483, + "step": 20930, + "vit_learning_rate": 9.050379787344092e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6357, + "epoch": 1.089263420724095, + "grad_norm": 1.431766275509874, + "learning_rate": 4.520997169044948e-06, + "loss": 0.3427, + "step": 20940, + "vit_learning_rate": 9.041994338089893e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6623, + "epoch": 1.0897836038285476, + "grad_norm": 1.3718102682791944, + "learning_rate": 4.516804784324662e-06, + "loss": 0.3356, + "step": 20950, + "vit_learning_rate": 9.033609568649323e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6221, + "epoch": 1.0903037869330003, + "grad_norm": 1.433727059182231, + "learning_rate": 4.512612742486157e-06, + "loss": 0.3199, + "step": 20960, + "vit_learning_rate": 9.025225484972312e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7153, + "epoch": 1.0908239700374531, + "grad_norm": 1.8463727686638203, + "learning_rate": 4.508421046504164e-06, + "loss": 0.3379, + "step": 20970, + "vit_learning_rate": 9.016842093008327e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6385, + "epoch": 1.091344153141906, + "grad_norm": 1.6111179831595848, + "learning_rate": 4.504229699353167e-06, + "loss": 0.3315, + "step": 20980, + "vit_learning_rate": 9.008459398706334e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6103, + "epoch": 1.0918643362463587, + "grad_norm": 1.615702195938999, + "learning_rate": 4.500038704007402e-06, + "loss": 0.3213, + "step": 20990, + "vit_learning_rate": 9.000077408014802e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 2.5854, + "epoch": 1.0923845193508115, + "grad_norm": 1.2664710827169086, + "learning_rate": 4.495848063440854e-06, + "loss": 0.3347, + "step": 21000, + "vit_learning_rate": 8.991696126881707e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4947, + "epoch": 1.0929047024552643, + "grad_norm": 1.7933287220231555, + "learning_rate": 4.49165778062726e-06, + "loss": 0.3516, + "step": 21010, + "vit_learning_rate": 8.98331556125452e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.604, + "epoch": 1.093424885559717, + "grad_norm": 1.7251820740328954, + "learning_rate": 4.487467858540099e-06, + "loss": 0.3245, + "step": 21020, + "vit_learning_rate": 8.974935717080197e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6552, + "epoch": 1.0939450686641699, + "grad_norm": 1.5116151097568176, + "learning_rate": 4.483278300152593e-06, + "loss": 0.3257, + "step": 21030, + "vit_learning_rate": 8.966556600305186e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6496, + "epoch": 1.0944652517686226, + "grad_norm": 2.255510266714489, + "learning_rate": 4.479089108437714e-06, + "loss": 0.3386, + "step": 21040, + "vit_learning_rate": 8.958178216875427e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6532, + "epoch": 1.0949854348730754, + "grad_norm": 1.594004771743007, + "learning_rate": 4.474900286368165e-06, + "loss": 0.3234, + "step": 21050, + "vit_learning_rate": 8.949800572736329e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.5643, + "epoch": 1.095505617977528, + "grad_norm": 1.5518089912555397, + "learning_rate": 4.47071183691639e-06, + "loss": 0.3157, + "step": 21060, + "vit_learning_rate": 8.94142367383278e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.611, + "epoch": 1.0960258010819808, + "grad_norm": 1.4120867650789488, + "learning_rate": 4.466523763054571e-06, + "loss": 0.3276, + "step": 21070, + "vit_learning_rate": 8.933047526109141e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6563, + "epoch": 1.0965459841864336, + "grad_norm": 1.620034683984104, + "learning_rate": 4.462336067754621e-06, + "loss": 0.3082, + "step": 21080, + "vit_learning_rate": 8.92467213550924e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.6627, + "epoch": 1.0970661672908864, + "grad_norm": 1.90324396937659, + "learning_rate": 4.4581487539881815e-06, + "loss": 0.3357, + "step": 21090, + "vit_learning_rate": 8.916297507976362e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6429, + "epoch": 1.0975863503953391, + "grad_norm": 2.3528237999887796, + "learning_rate": 4.453961824726628e-06, + "loss": 0.3484, + "step": 21100, + "vit_learning_rate": 8.907923649453255e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6663, + "epoch": 1.098106533499792, + "grad_norm": 1.7638214155836414, + "learning_rate": 4.449775282941062e-06, + "loss": 0.3232, + "step": 21110, + "vit_learning_rate": 8.899550565882124e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6991, + "epoch": 1.0986267166042447, + "grad_norm": 1.3396202583472205, + "learning_rate": 4.445589131602311e-06, + "loss": 0.312, + "step": 21120, + "vit_learning_rate": 8.89117826320462e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6113, + "epoch": 1.0991468997086975, + "grad_norm": 2.319507310596879, + "learning_rate": 4.441403373680919e-06, + "loss": 0.3356, + "step": 21130, + "vit_learning_rate": 8.882806747361838e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.6529, + "epoch": 1.0996670828131503, + "grad_norm": 2.1703578763528966, + "learning_rate": 4.437218012147161e-06, + "loss": 0.3275, + "step": 21140, + "vit_learning_rate": 8.874436024294321e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.7001, + "epoch": 1.100187265917603, + "grad_norm": 1.755201201771041, + "learning_rate": 4.433033049971024e-06, + "loss": 0.3429, + "step": 21150, + "vit_learning_rate": 8.866066099942047e-07 + }, + { + "avg_batch_load_time": 2.4212, + "avg_batch_processing_time": 0.6769, + "epoch": 1.1007074490220559, + "grad_norm": 1.4603090815216537, + "learning_rate": 4.428848490122212e-06, + "loss": 0.3334, + "step": 21160, + "vit_learning_rate": 8.857696980244422e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6777, + "epoch": 1.1012276321265086, + "grad_norm": 1.4841154904451441, + "learning_rate": 4.424664335570143e-06, + "loss": 0.3415, + "step": 21170, + "vit_learning_rate": 8.849328671140285e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6928, + "epoch": 1.1017478152309612, + "grad_norm": 1.5459966157096048, + "learning_rate": 4.4204805892839535e-06, + "loss": 0.3056, + "step": 21180, + "vit_learning_rate": 8.840961178567905e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6491, + "epoch": 1.102267998335414, + "grad_norm": 2.6580624198917318, + "learning_rate": 4.4162972542324826e-06, + "loss": 0.3461, + "step": 21190, + "vit_learning_rate": 8.832594508464963e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6526, + "epoch": 1.1027881814398668, + "grad_norm": 1.3919104362577508, + "learning_rate": 4.41211433338428e-06, + "loss": 0.3379, + "step": 21200, + "vit_learning_rate": 8.824228666768558e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6582, + "epoch": 1.1033083645443196, + "grad_norm": 1.473360043036596, + "learning_rate": 4.407931829707605e-06, + "loss": 0.3322, + "step": 21210, + "vit_learning_rate": 8.815863659415209e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 2.6498, + "epoch": 1.1038285476487724, + "grad_norm": 1.6656839915309074, + "learning_rate": 4.4037497461704185e-06, + "loss": 0.3313, + "step": 21220, + "vit_learning_rate": 8.807499492340835e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6164, + "epoch": 1.1043487307532251, + "grad_norm": 1.2645726644937814, + "learning_rate": 4.399568085740379e-06, + "loss": 0.3214, + "step": 21230, + "vit_learning_rate": 8.799136171480757e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.637, + "epoch": 1.104868913857678, + "grad_norm": 1.5246030113555389, + "learning_rate": 4.395386851384851e-06, + "loss": 0.329, + "step": 21240, + "vit_learning_rate": 8.790773702769702e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6135, + "epoch": 1.1053890969621307, + "grad_norm": 1.6561499791383185, + "learning_rate": 4.391206046070895e-06, + "loss": 0.3175, + "step": 21250, + "vit_learning_rate": 8.782412092141788e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6379, + "epoch": 1.1059092800665835, + "grad_norm": 1.544697461709569, + "learning_rate": 4.387025672765265e-06, + "loss": 0.3193, + "step": 21260, + "vit_learning_rate": 8.77405134553053e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6577, + "epoch": 1.1064294631710363, + "grad_norm": 1.6858963176618995, + "learning_rate": 4.38284573443441e-06, + "loss": 0.3115, + "step": 21270, + "vit_learning_rate": 8.765691468868819e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6172, + "epoch": 1.1069496462754889, + "grad_norm": 1.1756098422582586, + "learning_rate": 4.378666234044471e-06, + "loss": 0.3176, + "step": 21280, + "vit_learning_rate": 8.757332468088941e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7005, + "epoch": 1.1074698293799417, + "grad_norm": 1.8045831649396213, + "learning_rate": 4.3744871745612776e-06, + "loss": 0.3135, + "step": 21290, + "vit_learning_rate": 8.748974349122554e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7776, + "epoch": 1.1079900124843944, + "grad_norm": 1.659611725825699, + "learning_rate": 4.370308558950345e-06, + "loss": 0.3558, + "step": 21300, + "vit_learning_rate": 8.740617117900689e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6114, + "epoch": 1.1085101955888472, + "grad_norm": 1.6075336545007528, + "learning_rate": 4.366130390176873e-06, + "loss": 0.3349, + "step": 21310, + "vit_learning_rate": 8.732260780353745e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6566, + "epoch": 1.1090303786933, + "grad_norm": 1.6530714750293616, + "learning_rate": 4.361952671205749e-06, + "loss": 0.3307, + "step": 21320, + "vit_learning_rate": 8.723905342411497e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.6326, + "epoch": 1.1095505617977528, + "grad_norm": 1.308808889492006, + "learning_rate": 4.357775405001539e-06, + "loss": 0.3359, + "step": 21330, + "vit_learning_rate": 8.715550810003076e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.6702, + "epoch": 1.1100707449022056, + "grad_norm": 1.725730884410086, + "learning_rate": 4.353598594528482e-06, + "loss": 0.347, + "step": 21340, + "vit_learning_rate": 8.707197189056963e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7635, + "epoch": 1.1105909280066584, + "grad_norm": 1.7746584358692739, + "learning_rate": 4.349422242750504e-06, + "loss": 0.3456, + "step": 21350, + "vit_learning_rate": 8.698844485501007e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 3.3546, + "epoch": 1.1111111111111112, + "grad_norm": 1.4700902309702992, + "learning_rate": 4.345246352631199e-06, + "loss": 0.3248, + "step": 21360, + "vit_learning_rate": 8.690492705262395e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6304, + "epoch": 1.111631294215564, + "grad_norm": 1.5651792338369215, + "learning_rate": 4.341070927133831e-06, + "loss": 0.3379, + "step": 21370, + "vit_learning_rate": 8.682141854267661e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7226, + "epoch": 1.1121514773200167, + "grad_norm": 1.7005245787843761, + "learning_rate": 4.33689596922134e-06, + "loss": 0.354, + "step": 21380, + "vit_learning_rate": 8.673791938442679e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 3.5218, + "epoch": 1.1126716604244695, + "grad_norm": 2.1665881134565286, + "learning_rate": 4.332721481856332e-06, + "loss": 0.3327, + "step": 21390, + "vit_learning_rate": 8.665442963712663e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7487, + "epoch": 1.113191843528922, + "grad_norm": 1.8190992605755458, + "learning_rate": 4.32854746800108e-06, + "loss": 0.3212, + "step": 21400, + "vit_learning_rate": 8.657094936002159e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 2.6566, + "epoch": 1.1137120266333749, + "grad_norm": 1.75564617785654, + "learning_rate": 4.324373930617517e-06, + "loss": 0.3484, + "step": 21410, + "vit_learning_rate": 8.648747861235033e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.695, + "epoch": 1.1142322097378277, + "grad_norm": 1.5160999511548916, + "learning_rate": 4.320200872667244e-06, + "loss": 0.3276, + "step": 21420, + "vit_learning_rate": 8.640401745334487e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6897, + "epoch": 1.1147523928422804, + "grad_norm": 1.8363648844441944, + "learning_rate": 4.316028297111519e-06, + "loss": 0.3396, + "step": 21430, + "vit_learning_rate": 8.632056594223038e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6459, + "epoch": 1.1152725759467332, + "grad_norm": 1.0688534784833725, + "learning_rate": 4.311856206911255e-06, + "loss": 0.3379, + "step": 21440, + "vit_learning_rate": 8.623712413822508e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6083, + "epoch": 1.115792759051186, + "grad_norm": 2.2486603563267367, + "learning_rate": 4.307684605027022e-06, + "loss": 0.3201, + "step": 21450, + "vit_learning_rate": 8.615369210054044e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6365, + "epoch": 1.1163129421556388, + "grad_norm": 1.1439326253861224, + "learning_rate": 4.303513494419048e-06, + "loss": 0.325, + "step": 21460, + "vit_learning_rate": 8.607026988838095e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6843, + "epoch": 1.1168331252600916, + "grad_norm": 2.1794923305671667, + "learning_rate": 4.299342878047207e-06, + "loss": 0.3284, + "step": 21470, + "vit_learning_rate": 8.598685756094413e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6802, + "epoch": 1.1173533083645444, + "grad_norm": 1.9295071148074132, + "learning_rate": 4.295172758871023e-06, + "loss": 0.3449, + "step": 21480, + "vit_learning_rate": 8.590345517742046e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6581, + "epoch": 1.1178734914689972, + "grad_norm": 1.5633438311341639, + "learning_rate": 4.291003139849671e-06, + "loss": 0.3323, + "step": 21490, + "vit_learning_rate": 8.582006279699342e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6538, + "epoch": 1.1183936745734497, + "grad_norm": 1.6896883639609064, + "learning_rate": 4.286834023941969e-06, + "loss": 0.3587, + "step": 21500, + "vit_learning_rate": 8.573668047883936e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6378, + "epoch": 1.1189138576779025, + "grad_norm": 1.2670263809059286, + "learning_rate": 4.2826654141063736e-06, + "loss": 0.3199, + "step": 21510, + "vit_learning_rate": 8.565330828212746e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.6005, + "epoch": 1.1194340407823553, + "grad_norm": 2.094546346141093, + "learning_rate": 4.278497313300986e-06, + "loss": 0.3283, + "step": 21520, + "vit_learning_rate": 8.556994626601971e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6678, + "epoch": 1.119954223886808, + "grad_norm": 2.131088269850861, + "learning_rate": 4.27432972448355e-06, + "loss": 0.3165, + "step": 21530, + "vit_learning_rate": 8.548659448967098e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6665, + "epoch": 1.1204744069912609, + "grad_norm": 1.996327644751979, + "learning_rate": 4.270162650611439e-06, + "loss": 0.3611, + "step": 21540, + "vit_learning_rate": 8.540325301222878e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6771, + "epoch": 1.1209945900957137, + "grad_norm": 1.9541087930870589, + "learning_rate": 4.265996094641666e-06, + "loss": 0.3367, + "step": 21550, + "vit_learning_rate": 8.531992189283331e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6604, + "epoch": 1.1215147732001665, + "grad_norm": 1.4684763743259934, + "learning_rate": 4.261830059530876e-06, + "loss": 0.3539, + "step": 21560, + "vit_learning_rate": 8.523660119061749e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6961, + "epoch": 1.1220349563046192, + "grad_norm": 1.4066766549041019, + "learning_rate": 4.257664548235341e-06, + "loss": 0.3323, + "step": 21570, + "vit_learning_rate": 8.515329096470681e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.6639, + "epoch": 1.122555139409072, + "grad_norm": 1.3899019104137371, + "learning_rate": 4.253499563710964e-06, + "loss": 0.3292, + "step": 21580, + "vit_learning_rate": 8.506999127421928e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6394, + "epoch": 1.1230753225135248, + "grad_norm": 1.8154935039877929, + "learning_rate": 4.2493351089132735e-06, + "loss": 0.3447, + "step": 21590, + "vit_learning_rate": 8.498670217826545e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.6144, + "epoch": 1.1235955056179776, + "grad_norm": 1.7573944861038684, + "learning_rate": 4.245171186797423e-06, + "loss": 0.3348, + "step": 21600, + "vit_learning_rate": 8.490342373594844e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6674, + "epoch": 1.1241156887224304, + "grad_norm": 1.7785831209563199, + "learning_rate": 4.241007800318187e-06, + "loss": 0.3431, + "step": 21610, + "vit_learning_rate": 8.482015600636373e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 3.2761, + "epoch": 1.124635871826883, + "grad_norm": 1.8431535642358676, + "learning_rate": 4.236844952429958e-06, + "loss": 0.3116, + "step": 21620, + "vit_learning_rate": 8.473689904859916e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6522, + "epoch": 1.1251560549313357, + "grad_norm": 1.4531083824014046, + "learning_rate": 4.232682646086754e-06, + "loss": 0.319, + "step": 21630, + "vit_learning_rate": 8.465365292173507e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7238, + "epoch": 1.1256762380357885, + "grad_norm": 1.0210262435205284, + "learning_rate": 4.228520884242199e-06, + "loss": 0.3087, + "step": 21640, + "vit_learning_rate": 8.457041768484398e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6187, + "epoch": 1.1261964211402413, + "grad_norm": 1.5525299788132936, + "learning_rate": 4.224359669849536e-06, + "loss": 0.3273, + "step": 21650, + "vit_learning_rate": 8.448719339699069e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6129, + "epoch": 1.126716604244694, + "grad_norm": 1.411434246903693, + "learning_rate": 4.220199005861616e-06, + "loss": 0.3303, + "step": 21660, + "vit_learning_rate": 8.440398011723231e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6502, + "epoch": 1.127236787349147, + "grad_norm": 1.4798175457852345, + "learning_rate": 4.216038895230905e-06, + "loss": 0.3403, + "step": 21670, + "vit_learning_rate": 8.432077790461809e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6956, + "epoch": 1.1277569704535997, + "grad_norm": 1.3701289475351603, + "learning_rate": 4.2118793409094714e-06, + "loss": 0.3174, + "step": 21680, + "vit_learning_rate": 8.423758681818942e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6979, + "epoch": 1.1282771535580525, + "grad_norm": 1.57126325974674, + "learning_rate": 4.20772034584899e-06, + "loss": 0.33, + "step": 21690, + "vit_learning_rate": 8.41544069169798e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6641, + "epoch": 1.1287973366625053, + "grad_norm": 2.571093126576272, + "learning_rate": 4.203561913000743e-06, + "loss": 0.3402, + "step": 21700, + "vit_learning_rate": 8.407123826001484e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6318, + "epoch": 1.129317519766958, + "grad_norm": 2.0395426597121293, + "learning_rate": 4.199404045315607e-06, + "loss": 0.3339, + "step": 21710, + "vit_learning_rate": 8.398808090631213e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.7151, + "epoch": 1.1298377028714108, + "grad_norm": 1.43264069861721, + "learning_rate": 4.19524674574406e-06, + "loss": 0.3308, + "step": 21720, + "vit_learning_rate": 8.390493491488118e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6584, + "epoch": 1.1303578859758634, + "grad_norm": 1.3852624559499993, + "learning_rate": 4.191090017236177e-06, + "loss": 0.3498, + "step": 21730, + "vit_learning_rate": 8.382180034472353e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6892, + "epoch": 1.1308780690803162, + "grad_norm": 1.3827272109946194, + "learning_rate": 4.18693386274163e-06, + "loss": 0.3199, + "step": 21740, + "vit_learning_rate": 8.373867725483259e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6519, + "epoch": 1.131398252184769, + "grad_norm": 1.3838406805056969, + "learning_rate": 4.182778285209681e-06, + "loss": 0.3159, + "step": 21750, + "vit_learning_rate": 8.365556570419361e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.635, + "epoch": 1.1319184352892218, + "grad_norm": 1.4345731154726309, + "learning_rate": 4.178623287589182e-06, + "loss": 0.3583, + "step": 21760, + "vit_learning_rate": 8.357246575178363e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6254, + "epoch": 1.1324386183936745, + "grad_norm": 1.7152727307125348, + "learning_rate": 4.17446887282858e-06, + "loss": 0.3064, + "step": 21770, + "vit_learning_rate": 8.348937745657158e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6202, + "epoch": 1.1329588014981273, + "grad_norm": 1.5751779487915738, + "learning_rate": 4.170315043875896e-06, + "loss": 0.3361, + "step": 21780, + "vit_learning_rate": 8.34063008775179e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6466, + "epoch": 1.1334789846025801, + "grad_norm": 1.7919756335733399, + "learning_rate": 4.166161803678747e-06, + "loss": 0.3375, + "step": 21790, + "vit_learning_rate": 8.332323607357492e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6624, + "epoch": 1.133999167707033, + "grad_norm": 1.535866992609764, + "learning_rate": 4.162009155184325e-06, + "loss": 0.3445, + "step": 21800, + "vit_learning_rate": 8.324018310368649e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6532, + "epoch": 1.1345193508114857, + "grad_norm": 1.6555256623269186, + "learning_rate": 4.157857101339408e-06, + "loss": 0.3365, + "step": 21810, + "vit_learning_rate": 8.315714202678815e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6195, + "epoch": 1.1350395339159385, + "grad_norm": 1.565869776082611, + "learning_rate": 4.153705645090348e-06, + "loss": 0.3194, + "step": 21820, + "vit_learning_rate": 8.307411290180695e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6255, + "epoch": 1.1355597170203913, + "grad_norm": 1.6189939017706532, + "learning_rate": 4.149554789383071e-06, + "loss": 0.3398, + "step": 21830, + "vit_learning_rate": 8.299109578766141e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6297, + "epoch": 1.136079900124844, + "grad_norm": 1.5099625252031452, + "learning_rate": 4.145404537163086e-06, + "loss": 0.3289, + "step": 21840, + "vit_learning_rate": 8.290809074326171e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6536, + "epoch": 1.1366000832292966, + "grad_norm": 1.613982759723027, + "learning_rate": 4.1412548913754605e-06, + "loss": 0.3257, + "step": 21850, + "vit_learning_rate": 8.282509782750921e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6291, + "epoch": 1.1371202663337494, + "grad_norm": 1.9246442493024154, + "learning_rate": 4.1371058549648425e-06, + "loss": 0.3357, + "step": 21860, + "vit_learning_rate": 8.274211709929684e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6597, + "epoch": 1.1376404494382022, + "grad_norm": 1.8191417910697252, + "learning_rate": 4.132957430875442e-06, + "loss": 0.3494, + "step": 21870, + "vit_learning_rate": 8.265914861750883e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6388, + "epoch": 1.138160632542655, + "grad_norm": 1.5993275525224886, + "learning_rate": 4.128809622051037e-06, + "loss": 0.3177, + "step": 21880, + "vit_learning_rate": 8.257619244102073e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6793, + "epoch": 1.1386808156471078, + "grad_norm": 1.67401931494012, + "learning_rate": 4.124662431434968e-06, + "loss": 0.3259, + "step": 21890, + "vit_learning_rate": 8.249324862869935e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7276, + "epoch": 1.1392009987515606, + "grad_norm": 1.3703569216642923, + "learning_rate": 4.120515861970137e-06, + "loss": 0.3404, + "step": 21900, + "vit_learning_rate": 8.241031723940273e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6451, + "epoch": 1.1397211818560133, + "grad_norm": 1.6275897768268655, + "learning_rate": 4.116369916599007e-06, + "loss": 0.3248, + "step": 21910, + "vit_learning_rate": 8.232739833198014e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6484, + "epoch": 1.1402413649604661, + "grad_norm": 1.2921151924084873, + "learning_rate": 4.1122245982635914e-06, + "loss": 0.3191, + "step": 21920, + "vit_learning_rate": 8.224449196527182e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7124, + "epoch": 1.140761548064919, + "grad_norm": 2.7428572432752607, + "learning_rate": 4.108079909905468e-06, + "loss": 0.3303, + "step": 21930, + "vit_learning_rate": 8.216159819810935e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6292, + "epoch": 1.1412817311693717, + "grad_norm": 1.45461803864462, + "learning_rate": 4.10393585446576e-06, + "loss": 0.3238, + "step": 21940, + "vit_learning_rate": 8.207871708931519e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6366, + "epoch": 1.1418019142738243, + "grad_norm": 1.4690173402694284, + "learning_rate": 4.099792434885147e-06, + "loss": 0.3355, + "step": 21950, + "vit_learning_rate": 8.199584869770293e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6901, + "epoch": 1.142322097378277, + "grad_norm": 1.4844566309730465, + "learning_rate": 4.095649654103854e-06, + "loss": 0.3375, + "step": 21960, + "vit_learning_rate": 8.191299308207707e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6679, + "epoch": 1.1428422804827298, + "grad_norm": 1.1820483407613056, + "learning_rate": 4.091507515061651e-06, + "loss": 0.3362, + "step": 21970, + "vit_learning_rate": 8.183015030123302e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6986, + "epoch": 1.1433624635871826, + "grad_norm": 1.6031824515514266, + "learning_rate": 4.087366020697863e-06, + "loss": 0.3159, + "step": 21980, + "vit_learning_rate": 8.174732041395724e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6633, + "epoch": 1.1438826466916354, + "grad_norm": 1.7420737035911176, + "learning_rate": 4.08322517395134e-06, + "loss": 0.3242, + "step": 21990, + "vit_learning_rate": 8.166450347902678e-07 + }, + { + "avg_batch_load_time": 0.0119, + "avg_batch_processing_time": 0.6262, + "epoch": 1.1444028297960882, + "grad_norm": 1.45267511953158, + "learning_rate": 4.079084977760487e-06, + "loss": 0.3334, + "step": 22000, + "vit_learning_rate": 8.158169955520974e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6944, + "epoch": 1.144923012900541, + "grad_norm": 1.950487559500974, + "learning_rate": 4.074945435063242e-06, + "loss": 0.3198, + "step": 22010, + "vit_learning_rate": 8.149890870126484e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7565, + "epoch": 1.1454431960049938, + "grad_norm": 1.3591208496764997, + "learning_rate": 4.070806548797081e-06, + "loss": 0.3205, + "step": 22020, + "vit_learning_rate": 8.141613097594162e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6466, + "epoch": 1.1459633791094466, + "grad_norm": 1.7168850611805568, + "learning_rate": 4.066668321899013e-06, + "loss": 0.3424, + "step": 22030, + "vit_learning_rate": 8.133336643798024e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6514, + "epoch": 1.1464835622138994, + "grad_norm": 1.618458422932081, + "learning_rate": 4.062530757305576e-06, + "loss": 0.3497, + "step": 22040, + "vit_learning_rate": 8.125061514611151e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6587, + "epoch": 1.1470037453183521, + "grad_norm": 1.5768109161576624, + "learning_rate": 4.058393857952847e-06, + "loss": 0.3265, + "step": 22050, + "vit_learning_rate": 8.116787715905692e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7099, + "epoch": 1.147523928422805, + "grad_norm": 1.529147728456222, + "learning_rate": 4.054257626776417e-06, + "loss": 0.3333, + "step": 22060, + "vit_learning_rate": 8.108515253552834e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6374, + "epoch": 1.1480441115272577, + "grad_norm": 1.137456246817297, + "learning_rate": 4.050122066711417e-06, + "loss": 0.3203, + "step": 22070, + "vit_learning_rate": 8.100244133422833e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.724, + "epoch": 1.1485642946317103, + "grad_norm": 1.7561996175094003, + "learning_rate": 4.045987180692493e-06, + "loss": 0.3561, + "step": 22080, + "vit_learning_rate": 8.091974361384986e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6436, + "epoch": 1.149084477736163, + "grad_norm": 1.5562617553711433, + "learning_rate": 4.041852971653817e-06, + "loss": 0.3234, + "step": 22090, + "vit_learning_rate": 8.083705943307634e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6763, + "epoch": 1.1496046608406159, + "grad_norm": 2.023536020390022, + "learning_rate": 4.037719442529079e-06, + "loss": 0.3447, + "step": 22100, + "vit_learning_rate": 8.075438885058157e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6479, + "epoch": 1.1501248439450686, + "grad_norm": 1.6012431260863191, + "learning_rate": 4.033586596251485e-06, + "loss": 0.3299, + "step": 22110, + "vit_learning_rate": 8.067173192502968e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6491, + "epoch": 1.1506450270495214, + "grad_norm": 1.3945458761121552, + "learning_rate": 4.029454435753761e-06, + "loss": 0.3256, + "step": 22120, + "vit_learning_rate": 8.058908871507521e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7313, + "epoch": 1.1511652101539742, + "grad_norm": 2.0533609352766997, + "learning_rate": 4.025322963968138e-06, + "loss": 0.3394, + "step": 22130, + "vit_learning_rate": 8.050645927936276e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6815, + "epoch": 1.151685393258427, + "grad_norm": 2.1246226376737014, + "learning_rate": 4.021192183826369e-06, + "loss": 0.3354, + "step": 22140, + "vit_learning_rate": 8.042384367652737e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6257, + "epoch": 1.1522055763628798, + "grad_norm": 1.2656426294801164, + "learning_rate": 4.017062098259707e-06, + "loss": 0.3303, + "step": 22150, + "vit_learning_rate": 8.034124196519412e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6393, + "epoch": 1.1527257594673326, + "grad_norm": 1.8321896640360842, + "learning_rate": 4.012932710198918e-06, + "loss": 0.3358, + "step": 22160, + "vit_learning_rate": 8.025865420397834e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6993, + "epoch": 1.1532459425717854, + "grad_norm": 1.7620339703854155, + "learning_rate": 4.00880402257427e-06, + "loss": 0.3186, + "step": 22170, + "vit_learning_rate": 8.017608045148538e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6507, + "epoch": 1.153766125676238, + "grad_norm": 1.6479250077482441, + "learning_rate": 4.004676038315535e-06, + "loss": 0.3281, + "step": 22180, + "vit_learning_rate": 8.009352076631069e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6669, + "epoch": 1.1542863087806907, + "grad_norm": 1.4295590375980165, + "learning_rate": 4.000548760351989e-06, + "loss": 0.3373, + "step": 22190, + "vit_learning_rate": 8.001097520703976e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6755, + "epoch": 1.1548064918851435, + "grad_norm": 1.06211930202088, + "learning_rate": 3.996422191612397e-06, + "loss": 0.3501, + "step": 22200, + "vit_learning_rate": 7.992844383224794e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6418, + "epoch": 1.1553266749895963, + "grad_norm": 1.5941056806575244, + "learning_rate": 3.992296335025033e-06, + "loss": 0.3234, + "step": 22210, + "vit_learning_rate": 7.984592670050065e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6802, + "epoch": 1.155846858094049, + "grad_norm": 1.6361295334863208, + "learning_rate": 3.988171193517657e-06, + "loss": 0.3512, + "step": 22220, + "vit_learning_rate": 7.976342387035312e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6993, + "epoch": 1.1563670411985019, + "grad_norm": 1.3697329457120992, + "learning_rate": 3.9840467700175255e-06, + "loss": 0.3205, + "step": 22230, + "vit_learning_rate": 7.96809354003505e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6459, + "epoch": 1.1568872243029547, + "grad_norm": 1.5881697231216076, + "learning_rate": 3.979923067451385e-06, + "loss": 0.3395, + "step": 22240, + "vit_learning_rate": 7.95984613490277e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6303, + "epoch": 1.1574074074074074, + "grad_norm": 1.2616340353982916, + "learning_rate": 3.97580008874547e-06, + "loss": 0.3259, + "step": 22250, + "vit_learning_rate": 7.951600177490939e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6374, + "epoch": 1.1579275905118602, + "grad_norm": 1.2785820264725796, + "learning_rate": 3.971677836825502e-06, + "loss": 0.3236, + "step": 22260, + "vit_learning_rate": 7.943355673651004e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.741, + "epoch": 1.158447773616313, + "grad_norm": 1.5636899881218174, + "learning_rate": 3.967556314616683e-06, + "loss": 0.338, + "step": 22270, + "vit_learning_rate": 7.935112629233365e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6826, + "epoch": 1.1589679567207658, + "grad_norm": 1.3490630809986264, + "learning_rate": 3.963435525043703e-06, + "loss": 0.3657, + "step": 22280, + "vit_learning_rate": 7.926871050087405e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7041, + "epoch": 1.1594881398252186, + "grad_norm": 3.93490523991842, + "learning_rate": 3.959315471030729e-06, + "loss": 0.3254, + "step": 22290, + "vit_learning_rate": 7.918630942061456e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6738, + "epoch": 1.1600083229296712, + "grad_norm": 1.5623202091579342, + "learning_rate": 3.955196155501404e-06, + "loss": 0.3254, + "step": 22300, + "vit_learning_rate": 7.910392311002807e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6515, + "epoch": 1.160528506034124, + "grad_norm": 2.595062884744902, + "learning_rate": 3.9510775813788525e-06, + "loss": 0.3453, + "step": 22310, + "vit_learning_rate": 7.902155162757705e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7132, + "epoch": 1.1610486891385767, + "grad_norm": 2.413703628673187, + "learning_rate": 3.946959751585667e-06, + "loss": 0.3627, + "step": 22320, + "vit_learning_rate": 7.893919503171333e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6682, + "epoch": 1.1615688722430295, + "grad_norm": 1.199079251309553, + "learning_rate": 3.942842669043921e-06, + "loss": 0.3285, + "step": 22330, + "vit_learning_rate": 7.88568533808784e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6258, + "epoch": 1.1620890553474823, + "grad_norm": 1.5227619021949703, + "learning_rate": 3.938726336675142e-06, + "loss": 0.3297, + "step": 22340, + "vit_learning_rate": 7.877452673350282e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.693, + "epoch": 1.162609238451935, + "grad_norm": 1.3015761178535605, + "learning_rate": 3.934610757400339e-06, + "loss": 0.3505, + "step": 22350, + "vit_learning_rate": 7.869221514800677e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6411, + "epoch": 1.1631294215563879, + "grad_norm": 1.6330580973604427, + "learning_rate": 3.930495934139983e-06, + "loss": 0.3253, + "step": 22360, + "vit_learning_rate": 7.860991868279965e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.686, + "epoch": 1.1636496046608407, + "grad_norm": 1.6906171215097952, + "learning_rate": 3.926381869814006e-06, + "loss": 0.3333, + "step": 22370, + "vit_learning_rate": 7.85276373962801e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6467, + "epoch": 1.1641697877652935, + "grad_norm": 3.112844588730589, + "learning_rate": 3.922268567341803e-06, + "loss": 0.3371, + "step": 22380, + "vit_learning_rate": 7.844537134683605e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7044, + "epoch": 1.1646899708697462, + "grad_norm": 1.3156903541417266, + "learning_rate": 3.9181560296422285e-06, + "loss": 0.3303, + "step": 22390, + "vit_learning_rate": 7.836312059284456e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6396, + "epoch": 1.1652101539741988, + "grad_norm": 1.8566665562102567, + "learning_rate": 3.914044259633598e-06, + "loss": 0.3329, + "step": 22400, + "vit_learning_rate": 7.828088519267196e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6538, + "epoch": 1.1657303370786516, + "grad_norm": 1.4316317698051069, + "learning_rate": 3.909933260233672e-06, + "loss": 0.3312, + "step": 22410, + "vit_learning_rate": 7.819866520467343e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6796, + "epoch": 1.1662505201831044, + "grad_norm": 1.2859160551590685, + "learning_rate": 3.905823034359675e-06, + "loss": 0.3148, + "step": 22420, + "vit_learning_rate": 7.811646068719348e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6264, + "epoch": 1.1667707032875572, + "grad_norm": 1.6451779350240658, + "learning_rate": 3.9017135849282765e-06, + "loss": 0.3175, + "step": 22430, + "vit_learning_rate": 7.803427169856551e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6255, + "epoch": 1.16729088639201, + "grad_norm": 1.6420940114971427, + "learning_rate": 3.897604914855596e-06, + "loss": 0.3376, + "step": 22440, + "vit_learning_rate": 7.795209829711191e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6266, + "epoch": 1.1678110694964627, + "grad_norm": 1.3948515574179041, + "learning_rate": 3.893497027057203e-06, + "loss": 0.3244, + "step": 22450, + "vit_learning_rate": 7.786994054114406e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.648, + "epoch": 1.1683312526009155, + "grad_norm": 1.6455374185107152, + "learning_rate": 3.88938992444811e-06, + "loss": 0.358, + "step": 22460, + "vit_learning_rate": 7.778779848896218e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6435, + "epoch": 1.1688514357053683, + "grad_norm": 1.2552347412417195, + "learning_rate": 3.885283609942769e-06, + "loss": 0.3312, + "step": 22470, + "vit_learning_rate": 7.770567219885537e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6812, + "epoch": 1.169371618809821, + "grad_norm": 1.5445954415506158, + "learning_rate": 3.881178086455076e-06, + "loss": 0.3278, + "step": 22480, + "vit_learning_rate": 7.76235617291015e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6781, + "epoch": 1.1698918019142739, + "grad_norm": 1.5233037395855629, + "learning_rate": 3.877073356898367e-06, + "loss": 0.3176, + "step": 22490, + "vit_learning_rate": 7.754146713796733e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7082, + "epoch": 1.1704119850187267, + "grad_norm": 1.6675149426554967, + "learning_rate": 3.872969424185413e-06, + "loss": 0.311, + "step": 22500, + "vit_learning_rate": 7.745938848370824e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7161, + "epoch": 1.1709321681231795, + "grad_norm": 1.4924131253359356, + "learning_rate": 3.868866291228417e-06, + "loss": 0.3226, + "step": 22510, + "vit_learning_rate": 7.737732582456833e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6225, + "epoch": 1.1714523512276322, + "grad_norm": 1.5840619743779578, + "learning_rate": 3.864763960939019e-06, + "loss": 0.3293, + "step": 22520, + "vit_learning_rate": 7.729527921878038e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6634, + "epoch": 1.1719725343320848, + "grad_norm": 1.4144033599346133, + "learning_rate": 3.8606624362282905e-06, + "loss": 0.3353, + "step": 22530, + "vit_learning_rate": 7.721324872456581e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7016, + "epoch": 1.1724927174365376, + "grad_norm": 1.2949769452843098, + "learning_rate": 3.856561720006723e-06, + "loss": 0.3312, + "step": 22540, + "vit_learning_rate": 7.713123440013445e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6638, + "epoch": 1.1730129005409904, + "grad_norm": 1.514442955552519, + "learning_rate": 3.85246181518424e-06, + "loss": 0.3363, + "step": 22550, + "vit_learning_rate": 7.70492363036848e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6317, + "epoch": 1.1735330836454432, + "grad_norm": 1.655242406204682, + "learning_rate": 3.848362724670191e-06, + "loss": 0.3287, + "step": 22560, + "vit_learning_rate": 7.696725449340382e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6891, + "epoch": 1.174053266749896, + "grad_norm": 1.716730676798459, + "learning_rate": 3.844264451373346e-06, + "loss": 0.3313, + "step": 22570, + "vit_learning_rate": 7.688528902746691e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6528, + "epoch": 1.1745734498543488, + "grad_norm": 1.6071629751903433, + "learning_rate": 3.840166998201891e-06, + "loss": 0.352, + "step": 22580, + "vit_learning_rate": 7.680333996403782e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7596, + "epoch": 1.1750936329588015, + "grad_norm": 1.34742009230219, + "learning_rate": 3.836070368063437e-06, + "loss": 0.3315, + "step": 22590, + "vit_learning_rate": 7.672140736126875e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6471, + "epoch": 1.1756138160632543, + "grad_norm": 1.3910689570320454, + "learning_rate": 3.831974563865008e-06, + "loss": 0.3381, + "step": 22600, + "vit_learning_rate": 7.663949127730015e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.641, + "epoch": 1.1761339991677071, + "grad_norm": 1.21715277085301, + "learning_rate": 3.827879588513038e-06, + "loss": 0.3115, + "step": 22610, + "vit_learning_rate": 7.655759177026074e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6773, + "epoch": 1.1766541822721597, + "grad_norm": 1.7090263562240227, + "learning_rate": 3.823785444913377e-06, + "loss": 0.3436, + "step": 22620, + "vit_learning_rate": 7.647570889826752e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6561, + "epoch": 1.1771743653766125, + "grad_norm": 1.339411510632921, + "learning_rate": 3.819692135971285e-06, + "loss": 0.3432, + "step": 22630, + "vit_learning_rate": 7.639384271942569e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6372, + "epoch": 1.1776945484810653, + "grad_norm": 2.1189786350835256, + "learning_rate": 3.81559966459143e-06, + "loss": 0.336, + "step": 22640, + "vit_learning_rate": 7.631199329182858e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6733, + "epoch": 1.178214731585518, + "grad_norm": 1.2321830928764204, + "learning_rate": 3.8115080336778805e-06, + "loss": 0.3248, + "step": 22650, + "vit_learning_rate": 7.62301606735576e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.711, + "epoch": 1.1787349146899708, + "grad_norm": 1.8468011225653207, + "learning_rate": 3.8074172461341165e-06, + "loss": 0.3118, + "step": 22660, + "vit_learning_rate": 7.614834492268232e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6086, + "epoch": 1.1792550977944236, + "grad_norm": 1.7017371029436767, + "learning_rate": 3.803327304863015e-06, + "loss": 0.3402, + "step": 22670, + "vit_learning_rate": 7.60665460972603e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6164, + "epoch": 1.1797752808988764, + "grad_norm": 1.619303232496906, + "learning_rate": 3.7992382127668516e-06, + "loss": 0.3515, + "step": 22680, + "vit_learning_rate": 7.598476425533702e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6534, + "epoch": 1.1802954640033292, + "grad_norm": 1.409390270527317, + "learning_rate": 3.7951499727472994e-06, + "loss": 0.3457, + "step": 22690, + "vit_learning_rate": 7.590299945494598e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6448, + "epoch": 1.180815647107782, + "grad_norm": 1.5285138065686712, + "learning_rate": 3.791062587705431e-06, + "loss": 0.3268, + "step": 22700, + "vit_learning_rate": 7.582125175410861e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7173, + "epoch": 1.1813358302122348, + "grad_norm": 2.8828816356019766, + "learning_rate": 3.7869760605417084e-06, + "loss": 0.3148, + "step": 22710, + "vit_learning_rate": 7.573952121083416e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6351, + "epoch": 1.1818560133166875, + "grad_norm": 1.4253474408921833, + "learning_rate": 3.782890394155985e-06, + "loss": 0.3622, + "step": 22720, + "vit_learning_rate": 7.565780788311968e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6573, + "epoch": 1.1823761964211403, + "grad_norm": 1.6484787292318888, + "learning_rate": 3.778805591447505e-06, + "loss": 0.3452, + "step": 22730, + "vit_learning_rate": 7.55761118289501e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7588, + "epoch": 1.1828963795255931, + "grad_norm": 1.3500961553707973, + "learning_rate": 3.7747216553149014e-06, + "loss": 0.3201, + "step": 22740, + "vit_learning_rate": 7.549443310629801e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6076, + "epoch": 1.1834165626300457, + "grad_norm": 1.4146824686767554, + "learning_rate": 3.770638588656186e-06, + "loss": 0.3409, + "step": 22750, + "vit_learning_rate": 7.54127717731237e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6548, + "epoch": 1.1839367457344985, + "grad_norm": 1.4144454268628421, + "learning_rate": 3.7665563943687576e-06, + "loss": 0.316, + "step": 22760, + "vit_learning_rate": 7.533112788737514e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6498, + "epoch": 1.1844569288389513, + "grad_norm": 1.9104848068602096, + "learning_rate": 3.762475075349399e-06, + "loss": 0.3213, + "step": 22770, + "vit_learning_rate": 7.524950150698797e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6674, + "epoch": 1.184977111943404, + "grad_norm": 1.7373912671199867, + "learning_rate": 3.7583946344942663e-06, + "loss": 0.3153, + "step": 22780, + "vit_learning_rate": 7.516789268988532e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6354, + "epoch": 1.1854972950478568, + "grad_norm": 1.7210384678529547, + "learning_rate": 3.754315074698895e-06, + "loss": 0.322, + "step": 22790, + "vit_learning_rate": 7.508630149397789e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6372, + "epoch": 1.1860174781523096, + "grad_norm": 2.2848649987129814, + "learning_rate": 3.7502363988581962e-06, + "loss": 0.334, + "step": 22800, + "vit_learning_rate": 7.500472797716392e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7221, + "epoch": 1.1865376612567624, + "grad_norm": 1.582836887320991, + "learning_rate": 3.7461586098664536e-06, + "loss": 0.326, + "step": 22810, + "vit_learning_rate": 7.492317219732907e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6504, + "epoch": 1.1870578443612152, + "grad_norm": 2.2612173796042003, + "learning_rate": 3.742081710617319e-06, + "loss": 0.3473, + "step": 22820, + "vit_learning_rate": 7.484163421234637e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6371, + "epoch": 1.187578027465668, + "grad_norm": 1.4951691057285637, + "learning_rate": 3.7380057040038133e-06, + "loss": 0.3652, + "step": 22830, + "vit_learning_rate": 7.476011408007626e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7129, + "epoch": 1.1880982105701208, + "grad_norm": 1.3765143676585692, + "learning_rate": 3.733930592918329e-06, + "loss": 0.3456, + "step": 22840, + "vit_learning_rate": 7.467861185836657e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6326, + "epoch": 1.1886183936745733, + "grad_norm": 1.686898176603142, + "learning_rate": 3.729856380252617e-06, + "loss": 0.3194, + "step": 22850, + "vit_learning_rate": 7.459712760505232e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6462, + "epoch": 1.1891385767790261, + "grad_norm": 2.00477064555661, + "learning_rate": 3.7257830688977918e-06, + "loss": 0.3374, + "step": 22860, + "vit_learning_rate": 7.451566137795583e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6502, + "epoch": 1.189658759883479, + "grad_norm": 1.5707697290538687, + "learning_rate": 3.721710661744333e-06, + "loss": 0.3449, + "step": 22870, + "vit_learning_rate": 7.443421323488666e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6142, + "epoch": 1.1901789429879317, + "grad_norm": 1.572284246523793, + "learning_rate": 3.7176391616820752e-06, + "loss": 0.341, + "step": 22880, + "vit_learning_rate": 7.435278323364149e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6574, + "epoch": 1.1906991260923845, + "grad_norm": 1.7517385169606785, + "learning_rate": 3.7135685716002058e-06, + "loss": 0.3309, + "step": 22890, + "vit_learning_rate": 7.42713714320041e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6336, + "epoch": 1.1912193091968373, + "grad_norm": 1.4966442173799812, + "learning_rate": 3.70949889438727e-06, + "loss": 0.3234, + "step": 22900, + "vit_learning_rate": 7.418997788774539e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7263, + "epoch": 1.19173949230129, + "grad_norm": 1.582934384787482, + "learning_rate": 3.705430132931169e-06, + "loss": 0.3145, + "step": 22910, + "vit_learning_rate": 7.410860265862336e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6551, + "epoch": 1.1922596754057428, + "grad_norm": 2.1806897707886823, + "learning_rate": 3.701362290119147e-06, + "loss": 0.3182, + "step": 22920, + "vit_learning_rate": 7.402724580238293e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7022, + "epoch": 1.1927798585101956, + "grad_norm": 1.210892056181574, + "learning_rate": 3.697295368837801e-06, + "loss": 0.3337, + "step": 22930, + "vit_learning_rate": 7.394590737675601e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.66, + "epoch": 1.1933000416146484, + "grad_norm": 1.640085641299009, + "learning_rate": 3.6932293719730727e-06, + "loss": 0.3269, + "step": 22940, + "vit_learning_rate": 7.386458743946144e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6128, + "epoch": 1.1938202247191012, + "grad_norm": 2.236089571058858, + "learning_rate": 3.6891643024102486e-06, + "loss": 0.3429, + "step": 22950, + "vit_learning_rate": 7.378328604820497e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6635, + "epoch": 1.194340407823554, + "grad_norm": 1.6737547909714225, + "learning_rate": 3.6851001630339545e-06, + "loss": 0.3484, + "step": 22960, + "vit_learning_rate": 7.370200326067908e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6695, + "epoch": 1.1948605909280068, + "grad_norm": 1.3435517261696284, + "learning_rate": 3.681036956728158e-06, + "loss": 0.3237, + "step": 22970, + "vit_learning_rate": 7.362073913456314e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6713, + "epoch": 1.1953807740324593, + "grad_norm": 2.2837356082546587, + "learning_rate": 3.6769746863761656e-06, + "loss": 0.3168, + "step": 22980, + "vit_learning_rate": 7.35394937275233e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6618, + "epoch": 1.1959009571369121, + "grad_norm": 1.4343552624613443, + "learning_rate": 3.672913354860619e-06, + "loss": 0.3117, + "step": 22990, + "vit_learning_rate": 7.345826709721237e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.654, + "epoch": 1.196421140241365, + "grad_norm": 1.8314294539942528, + "learning_rate": 3.668852965063492e-06, + "loss": 0.3374, + "step": 23000, + "vit_learning_rate": 7.337705930126983e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6163, + "epoch": 1.1969413233458177, + "grad_norm": 1.396677856363428, + "learning_rate": 3.6647935198660932e-06, + "loss": 0.316, + "step": 23010, + "vit_learning_rate": 7.329587039732185e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6881, + "epoch": 1.1974615064502705, + "grad_norm": 1.2803082806918298, + "learning_rate": 3.6607350221490593e-06, + "loss": 0.3348, + "step": 23020, + "vit_learning_rate": 7.321470044298118e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6403, + "epoch": 1.1979816895547233, + "grad_norm": 1.8717911805662057, + "learning_rate": 3.6566774747923526e-06, + "loss": 0.3207, + "step": 23030, + "vit_learning_rate": 7.313354949584705e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6978, + "epoch": 1.198501872659176, + "grad_norm": 3.656978360476156, + "learning_rate": 3.6526208806752626e-06, + "loss": 0.3369, + "step": 23040, + "vit_learning_rate": 7.305241761350525e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6563, + "epoch": 1.1990220557636289, + "grad_norm": 1.8028981515508256, + "learning_rate": 3.648565242676405e-06, + "loss": 0.3143, + "step": 23050, + "vit_learning_rate": 7.29713048535281e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7033, + "epoch": 1.1995422388680816, + "grad_norm": 1.2203195655950343, + "learning_rate": 3.6445105636737142e-06, + "loss": 0.316, + "step": 23060, + "vit_learning_rate": 7.289021127347428e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.658, + "epoch": 1.2000624219725342, + "grad_norm": 1.6887314855337636, + "learning_rate": 3.6404568465444424e-06, + "loss": 0.3396, + "step": 23070, + "vit_learning_rate": 7.280913693088883e-07 + }, + { + "avg_batch_load_time": 0.0008, + "avg_batch_processing_time": 1.2829, + "epoch": 1.200582605076987, + "grad_norm": 1.7345223549143522, + "learning_rate": 3.6364040941651645e-06, + "loss": 0.3378, + "step": 23080, + "vit_learning_rate": 7.272808188330329e-07 + }, + { + "avg_batch_load_time": 0.119, + "avg_batch_processing_time": 0.7084, + "epoch": 1.2011027881814398, + "grad_norm": 1.2511658136784067, + "learning_rate": 3.6323523094117675e-06, + "loss": 0.3297, + "step": 23090, + "vit_learning_rate": 7.264704618823534e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6509, + "epoch": 1.2016229712858926, + "grad_norm": 1.8238181067925585, + "learning_rate": 3.6283014951594494e-06, + "loss": 0.3442, + "step": 23100, + "vit_learning_rate": 7.256602990318897e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6619, + "epoch": 1.2021431543903454, + "grad_norm": 1.4802526666856528, + "learning_rate": 3.624251654282722e-06, + "loss": 0.3133, + "step": 23110, + "vit_learning_rate": 7.248503308565443e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.636, + "epoch": 1.2026633374947981, + "grad_norm": 1.66382973684824, + "learning_rate": 3.6202027896554103e-06, + "loss": 0.3317, + "step": 23120, + "vit_learning_rate": 7.240405579310819e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6891, + "epoch": 1.203183520599251, + "grad_norm": 1.8277312500044012, + "learning_rate": 3.6161549041506407e-06, + "loss": 0.3561, + "step": 23130, + "vit_learning_rate": 7.23230980830128e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6724, + "epoch": 1.2037037037037037, + "grad_norm": 1.8310377852295547, + "learning_rate": 3.6121080006408447e-06, + "loss": 0.3541, + "step": 23140, + "vit_learning_rate": 7.224216001281689e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6887, + "epoch": 1.2042238868081565, + "grad_norm": 1.683116333879107, + "learning_rate": 3.608062081997763e-06, + "loss": 0.3327, + "step": 23150, + "vit_learning_rate": 7.216124163995525e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6669, + "epoch": 1.2047440699126093, + "grad_norm": 1.9714808349604047, + "learning_rate": 3.6040171510924327e-06, + "loss": 0.3411, + "step": 23160, + "vit_learning_rate": 7.208034302184864e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.7166, + "epoch": 1.205264253017062, + "grad_norm": 1.7120653233615044, + "learning_rate": 3.599973210795189e-06, + "loss": 0.3365, + "step": 23170, + "vit_learning_rate": 7.199946421590377e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8735, + "epoch": 1.2057844361215149, + "grad_norm": 1.6522400135957898, + "learning_rate": 3.595930263975665e-06, + "loss": 0.315, + "step": 23180, + "vit_learning_rate": 7.191860527951329e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.763, + "epoch": 1.2063046192259677, + "grad_norm": 1.3660619263840457, + "learning_rate": 3.5918883135027934e-06, + "loss": 0.3164, + "step": 23190, + "vit_learning_rate": 7.183776627005586e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.62, + "epoch": 1.2068248023304202, + "grad_norm": 3.3158999649277088, + "learning_rate": 3.587847362244794e-06, + "loss": 0.3541, + "step": 23200, + "vit_learning_rate": 7.175694724489587e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6522, + "epoch": 1.207344985434873, + "grad_norm": 1.6052257002801416, + "learning_rate": 3.5838074130691793e-06, + "loss": 0.3216, + "step": 23210, + "vit_learning_rate": 7.167614826138358e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7721, + "epoch": 1.2078651685393258, + "grad_norm": 1.3859865220841912, + "learning_rate": 3.5797684688427546e-06, + "loss": 0.3386, + "step": 23220, + "vit_learning_rate": 7.159536937685508e-07 + }, + { + "avg_batch_load_time": 0.0008, + "avg_batch_processing_time": 0.669, + "epoch": 1.2083853516437786, + "grad_norm": 1.6400815829767574, + "learning_rate": 3.5757305324316053e-06, + "loss": 0.323, + "step": 23230, + "vit_learning_rate": 7.15146106486321e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6306, + "epoch": 1.2089055347482314, + "grad_norm": 2.706872116123271, + "learning_rate": 3.5716936067011065e-06, + "loss": 0.3384, + "step": 23240, + "vit_learning_rate": 7.143387213402212e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7793, + "epoch": 1.2094257178526842, + "grad_norm": 1.9109342185561855, + "learning_rate": 3.5676576945159123e-06, + "loss": 0.3236, + "step": 23250, + "vit_learning_rate": 7.135315389031824e-07 + }, + { + "avg_batch_load_time": 0.0009, + "avg_batch_processing_time": 0.6724, + "epoch": 1.209945900957137, + "grad_norm": 1.6147027549682356, + "learning_rate": 3.563622798739964e-06, + "loss": 0.3299, + "step": 23260, + "vit_learning_rate": 7.127245597479927e-07 + }, + { + "avg_batch_load_time": 0.0312, + "avg_batch_processing_time": 0.6623, + "epoch": 1.2104660840615897, + "grad_norm": 1.5725640448873561, + "learning_rate": 3.5595889222364743e-06, + "loss": 0.3143, + "step": 23270, + "vit_learning_rate": 7.119177844472947e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6888, + "epoch": 1.2109862671660425, + "grad_norm": 2.723329242569119, + "learning_rate": 3.5555560678679367e-06, + "loss": 0.3274, + "step": 23280, + "vit_learning_rate": 7.111112135735873e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6633, + "epoch": 1.2115064502704953, + "grad_norm": 1.3278760822990565, + "learning_rate": 3.5515242384961214e-06, + "loss": 0.3269, + "step": 23290, + "vit_learning_rate": 7.103048476992241e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6097, + "epoch": 1.2120266333749479, + "grad_norm": 1.2200898852297446, + "learning_rate": 3.5474934369820642e-06, + "loss": 0.3189, + "step": 23300, + "vit_learning_rate": 7.094986873964127e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6449, + "epoch": 1.2125468164794007, + "grad_norm": 1.4518295557179635, + "learning_rate": 3.5434636661860776e-06, + "loss": 0.3266, + "step": 23310, + "vit_learning_rate": 7.086927332372155e-07 + }, + { + "avg_batch_load_time": 0.0009, + "avg_batch_processing_time": 0.6589, + "epoch": 1.2130669995838534, + "grad_norm": 1.8560442749062733, + "learning_rate": 3.53943492896774e-06, + "loss": 0.3321, + "step": 23320, + "vit_learning_rate": 7.078869857935478e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7429, + "epoch": 1.2135871826883062, + "grad_norm": 2.03382725555277, + "learning_rate": 3.535407228185898e-06, + "loss": 0.3457, + "step": 23330, + "vit_learning_rate": 7.070814456371795e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7279, + "epoch": 1.214107365792759, + "grad_norm": 1.7635533907116772, + "learning_rate": 3.531380566698662e-06, + "loss": 0.3277, + "step": 23340, + "vit_learning_rate": 7.062761133397324e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6777, + "epoch": 1.2146275488972118, + "grad_norm": 1.2985910717278062, + "learning_rate": 3.527354947363404e-06, + "loss": 0.3202, + "step": 23350, + "vit_learning_rate": 7.054709894726807e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.639, + "epoch": 1.2151477320016646, + "grad_norm": 2.6607238704671428, + "learning_rate": 3.5233303730367597e-06, + "loss": 0.3334, + "step": 23360, + "vit_learning_rate": 7.046660746073518e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6427, + "epoch": 1.2156679151061174, + "grad_norm": 1.629080371089442, + "learning_rate": 3.5193068465746182e-06, + "loss": 0.3395, + "step": 23370, + "vit_learning_rate": 7.038613693149236e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6766, + "epoch": 1.2161880982105702, + "grad_norm": 1.4372335929645903, + "learning_rate": 3.5152843708321284e-06, + "loss": 0.3218, + "step": 23380, + "vit_learning_rate": 7.030568741664256e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.642, + "epoch": 1.216708281315023, + "grad_norm": 1.5383199217286543, + "learning_rate": 3.5112629486636914e-06, + "loss": 0.3569, + "step": 23390, + "vit_learning_rate": 7.022525897327381e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6315, + "epoch": 1.2172284644194757, + "grad_norm": 1.3550168657569435, + "learning_rate": 3.5072425829229645e-06, + "loss": 0.3225, + "step": 23400, + "vit_learning_rate": 7.014485165845928e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6507, + "epoch": 1.2177486475239285, + "grad_norm": 1.7462457850957642, + "learning_rate": 3.5032232764628526e-06, + "loss": 0.3199, + "step": 23410, + "vit_learning_rate": 7.006446552925705e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6753, + "epoch": 1.218268830628381, + "grad_norm": 1.6962770218557734, + "learning_rate": 3.4992050321355074e-06, + "loss": 0.3436, + "step": 23420, + "vit_learning_rate": 6.998410064271014e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6397, + "epoch": 1.2187890137328339, + "grad_norm": 3.7605613045593547, + "learning_rate": 3.4951878527923332e-06, + "loss": 0.3204, + "step": 23430, + "vit_learning_rate": 6.990375705584665e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6997, + "epoch": 1.2193091968372867, + "grad_norm": 1.7212267839570077, + "learning_rate": 3.491171741283968e-06, + "loss": 0.305, + "step": 23440, + "vit_learning_rate": 6.982343482567935e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7301, + "epoch": 1.2198293799417395, + "grad_norm": 2.1567001934782035, + "learning_rate": 3.4871567004603026e-06, + "loss": 0.3468, + "step": 23450, + "vit_learning_rate": 6.974313400920604e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6471, + "epoch": 1.2203495630461922, + "grad_norm": 1.925212787533516, + "learning_rate": 3.4831427331704604e-06, + "loss": 0.3523, + "step": 23460, + "vit_learning_rate": 6.96628546634092e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6536, + "epoch": 1.220869746150645, + "grad_norm": 1.5136678870306242, + "learning_rate": 3.47912984226281e-06, + "loss": 0.3096, + "step": 23470, + "vit_learning_rate": 6.958259684525619e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6976, + "epoch": 1.2213899292550978, + "grad_norm": 1.4311080107106964, + "learning_rate": 3.4751180305849503e-06, + "loss": 0.3266, + "step": 23480, + "vit_learning_rate": 6.9502360611699e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7023, + "epoch": 1.2219101123595506, + "grad_norm": 1.2086169346205078, + "learning_rate": 3.471107300983716e-06, + "loss": 0.3144, + "step": 23490, + "vit_learning_rate": 6.942214601967431e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6886, + "epoch": 1.2224302954640034, + "grad_norm": 1.7039673805523734, + "learning_rate": 3.4670976563051777e-06, + "loss": 0.3293, + "step": 23500, + "vit_learning_rate": 6.934195312610355e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6623, + "epoch": 1.2229504785684562, + "grad_norm": 1.4001097376552716, + "learning_rate": 3.4630890993946274e-06, + "loss": 0.3286, + "step": 23510, + "vit_learning_rate": 6.926178198789254e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7912, + "epoch": 1.2234706616729087, + "grad_norm": 1.1535827433432924, + "learning_rate": 3.459081633096595e-06, + "loss": 0.3323, + "step": 23520, + "vit_learning_rate": 6.918163266193189e-07 + }, + { + "avg_batch_load_time": 0.0043, + "avg_batch_processing_time": 0.7521, + "epoch": 1.2239908447773615, + "grad_norm": 1.639164907658355, + "learning_rate": 3.45507526025483e-06, + "loss": 0.3635, + "step": 23530, + "vit_learning_rate": 6.910150520509659e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6124, + "epoch": 1.2245110278818143, + "grad_norm": 2.579327603012384, + "learning_rate": 3.451069983712309e-06, + "loss": 0.3175, + "step": 23540, + "vit_learning_rate": 6.902139967424617e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6808, + "epoch": 1.225031210986267, + "grad_norm": 1.5526442833771417, + "learning_rate": 3.44706580631123e-06, + "loss": 0.3091, + "step": 23550, + "vit_learning_rate": 6.89413161262246e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6387, + "epoch": 1.22555139409072, + "grad_norm": 1.3117734876687188, + "learning_rate": 3.4430627308930097e-06, + "loss": 0.311, + "step": 23560, + "vit_learning_rate": 6.886125461786019e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.8164, + "epoch": 1.2260715771951727, + "grad_norm": 2.8882756794780615, + "learning_rate": 3.439060760298287e-06, + "loss": 0.3416, + "step": 23570, + "vit_learning_rate": 6.878121520596574e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8127, + "epoch": 1.2265917602996255, + "grad_norm": 1.6620212027531702, + "learning_rate": 3.4350598973669093e-06, + "loss": 0.3289, + "step": 23580, + "vit_learning_rate": 6.870119794733817e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6908, + "epoch": 1.2271119434040783, + "grad_norm": 1.285415772018167, + "learning_rate": 3.4310601449379445e-06, + "loss": 0.3104, + "step": 23590, + "vit_learning_rate": 6.862120289875887e-07 + }, + { + "avg_batch_load_time": 0.0288, + "avg_batch_processing_time": 0.7145, + "epoch": 1.227632126508531, + "grad_norm": 1.5761675109571687, + "learning_rate": 3.42706150584967e-06, + "loss": 0.3268, + "step": 23600, + "vit_learning_rate": 6.854123011699338e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6837, + "epoch": 1.2281523096129838, + "grad_norm": 1.5046894291273851, + "learning_rate": 3.423063982939574e-06, + "loss": 0.3399, + "step": 23610, + "vit_learning_rate": 6.846127965879147e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6387, + "epoch": 1.2286724927174366, + "grad_norm": 1.8454256431011813, + "learning_rate": 3.419067579044353e-06, + "loss": 0.3094, + "step": 23620, + "vit_learning_rate": 6.838135158088704e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6839, + "epoch": 1.2291926758218894, + "grad_norm": 1.3897293583972918, + "learning_rate": 3.4150722969999063e-06, + "loss": 0.3326, + "step": 23630, + "vit_learning_rate": 6.830144593999811e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7803, + "epoch": 1.2297128589263422, + "grad_norm": 1.4348337820600092, + "learning_rate": 3.411078139641344e-06, + "loss": 0.3093, + "step": 23640, + "vit_learning_rate": 6.822156279282687e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6357, + "epoch": 1.2302330420307948, + "grad_norm": 1.6034982539362346, + "learning_rate": 3.407085109802968e-06, + "loss": 0.3694, + "step": 23650, + "vit_learning_rate": 6.814170219605935e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6398, + "epoch": 1.2307532251352475, + "grad_norm": 1.8923406242484935, + "learning_rate": 3.4030932103182877e-06, + "loss": 0.3448, + "step": 23660, + "vit_learning_rate": 6.806186420636575e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6435, + "epoch": 1.2312734082397003, + "grad_norm": 1.5812965876071776, + "learning_rate": 3.3991024440200084e-06, + "loss": 0.337, + "step": 23670, + "vit_learning_rate": 6.798204888040016e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6536, + "epoch": 1.2317935913441531, + "grad_norm": 1.653111765825777, + "learning_rate": 3.3951128137400325e-06, + "loss": 0.3118, + "step": 23680, + "vit_learning_rate": 6.790225627480064e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6451, + "epoch": 1.232313774448606, + "grad_norm": 1.3850836630955115, + "learning_rate": 3.3911243223094537e-06, + "loss": 0.3522, + "step": 23690, + "vit_learning_rate": 6.782248644618907e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6465, + "epoch": 1.2328339575530587, + "grad_norm": 1.9438040697712662, + "learning_rate": 3.387136972558558e-06, + "loss": 0.3418, + "step": 23700, + "vit_learning_rate": 6.774273945117115e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6731, + "epoch": 1.2333541406575115, + "grad_norm": 1.4761768435364058, + "learning_rate": 3.383150767316826e-06, + "loss": 0.3308, + "step": 23710, + "vit_learning_rate": 6.766301534633652e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6488, + "epoch": 1.2338743237619643, + "grad_norm": 1.7057154848243756, + "learning_rate": 3.379165709412917e-06, + "loss": 0.3238, + "step": 23720, + "vit_learning_rate": 6.758331418825833e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6582, + "epoch": 1.234394506866417, + "grad_norm": 1.4861898129180822, + "learning_rate": 3.3751818016746844e-06, + "loss": 0.324, + "step": 23730, + "vit_learning_rate": 6.750363603349368e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6859, + "epoch": 1.2349146899708698, + "grad_norm": 1.6931343369585523, + "learning_rate": 3.3711990469291606e-06, + "loss": 0.3387, + "step": 23740, + "vit_learning_rate": 6.74239809385832e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7066, + "epoch": 1.2354348730753224, + "grad_norm": 1.3602243090929398, + "learning_rate": 3.367217448002563e-06, + "loss": 0.3443, + "step": 23750, + "vit_learning_rate": 6.734434896005126e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6301, + "epoch": 1.2359550561797752, + "grad_norm": 1.2280562095017649, + "learning_rate": 3.363237007720288e-06, + "loss": 0.3275, + "step": 23760, + "vit_learning_rate": 6.726474015440575e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6706, + "epoch": 1.236475239284228, + "grad_norm": 1.556244187963275, + "learning_rate": 3.3592577289069073e-06, + "loss": 0.324, + "step": 23770, + "vit_learning_rate": 6.718515457813814e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6507, + "epoch": 1.2369954223886808, + "grad_norm": 1.6186827034102516, + "learning_rate": 3.3552796143861744e-06, + "loss": 0.327, + "step": 23780, + "vit_learning_rate": 6.710559228772348e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.628, + "epoch": 1.2375156054931336, + "grad_norm": 2.257259366469295, + "learning_rate": 3.3513026669810057e-06, + "loss": 0.3151, + "step": 23790, + "vit_learning_rate": 6.702605333962011e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6374, + "epoch": 1.2380357885975863, + "grad_norm": 1.2934022327649144, + "learning_rate": 3.347326889513501e-06, + "loss": 0.3497, + "step": 23800, + "vit_learning_rate": 6.694653779027001e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.814, + "epoch": 1.2385559717020391, + "grad_norm": 1.437744070180385, + "learning_rate": 3.3433522848049237e-06, + "loss": 0.342, + "step": 23810, + "vit_learning_rate": 6.686704569609846e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6558, + "epoch": 1.239076154806492, + "grad_norm": 1.5252657002067036, + "learning_rate": 3.339378855675708e-06, + "loss": 0.3544, + "step": 23820, + "vit_learning_rate": 6.678757711351414e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6574, + "epoch": 1.2395963379109447, + "grad_norm": 1.4383123019007986, + "learning_rate": 3.3354066049454515e-06, + "loss": 0.311, + "step": 23830, + "vit_learning_rate": 6.670813209890902e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6763, + "epoch": 1.2401165210153975, + "grad_norm": 1.425354962108084, + "learning_rate": 3.331435535432914e-06, + "loss": 0.3268, + "step": 23840, + "vit_learning_rate": 6.662871070865828e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.8635, + "epoch": 1.2406367041198503, + "grad_norm": 1.5000528347065807, + "learning_rate": 3.3274656499560255e-06, + "loss": 0.3325, + "step": 23850, + "vit_learning_rate": 6.65493129991205e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7056, + "epoch": 1.241156887224303, + "grad_norm": 2.1503832281916173, + "learning_rate": 3.323496951331862e-06, + "loss": 0.3171, + "step": 23860, + "vit_learning_rate": 6.646993902663724e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6667, + "epoch": 1.2416770703287556, + "grad_norm": 2.1743163463241078, + "learning_rate": 3.3195294423766705e-06, + "loss": 0.3397, + "step": 23870, + "vit_learning_rate": 6.63905888475334e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6967, + "epoch": 1.2421972534332084, + "grad_norm": 1.4695185651860232, + "learning_rate": 3.315563125905846e-06, + "loss": 0.3192, + "step": 23880, + "vit_learning_rate": 6.63112625181169e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7101, + "epoch": 1.2427174365376612, + "grad_norm": 1.725786310315282, + "learning_rate": 3.3115980047339415e-06, + "loss": 0.3368, + "step": 23890, + "vit_learning_rate": 6.623196009467882e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6149, + "epoch": 1.243237619642114, + "grad_norm": 1.32369308040056, + "learning_rate": 3.307634081674658e-06, + "loss": 0.3402, + "step": 23900, + "vit_learning_rate": 6.615268163349316e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6687, + "epoch": 1.2437578027465668, + "grad_norm": 2.096402825314736, + "learning_rate": 3.3036713595408494e-06, + "loss": 0.3289, + "step": 23910, + "vit_learning_rate": 6.607342719081698e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6775, + "epoch": 1.2442779858510196, + "grad_norm": 1.8189949321645265, + "learning_rate": 3.299709841144519e-06, + "loss": 0.3315, + "step": 23920, + "vit_learning_rate": 6.599419682289037e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.701, + "epoch": 1.2447981689554724, + "grad_norm": 2.1317582818001384, + "learning_rate": 3.2957495292968067e-06, + "loss": 0.3108, + "step": 23930, + "vit_learning_rate": 6.591499058593612e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7156, + "epoch": 1.2453183520599251, + "grad_norm": 1.2098010107718058, + "learning_rate": 3.291790426808008e-06, + "loss": 0.3479, + "step": 23940, + "vit_learning_rate": 6.583580853616016e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7107, + "epoch": 1.245838535164378, + "grad_norm": 2.0178194588320317, + "learning_rate": 3.2878325364875525e-06, + "loss": 0.3392, + "step": 23950, + "vit_learning_rate": 6.575665072975104e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6856, + "epoch": 1.2463587182688307, + "grad_norm": 1.7341383480786798, + "learning_rate": 3.2838758611440134e-06, + "loss": 0.335, + "step": 23960, + "vit_learning_rate": 6.567751722288026e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.648, + "epoch": 1.2468789013732833, + "grad_norm": 1.9610317154487233, + "learning_rate": 3.279920403585101e-06, + "loss": 0.3416, + "step": 23970, + "vit_learning_rate": 6.559840807170201e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6289, + "epoch": 1.247399084477736, + "grad_norm": 1.5300182202432637, + "learning_rate": 3.275966166617659e-06, + "loss": 0.3266, + "step": 23980, + "vit_learning_rate": 6.551932333235318e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6515, + "epoch": 1.2479192675821889, + "grad_norm": 1.658038055945666, + "learning_rate": 3.272013153047668e-06, + "loss": 0.3268, + "step": 23990, + "vit_learning_rate": 6.544026306095335e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6227, + "epoch": 1.2484394506866416, + "grad_norm": 1.4742515474810582, + "learning_rate": 3.2680613656802374e-06, + "loss": 0.3391, + "step": 24000, + "vit_learning_rate": 6.536122731360474e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6654, + "epoch": 1.2489596337910944, + "grad_norm": 1.1706587878432961, + "learning_rate": 3.2641108073196102e-06, + "loss": 0.3471, + "step": 24010, + "vit_learning_rate": 6.52822161463922e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7053, + "epoch": 1.2494798168955472, + "grad_norm": 1.5210553331840193, + "learning_rate": 3.2601614807691544e-06, + "loss": 0.3318, + "step": 24020, + "vit_learning_rate": 6.520322961538308e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6724, + "epoch": 1.25, + "grad_norm": 1.701684266936196, + "learning_rate": 3.2562133888313664e-06, + "loss": 0.3302, + "step": 24030, + "vit_learning_rate": 6.512426777662731e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6442, + "epoch": 1.2505201831044528, + "grad_norm": 2.07314099869906, + "learning_rate": 3.252266534307864e-06, + "loss": 0.3286, + "step": 24040, + "vit_learning_rate": 6.504533068615728e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6699, + "epoch": 1.2510403662089056, + "grad_norm": 1.736808555603449, + "learning_rate": 3.2483209199993897e-06, + "loss": 0.3344, + "step": 24050, + "vit_learning_rate": 6.496641839998779e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6566, + "epoch": 1.2515605493133584, + "grad_norm": 1.9893135253107763, + "learning_rate": 3.244376548705801e-06, + "loss": 0.3205, + "step": 24060, + "vit_learning_rate": 6.488753097411601e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6751, + "epoch": 1.2520807324178111, + "grad_norm": 1.8315799545920577, + "learning_rate": 3.240433423226077e-06, + "loss": 0.3289, + "step": 24070, + "vit_learning_rate": 6.480866846452153e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.624, + "epoch": 1.252600915522264, + "grad_norm": 1.688276300977166, + "learning_rate": 3.2364915463583135e-06, + "loss": 0.3296, + "step": 24080, + "vit_learning_rate": 6.472983092716627e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6527, + "epoch": 1.2531210986267167, + "grad_norm": 1.9794111187740848, + "learning_rate": 3.2325509208997184e-06, + "loss": 0.3463, + "step": 24090, + "vit_learning_rate": 6.465101841799436e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7015, + "epoch": 1.2536412817311695, + "grad_norm": 1.550687562631246, + "learning_rate": 3.2286115496466137e-06, + "loss": 0.3246, + "step": 24100, + "vit_learning_rate": 6.457223099293227e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6485, + "epoch": 1.254161464835622, + "grad_norm": 1.5291689124237073, + "learning_rate": 3.224673435394428e-06, + "loss": 0.3419, + "step": 24110, + "vit_learning_rate": 6.449346870788856e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6963, + "epoch": 1.2546816479400749, + "grad_norm": 1.5648478863530404, + "learning_rate": 3.2207365809377024e-06, + "loss": 0.3153, + "step": 24120, + "vit_learning_rate": 6.441473161875404e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6287, + "epoch": 1.2552018310445276, + "grad_norm": 1.4247683475267647, + "learning_rate": 3.216800989070078e-06, + "loss": 0.3313, + "step": 24130, + "vit_learning_rate": 6.433601978140155e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6278, + "epoch": 1.2557220141489804, + "grad_norm": 1.9459830850778956, + "learning_rate": 3.2128666625843034e-06, + "loss": 0.3277, + "step": 24140, + "vit_learning_rate": 6.425733325168606e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6419, + "epoch": 1.2562421972534332, + "grad_norm": 1.516096758589267, + "learning_rate": 3.2089336042722324e-06, + "loss": 0.3012, + "step": 24150, + "vit_learning_rate": 6.417867208544464e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.633, + "epoch": 1.256762380357886, + "grad_norm": 1.6028619808069016, + "learning_rate": 3.2050018169248143e-06, + "loss": 0.3436, + "step": 24160, + "vit_learning_rate": 6.410003633849627e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7086, + "epoch": 1.2572825634623388, + "grad_norm": 1.5716024467793603, + "learning_rate": 3.201071303332096e-06, + "loss": 0.3445, + "step": 24170, + "vit_learning_rate": 6.402142606664191e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7552, + "epoch": 1.2578027465667916, + "grad_norm": 1.6046560947126853, + "learning_rate": 3.197142066283225e-06, + "loss": 0.3347, + "step": 24180, + "vit_learning_rate": 6.39428413256645e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6578, + "epoch": 1.2583229296712442, + "grad_norm": 2.2691946769058133, + "learning_rate": 3.193214108566442e-06, + "loss": 0.3148, + "step": 24190, + "vit_learning_rate": 6.386428217132883e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6451, + "epoch": 1.258843112775697, + "grad_norm": 1.9957216628649217, + "learning_rate": 3.189287432969075e-06, + "loss": 0.3262, + "step": 24200, + "vit_learning_rate": 6.378574865938149e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7249, + "epoch": 1.2593632958801497, + "grad_norm": 2.250833626747706, + "learning_rate": 3.1853620422775445e-06, + "loss": 0.3354, + "step": 24210, + "vit_learning_rate": 6.370724084555088e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6876, + "epoch": 1.2598834789846025, + "grad_norm": 1.6861369984379393, + "learning_rate": 3.1814379392773633e-06, + "loss": 0.3214, + "step": 24220, + "vit_learning_rate": 6.362875878554725e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6616, + "epoch": 1.2604036620890553, + "grad_norm": 1.6729882236875702, + "learning_rate": 3.1775151267531263e-06, + "loss": 0.3231, + "step": 24230, + "vit_learning_rate": 6.355030253506251e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.8115, + "epoch": 1.260923845193508, + "grad_norm": 1.4658365560059836, + "learning_rate": 3.173593607488512e-06, + "loss": 0.3275, + "step": 24240, + "vit_learning_rate": 6.347187214977023e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6254, + "epoch": 1.2614440282979609, + "grad_norm": 1.6373539297816888, + "learning_rate": 3.1696733842662863e-06, + "loss": 0.3256, + "step": 24250, + "vit_learning_rate": 6.339346768532571e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6329, + "epoch": 1.2619642114024137, + "grad_norm": 1.45883822299532, + "learning_rate": 3.165754459868291e-06, + "loss": 0.3484, + "step": 24260, + "vit_learning_rate": 6.331508919736581e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6321, + "epoch": 1.2624843945068664, + "grad_norm": 1.3968289327351648, + "learning_rate": 3.161836837075446e-06, + "loss": 0.321, + "step": 24270, + "vit_learning_rate": 6.323673674150891e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6625, + "epoch": 1.2630045776113192, + "grad_norm": 1.367419237401929, + "learning_rate": 3.157920518667747e-06, + "loss": 0.3172, + "step": 24280, + "vit_learning_rate": 6.315841037335493e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7447, + "epoch": 1.263524760715772, + "grad_norm": 1.7271270437976733, + "learning_rate": 3.15400550742427e-06, + "loss": 0.3232, + "step": 24290, + "vit_learning_rate": 6.308011014848539e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6546, + "epoch": 1.2640449438202248, + "grad_norm": 1.2662182475858836, + "learning_rate": 3.1500918061231567e-06, + "loss": 0.3441, + "step": 24300, + "vit_learning_rate": 6.300183612246312e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6647, + "epoch": 1.2645651269246776, + "grad_norm": 2.1542846087763543, + "learning_rate": 3.1461794175416215e-06, + "loss": 0.3244, + "step": 24310, + "vit_learning_rate": 6.292358835083243e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6438, + "epoch": 1.2650853100291304, + "grad_norm": 1.2223246789958502, + "learning_rate": 3.1422683444559505e-06, + "loss": 0.2985, + "step": 24320, + "vit_learning_rate": 6.2845366889119e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6889, + "epoch": 1.265605493133583, + "grad_norm": 1.3750047422387477, + "learning_rate": 3.138358589641492e-06, + "loss": 0.3262, + "step": 24330, + "vit_learning_rate": 6.276717179282983e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6553, + "epoch": 1.2661256762380357, + "grad_norm": 1.5003929911609302, + "learning_rate": 3.134450155872659e-06, + "loss": 0.328, + "step": 24340, + "vit_learning_rate": 6.268900311745318e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6692, + "epoch": 1.2666458593424885, + "grad_norm": 1.4513093213443509, + "learning_rate": 3.130543045922928e-06, + "loss": 0.3231, + "step": 24350, + "vit_learning_rate": 6.261086091845855e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6674, + "epoch": 1.2671660424469413, + "grad_norm": 1.6160404138413493, + "learning_rate": 3.126637262564839e-06, + "loss": 0.3454, + "step": 24360, + "vit_learning_rate": 6.253274525129677e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6827, + "epoch": 1.267686225551394, + "grad_norm": 1.528515821729969, + "learning_rate": 3.122732808569986e-06, + "loss": 0.3272, + "step": 24370, + "vit_learning_rate": 6.245465617139971e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6625, + "epoch": 1.2682064086558469, + "grad_norm": 1.5187266482414299, + "learning_rate": 3.1188296867090225e-06, + "loss": 0.3372, + "step": 24380, + "vit_learning_rate": 6.237659373418044e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6452, + "epoch": 1.2687265917602997, + "grad_norm": 1.3325947048052424, + "learning_rate": 3.1149278997516563e-06, + "loss": 0.3428, + "step": 24390, + "vit_learning_rate": 6.229855799503312e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6358, + "epoch": 1.2692467748647525, + "grad_norm": 1.4104367584609683, + "learning_rate": 3.111027450466648e-06, + "loss": 0.3253, + "step": 24400, + "vit_learning_rate": 6.222054900933295e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6183, + "epoch": 1.269766957969205, + "grad_norm": 1.5742695556506159, + "learning_rate": 3.107128341621807e-06, + "loss": 0.3295, + "step": 24410, + "vit_learning_rate": 6.214256683243612e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6589, + "epoch": 1.2702871410736578, + "grad_norm": 1.556250121995024, + "learning_rate": 3.1032305759839925e-06, + "loss": 0.3522, + "step": 24420, + "vit_learning_rate": 6.206461151967984e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6354, + "epoch": 1.2708073241781106, + "grad_norm": 1.769503631995345, + "learning_rate": 3.0993341563191137e-06, + "loss": 0.3222, + "step": 24430, + "vit_learning_rate": 6.198668312638226e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6251, + "epoch": 1.2713275072825634, + "grad_norm": 1.7346645872583992, + "learning_rate": 3.0954390853921206e-06, + "loss": 0.3154, + "step": 24440, + "vit_learning_rate": 6.19087817078424e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6681, + "epoch": 1.2718476903870162, + "grad_norm": 1.9042773377428048, + "learning_rate": 3.0915453659670068e-06, + "loss": 0.3349, + "step": 24450, + "vit_learning_rate": 6.183090731934013e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.645, + "epoch": 1.272367873491469, + "grad_norm": 1.6155704353525364, + "learning_rate": 3.0876530008068093e-06, + "loss": 0.3314, + "step": 24460, + "vit_learning_rate": 6.175306001613618e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6649, + "epoch": 1.2728880565959217, + "grad_norm": 1.6277503432813614, + "learning_rate": 3.0837619926736027e-06, + "loss": 0.3256, + "step": 24470, + "vit_learning_rate": 6.167523985347205e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6256, + "epoch": 1.2734082397003745, + "grad_norm": 1.568431335268954, + "learning_rate": 3.079872344328497e-06, + "loss": 0.3361, + "step": 24480, + "vit_learning_rate": 6.159744688656994e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6452, + "epoch": 1.2739284228048273, + "grad_norm": 1.4206293237433778, + "learning_rate": 3.0759840585316375e-06, + "loss": 0.3107, + "step": 24490, + "vit_learning_rate": 6.151968117063275e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7069, + "epoch": 1.27444860590928, + "grad_norm": 1.4246256986758525, + "learning_rate": 3.072097138042206e-06, + "loss": 0.316, + "step": 24500, + "vit_learning_rate": 6.144194276084411e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6917, + "epoch": 1.274968789013733, + "grad_norm": 1.2820153936825596, + "learning_rate": 3.0682115856184138e-06, + "loss": 0.3202, + "step": 24510, + "vit_learning_rate": 6.136423171236826e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7007, + "epoch": 1.2754889721181857, + "grad_norm": 1.4586359121316494, + "learning_rate": 3.0643274040174975e-06, + "loss": 0.3291, + "step": 24520, + "vit_learning_rate": 6.128654808034994e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6607, + "epoch": 1.2760091552226385, + "grad_norm": 1.614196158448774, + "learning_rate": 3.0604445959957275e-06, + "loss": 0.3308, + "step": 24530, + "vit_learning_rate": 6.120889191991454e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6602, + "epoch": 1.2765293383270913, + "grad_norm": 2.203404518078546, + "learning_rate": 3.056563164308396e-06, + "loss": 0.3357, + "step": 24540, + "vit_learning_rate": 6.113126328616792e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6707, + "epoch": 1.2770495214315438, + "grad_norm": 1.753463704966847, + "learning_rate": 3.0526831117098164e-06, + "loss": 0.3252, + "step": 24550, + "vit_learning_rate": 6.105366223419633e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6467, + "epoch": 1.2775697045359966, + "grad_norm": 1.3946362927083924, + "learning_rate": 3.048804440953328e-06, + "loss": 0.345, + "step": 24560, + "vit_learning_rate": 6.097608881906655e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6738, + "epoch": 1.2780898876404494, + "grad_norm": 1.7714686967239794, + "learning_rate": 3.044927154791286e-06, + "loss": 0.3152, + "step": 24570, + "vit_learning_rate": 6.089854309582572e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6747, + "epoch": 1.2786100707449022, + "grad_norm": 1.3598730716575471, + "learning_rate": 3.0410512559750665e-06, + "loss": 0.3411, + "step": 24580, + "vit_learning_rate": 6.082102511950132e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6534, + "epoch": 1.279130253849355, + "grad_norm": 1.5324149501651914, + "learning_rate": 3.0371767472550564e-06, + "loss": 0.3443, + "step": 24590, + "vit_learning_rate": 6.074353494510112e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6278, + "epoch": 1.2796504369538078, + "grad_norm": 1.2597490775330082, + "learning_rate": 3.0333036313806617e-06, + "loss": 0.3372, + "step": 24600, + "vit_learning_rate": 6.066607262761323e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6664, + "epoch": 1.2801706200582605, + "grad_norm": 2.402980418081745, + "learning_rate": 3.0294319111002967e-06, + "loss": 0.3236, + "step": 24610, + "vit_learning_rate": 6.058863822200593e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6672, + "epoch": 1.2806908031627133, + "grad_norm": 1.9852736365246288, + "learning_rate": 3.0255615891613837e-06, + "loss": 0.351, + "step": 24620, + "vit_learning_rate": 6.051123178322767e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6517, + "epoch": 1.2812109862671661, + "grad_norm": 1.6112484663902853, + "learning_rate": 3.0216926683103553e-06, + "loss": 0.3397, + "step": 24630, + "vit_learning_rate": 6.04338533662071e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6526, + "epoch": 1.2817311693716187, + "grad_norm": 2.0197682162389943, + "learning_rate": 3.0178251512926516e-06, + "loss": 0.3375, + "step": 24640, + "vit_learning_rate": 6.035650302585303e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6823, + "epoch": 1.2822513524760715, + "grad_norm": 1.491787252422387, + "learning_rate": 3.013959040852713e-06, + "loss": 0.3257, + "step": 24650, + "vit_learning_rate": 6.027918081705426e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6908, + "epoch": 1.2827715355805243, + "grad_norm": 1.413925599260886, + "learning_rate": 3.010094339733982e-06, + "loss": 0.3338, + "step": 24660, + "vit_learning_rate": 6.020188679467963e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6988, + "epoch": 1.283291718684977, + "grad_norm": 1.9110878416217554, + "learning_rate": 3.0062310506789057e-06, + "loss": 0.3368, + "step": 24670, + "vit_learning_rate": 6.01246210135781e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7384, + "epoch": 1.2838119017894298, + "grad_norm": 1.906541243955115, + "learning_rate": 3.002369176428922e-06, + "loss": 0.3308, + "step": 24680, + "vit_learning_rate": 6.004738352857843e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6954, + "epoch": 1.2843320848938826, + "grad_norm": 1.553987205792057, + "learning_rate": 2.99850871972447e-06, + "loss": 0.3463, + "step": 24690, + "vit_learning_rate": 5.99701743944894e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6832, + "epoch": 1.2848522679983354, + "grad_norm": 1.6502183698557142, + "learning_rate": 2.9946496833049806e-06, + "loss": 0.314, + "step": 24700, + "vit_learning_rate": 5.98929936660996e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7162, + "epoch": 1.2853724511027882, + "grad_norm": 1.5820155453031226, + "learning_rate": 2.990792069908879e-06, + "loss": 0.3202, + "step": 24710, + "vit_learning_rate": 5.981584139817757e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7385, + "epoch": 1.285892634207241, + "grad_norm": 1.342845387850292, + "learning_rate": 2.986935882273578e-06, + "loss": 0.315, + "step": 24720, + "vit_learning_rate": 5.973871764547156e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6459, + "epoch": 1.2864128173116938, + "grad_norm": 1.2126400306940142, + "learning_rate": 2.9830811231354796e-06, + "loss": 0.3084, + "step": 24730, + "vit_learning_rate": 5.966162246270958e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6995, + "epoch": 1.2869330004161466, + "grad_norm": 1.7451435065045942, + "learning_rate": 2.9792277952299753e-06, + "loss": 0.3286, + "step": 24740, + "vit_learning_rate": 5.95845559045995e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6213, + "epoch": 1.2874531835205993, + "grad_norm": 1.4254219719994308, + "learning_rate": 2.9753759012914353e-06, + "loss": 0.328, + "step": 24750, + "vit_learning_rate": 5.950751802582869e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6409, + "epoch": 1.2879733666250521, + "grad_norm": 1.4822422694352826, + "learning_rate": 2.9715254440532147e-06, + "loss": 0.3081, + "step": 24760, + "vit_learning_rate": 5.943050888106429e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6901, + "epoch": 1.288493549729505, + "grad_norm": 1.3821651399429313, + "learning_rate": 2.96767642624765e-06, + "loss": 0.3199, + "step": 24770, + "vit_learning_rate": 5.935352852495299e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6637, + "epoch": 1.2890137328339575, + "grad_norm": 2.6989044179139716, + "learning_rate": 2.9638288506060565e-06, + "loss": 0.334, + "step": 24780, + "vit_learning_rate": 5.927657701212113e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6634, + "epoch": 1.2895339159384103, + "grad_norm": 1.9241656037658568, + "learning_rate": 2.9599827198587255e-06, + "loss": 0.3441, + "step": 24790, + "vit_learning_rate": 5.919965439717451e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6733, + "epoch": 1.290054099042863, + "grad_norm": 1.862896205477446, + "learning_rate": 2.956138036734921e-06, + "loss": 0.3403, + "step": 24800, + "vit_learning_rate": 5.912276073469841e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.618, + "epoch": 1.2905742821473158, + "grad_norm": 1.4936523791201635, + "learning_rate": 2.952294803962884e-06, + "loss": 0.3353, + "step": 24810, + "vit_learning_rate": 5.904589607925768e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7683, + "epoch": 1.2910944652517686, + "grad_norm": 1.5901955727403383, + "learning_rate": 2.9484530242698224e-06, + "loss": 0.3229, + "step": 24820, + "vit_learning_rate": 5.896906048539644e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6597, + "epoch": 1.2916146483562214, + "grad_norm": 1.5882157849171303, + "learning_rate": 2.944612700381914e-06, + "loss": 0.3359, + "step": 24830, + "vit_learning_rate": 5.889225400763828e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6398, + "epoch": 1.2921348314606742, + "grad_norm": 1.789977148201953, + "learning_rate": 2.940773835024304e-06, + "loss": 0.3101, + "step": 24840, + "vit_learning_rate": 5.881547670048607e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6352, + "epoch": 1.292655014565127, + "grad_norm": 1.834886162901242, + "learning_rate": 2.9369364309211034e-06, + "loss": 0.3318, + "step": 24850, + "vit_learning_rate": 5.873872861842206e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.644, + "epoch": 1.2931751976695796, + "grad_norm": 1.4840805978362397, + "learning_rate": 2.9331004907953853e-06, + "loss": 0.3379, + "step": 24860, + "vit_learning_rate": 5.86620098159077e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6829, + "epoch": 1.2936953807740323, + "grad_norm": 2.1908960993951405, + "learning_rate": 2.9292660173691826e-06, + "loss": 0.3371, + "step": 24870, + "vit_learning_rate": 5.858532034738365e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6892, + "epoch": 1.2942155638784851, + "grad_norm": 1.8250529890140312, + "learning_rate": 2.925433013363492e-06, + "loss": 0.3318, + "step": 24880, + "vit_learning_rate": 5.850866026726983e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6282, + "epoch": 1.294735746982938, + "grad_norm": 1.4587465005255231, + "learning_rate": 2.9216014814982628e-06, + "loss": 0.3272, + "step": 24890, + "vit_learning_rate": 5.843202962996525e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6599, + "epoch": 1.2952559300873907, + "grad_norm": 1.249561204567165, + "learning_rate": 2.9177714244924e-06, + "loss": 0.3386, + "step": 24900, + "vit_learning_rate": 5.835542848984799e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6638, + "epoch": 1.2957761131918435, + "grad_norm": 1.4784274870781582, + "learning_rate": 2.9139428450637643e-06, + "loss": 0.31, + "step": 24910, + "vit_learning_rate": 5.827885690127527e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6959, + "epoch": 1.2962962962962963, + "grad_norm": 1.8401124599906058, + "learning_rate": 2.9101157459291644e-06, + "loss": 0.328, + "step": 24920, + "vit_learning_rate": 5.820231491858328e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6694, + "epoch": 1.296816479400749, + "grad_norm": 1.5361481615761272, + "learning_rate": 2.906290129804366e-06, + "loss": 0.3259, + "step": 24930, + "vit_learning_rate": 5.812580259608732e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6753, + "epoch": 1.2973366625052019, + "grad_norm": 1.5097039688275635, + "learning_rate": 2.9024659994040743e-06, + "loss": 0.3068, + "step": 24940, + "vit_learning_rate": 5.804931998808148e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.678, + "epoch": 1.2978568456096546, + "grad_norm": 1.649590499455451, + "learning_rate": 2.898643357441946e-06, + "loss": 0.322, + "step": 24950, + "vit_learning_rate": 5.797286714883891e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6236, + "epoch": 1.2983770287141074, + "grad_norm": 1.40135707078997, + "learning_rate": 2.894822206630574e-06, + "loss": 0.3232, + "step": 24960, + "vit_learning_rate": 5.789644413261147e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6466, + "epoch": 1.2988972118185602, + "grad_norm": 1.3090373722861763, + "learning_rate": 2.8910025496814996e-06, + "loss": 0.3358, + "step": 24970, + "vit_learning_rate": 5.782005099362999e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6429, + "epoch": 1.299417394923013, + "grad_norm": 1.8107008898734582, + "learning_rate": 2.8871843893052055e-06, + "loss": 0.3527, + "step": 24980, + "vit_learning_rate": 5.77436877861041e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6416, + "epoch": 1.2999375780274658, + "grad_norm": 1.8022775430242683, + "learning_rate": 2.8833677282111073e-06, + "loss": 0.326, + "step": 24990, + "vit_learning_rate": 5.766735456422214e-07 + }, + { + "avg_batch_load_time": 2.1589, + "avg_batch_processing_time": 0.6338, + "epoch": 1.3004577611319184, + "grad_norm": 1.6918330521993943, + "learning_rate": 2.879552569107559e-06, + "loss": 0.3418, + "step": 25000, + "vit_learning_rate": 5.759105138215117e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7093, + "epoch": 1.3009779442363711, + "grad_norm": 1.8900558666614156, + "learning_rate": 2.875738914701848e-06, + "loss": 0.3463, + "step": 25010, + "vit_learning_rate": 5.751477829403694e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6531, + "epoch": 1.301498127340824, + "grad_norm": 1.6350065470244153, + "learning_rate": 2.871926767700194e-06, + "loss": 0.3523, + "step": 25020, + "vit_learning_rate": 5.743853535400387e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6233, + "epoch": 1.3020183104452767, + "grad_norm": 1.8704248732837696, + "learning_rate": 2.868116130807749e-06, + "loss": 0.3276, + "step": 25030, + "vit_learning_rate": 5.736232261615497e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6861, + "epoch": 1.3025384935497295, + "grad_norm": 1.5702794843742245, + "learning_rate": 2.8643070067285907e-06, + "loss": 0.3325, + "step": 25040, + "vit_learning_rate": 5.72861401345718e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6397, + "epoch": 1.3030586766541823, + "grad_norm": 2.52097839319793, + "learning_rate": 2.8604993981657247e-06, + "loss": 0.3318, + "step": 25050, + "vit_learning_rate": 5.720998796331448e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.719, + "epoch": 1.303578859758635, + "grad_norm": 1.640760955466387, + "learning_rate": 2.856693307821081e-06, + "loss": 0.3132, + "step": 25060, + "vit_learning_rate": 5.713386615642162e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6278, + "epoch": 1.3040990428630879, + "grad_norm": 1.7789954598594806, + "learning_rate": 2.8528887383955113e-06, + "loss": 0.3411, + "step": 25070, + "vit_learning_rate": 5.705777476791022e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6618, + "epoch": 1.3046192259675407, + "grad_norm": 1.4883289994183992, + "learning_rate": 2.849085692588791e-06, + "loss": 0.331, + "step": 25080, + "vit_learning_rate": 5.698171385177582e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6766, + "epoch": 1.3051394090719932, + "grad_norm": 1.6360930543342391, + "learning_rate": 2.845284173099613e-06, + "loss": 0.3361, + "step": 25090, + "vit_learning_rate": 5.690568346199224e-07 + }, + { + "avg_batch_load_time": 0.0035, + "avg_batch_processing_time": 0.6492, + "epoch": 1.305659592176446, + "grad_norm": 1.3988194698195402, + "learning_rate": 2.8414841826255824e-06, + "loss": 0.3145, + "step": 25100, + "vit_learning_rate": 5.682968365251164e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6718, + "epoch": 1.3061797752808988, + "grad_norm": 1.8415031066404148, + "learning_rate": 2.837685723863223e-06, + "loss": 0.3261, + "step": 25110, + "vit_learning_rate": 5.675371447726445e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7314, + "epoch": 1.3066999583853516, + "grad_norm": 1.16279935181176, + "learning_rate": 2.833888799507977e-06, + "loss": 0.3327, + "step": 25120, + "vit_learning_rate": 5.667777599015952e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7175, + "epoch": 1.3072201414898044, + "grad_norm": 1.450094066643432, + "learning_rate": 2.830093412254189e-06, + "loss": 0.3236, + "step": 25130, + "vit_learning_rate": 5.660186824508377e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.644, + "epoch": 1.3077403245942572, + "grad_norm": 1.5634528879682799, + "learning_rate": 2.8262995647951175e-06, + "loss": 0.3335, + "step": 25140, + "vit_learning_rate": 5.652599129590234e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6513, + "epoch": 1.30826050769871, + "grad_norm": 1.866460006444149, + "learning_rate": 2.8225072598229275e-06, + "loss": 0.3251, + "step": 25150, + "vit_learning_rate": 5.645014519645855e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6975, + "epoch": 1.3087806908031627, + "grad_norm": 1.3852298097860214, + "learning_rate": 2.8187165000286885e-06, + "loss": 0.3151, + "step": 25160, + "vit_learning_rate": 5.637433000057376e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6165, + "epoch": 1.3093008739076155, + "grad_norm": 1.8215241100426622, + "learning_rate": 2.8149272881023748e-06, + "loss": 0.3254, + "step": 25170, + "vit_learning_rate": 5.629854576204748e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6457, + "epoch": 1.3098210570120683, + "grad_norm": 2.209331019528733, + "learning_rate": 2.8111396267328627e-06, + "loss": 0.3235, + "step": 25180, + "vit_learning_rate": 5.622279253465725e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6434, + "epoch": 1.310341240116521, + "grad_norm": 1.6174408591160692, + "learning_rate": 2.807353518607926e-06, + "loss": 0.3033, + "step": 25190, + "vit_learning_rate": 5.614707037215851e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6632, + "epoch": 1.3108614232209739, + "grad_norm": 1.576523186893485, + "learning_rate": 2.8035689664142372e-06, + "loss": 0.3218, + "step": 25200, + "vit_learning_rate": 5.607137932828474e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7156, + "epoch": 1.3113816063254267, + "grad_norm": 1.4748607459239955, + "learning_rate": 2.799785972837365e-06, + "loss": 0.327, + "step": 25210, + "vit_learning_rate": 5.599571945674729e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6518, + "epoch": 1.3119017894298795, + "grad_norm": 1.8055583357869167, + "learning_rate": 2.7960045405617755e-06, + "loss": 0.3232, + "step": 25220, + "vit_learning_rate": 5.59200908112355e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6563, + "epoch": 1.312421972534332, + "grad_norm": 1.583557680489765, + "learning_rate": 2.7922246722708225e-06, + "loss": 0.3417, + "step": 25230, + "vit_learning_rate": 5.584449344541644e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6221, + "epoch": 1.3129421556387848, + "grad_norm": 1.5253377395377976, + "learning_rate": 2.788446370646748e-06, + "loss": 0.35, + "step": 25240, + "vit_learning_rate": 5.576892741293495e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.648, + "epoch": 1.3134623387432376, + "grad_norm": 1.3023116340452907, + "learning_rate": 2.784669638370685e-06, + "loss": 0.3297, + "step": 25250, + "vit_learning_rate": 5.56933927674137e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.8345, + "epoch": 1.3139825218476904, + "grad_norm": 2.8576371047257503, + "learning_rate": 2.7808944781226576e-06, + "loss": 0.3101, + "step": 25260, + "vit_learning_rate": 5.561788956245314e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7028, + "epoch": 1.3145027049521432, + "grad_norm": 1.6258415830742545, + "learning_rate": 2.7771208925815678e-06, + "loss": 0.3323, + "step": 25270, + "vit_learning_rate": 5.554241785163135e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6297, + "epoch": 1.315022888056596, + "grad_norm": 2.0831638691211714, + "learning_rate": 2.773348884425202e-06, + "loss": 0.341, + "step": 25280, + "vit_learning_rate": 5.546697768850404e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6714, + "epoch": 1.3155430711610487, + "grad_norm": 1.2537067476479153, + "learning_rate": 2.7695784563302284e-06, + "loss": 0.3279, + "step": 25290, + "vit_learning_rate": 5.539156912660456e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6743, + "epoch": 1.3160632542655015, + "grad_norm": 1.9610189899670873, + "learning_rate": 2.765809610972192e-06, + "loss": 0.3521, + "step": 25300, + "vit_learning_rate": 5.531619221944383e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6628, + "epoch": 1.316583437369954, + "grad_norm": 1.4508938810642225, + "learning_rate": 2.7620423510255173e-06, + "loss": 0.3292, + "step": 25310, + "vit_learning_rate": 5.524084702051034e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6633, + "epoch": 1.3171036204744069, + "grad_norm": 1.5992320006618375, + "learning_rate": 2.7582766791635008e-06, + "loss": 0.3395, + "step": 25320, + "vit_learning_rate": 5.516553358327001e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6733, + "epoch": 1.3176238035788597, + "grad_norm": 1.3887184754934512, + "learning_rate": 2.7545125980583146e-06, + "loss": 0.324, + "step": 25330, + "vit_learning_rate": 5.50902519611663e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6512, + "epoch": 1.3181439866833125, + "grad_norm": 1.1899213537559612, + "learning_rate": 2.750750110381001e-06, + "loss": 0.3249, + "step": 25340, + "vit_learning_rate": 5.501500220762e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6546, + "epoch": 1.3186641697877652, + "grad_norm": 1.5615068394193021, + "learning_rate": 2.746989218801469e-06, + "loss": 0.3344, + "step": 25350, + "vit_learning_rate": 5.493978437602937e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6447, + "epoch": 1.319184352892218, + "grad_norm": 1.5514490978510207, + "learning_rate": 2.743229925988501e-06, + "loss": 0.3341, + "step": 25360, + "vit_learning_rate": 5.486459851977001e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6587, + "epoch": 1.3197045359966708, + "grad_norm": 1.687518439741295, + "learning_rate": 2.739472234609743e-06, + "loss": 0.3289, + "step": 25370, + "vit_learning_rate": 5.478944469219485e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.689, + "epoch": 1.3202247191011236, + "grad_norm": 1.6259609007403288, + "learning_rate": 2.735716147331698e-06, + "loss": 0.3218, + "step": 25380, + "vit_learning_rate": 5.471432294663395e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6506, + "epoch": 1.3207449022055764, + "grad_norm": 1.4802933348351068, + "learning_rate": 2.7319616668197355e-06, + "loss": 0.3079, + "step": 25390, + "vit_learning_rate": 5.463923333639471e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6509, + "epoch": 1.3212650853100292, + "grad_norm": 2.387453644293158, + "learning_rate": 2.72820879573809e-06, + "loss": 0.3468, + "step": 25400, + "vit_learning_rate": 5.456417591476179e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.682, + "epoch": 1.321785268414482, + "grad_norm": 1.2457478465060907, + "learning_rate": 2.724457536749846e-06, + "loss": 0.3302, + "step": 25410, + "vit_learning_rate": 5.448915073499691e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6362, + "epoch": 1.3223054515189347, + "grad_norm": 1.4460089202616497, + "learning_rate": 2.720707892516947e-06, + "loss": 0.3482, + "step": 25420, + "vit_learning_rate": 5.441415785033894e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6801, + "epoch": 1.3228256346233875, + "grad_norm": 1.3708747095474971, + "learning_rate": 2.716959865700192e-06, + "loss": 0.337, + "step": 25430, + "vit_learning_rate": 5.433919731400383e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6661, + "epoch": 1.3233458177278403, + "grad_norm": 2.0877378875013712, + "learning_rate": 2.7132134589592297e-06, + "loss": 0.348, + "step": 25440, + "vit_learning_rate": 5.426426917918459e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6984, + "epoch": 1.3238660008322929, + "grad_norm": 1.6805602447477006, + "learning_rate": 2.709468674952561e-06, + "loss": 0.3435, + "step": 25450, + "vit_learning_rate": 5.418937349905122e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6599, + "epoch": 1.3243861839367457, + "grad_norm": 1.5610682725898724, + "learning_rate": 2.705725516337534e-06, + "loss": 0.31, + "step": 25460, + "vit_learning_rate": 5.411451032675067e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6772, + "epoch": 1.3249063670411985, + "grad_norm": 1.1889121751237777, + "learning_rate": 2.7019839857703447e-06, + "loss": 0.3355, + "step": 25470, + "vit_learning_rate": 5.403967971540688e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.8106, + "epoch": 1.3254265501456512, + "grad_norm": 2.3144799243694782, + "learning_rate": 2.6982440859060334e-06, + "loss": 0.3262, + "step": 25480, + "vit_learning_rate": 5.396488171812065e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6406, + "epoch": 1.325946733250104, + "grad_norm": 1.643738485162428, + "learning_rate": 2.6945058193984797e-06, + "loss": 0.3238, + "step": 25490, + "vit_learning_rate": 5.389011638796958e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6467, + "epoch": 1.3264669163545568, + "grad_norm": 1.3010813619965438, + "learning_rate": 2.6907691889004153e-06, + "loss": 0.3319, + "step": 25500, + "vit_learning_rate": 5.38153837780083e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6373, + "epoch": 1.3269870994590096, + "grad_norm": 1.4065800295372248, + "learning_rate": 2.6870341970633955e-06, + "loss": 0.3315, + "step": 25510, + "vit_learning_rate": 5.374068394126791e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6883, + "epoch": 1.3275072825634624, + "grad_norm": 1.2995457585067298, + "learning_rate": 2.6833008465378237e-06, + "loss": 0.3209, + "step": 25520, + "vit_learning_rate": 5.366601693075647e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6502, + "epoch": 1.3280274656679152, + "grad_norm": 1.415114339066202, + "learning_rate": 2.6795691399729322e-06, + "loss": 0.3163, + "step": 25530, + "vit_learning_rate": 5.359138279945864e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.673, + "epoch": 1.3285476487723678, + "grad_norm": 1.4520167505086936, + "learning_rate": 2.6758390800167956e-06, + "loss": 0.3112, + "step": 25540, + "vit_learning_rate": 5.35167816003359e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6511, + "epoch": 1.3290678318768205, + "grad_norm": 1.6837181255712266, + "learning_rate": 2.6721106693163103e-06, + "loss": 0.3319, + "step": 25550, + "vit_learning_rate": 5.344221338632619e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7618, + "epoch": 1.3295880149812733, + "grad_norm": 1.776066885548573, + "learning_rate": 2.6683839105172072e-06, + "loss": 0.3325, + "step": 25560, + "vit_learning_rate": 5.336767821034414e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6697, + "epoch": 1.3301081980857261, + "grad_norm": 1.2460493614996357, + "learning_rate": 2.664658806264044e-06, + "loss": 0.3362, + "step": 25570, + "vit_learning_rate": 5.329317612528087e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7119, + "epoch": 1.330628381190179, + "grad_norm": 1.3184315855065547, + "learning_rate": 2.660935359200205e-06, + "loss": 0.303, + "step": 25580, + "vit_learning_rate": 5.32187071840041e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6823, + "epoch": 1.3311485642946317, + "grad_norm": 1.9914860352668948, + "learning_rate": 2.6572135719678973e-06, + "loss": 0.3261, + "step": 25590, + "vit_learning_rate": 5.314427143935793e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.688, + "epoch": 1.3316687473990845, + "grad_norm": 1.6586373271287416, + "learning_rate": 2.653493447208152e-06, + "loss": 0.3259, + "step": 25600, + "vit_learning_rate": 5.306986894416303e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7488, + "epoch": 1.3321889305035373, + "grad_norm": 1.6410724794969151, + "learning_rate": 2.6497749875608176e-06, + "loss": 0.3183, + "step": 25610, + "vit_learning_rate": 5.299549975121635e-07 + }, + { + "avg_batch_load_time": 0.0224, + "avg_batch_processing_time": 0.7085, + "epoch": 1.33270911360799, + "grad_norm": 1.3261447823054162, + "learning_rate": 2.646058195664564e-06, + "loss": 0.3279, + "step": 25620, + "vit_learning_rate": 5.292116391329126e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6837, + "epoch": 1.3332292967124428, + "grad_norm": 2.135101149238015, + "learning_rate": 2.6423430741568746e-06, + "loss": 0.3454, + "step": 25630, + "vit_learning_rate": 5.284686148313749e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7073, + "epoch": 1.3337494798168956, + "grad_norm": 1.8004469615222716, + "learning_rate": 2.638629625674054e-06, + "loss": 0.3404, + "step": 25640, + "vit_learning_rate": 5.277259251348108e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6588, + "epoch": 1.3342696629213484, + "grad_norm": 1.5770146764951902, + "learning_rate": 2.63491785285121e-06, + "loss": 0.3365, + "step": 25650, + "vit_learning_rate": 5.269835705702419e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6667, + "epoch": 1.3347898460258012, + "grad_norm": 2.9205049257387588, + "learning_rate": 2.6312077583222684e-06, + "loss": 0.3364, + "step": 25660, + "vit_learning_rate": 5.262415516644536e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7194, + "epoch": 1.335310029130254, + "grad_norm": 1.323579793231057, + "learning_rate": 2.6274993447199585e-06, + "loss": 0.3283, + "step": 25670, + "vit_learning_rate": 5.254998689439917e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6585, + "epoch": 1.3358302122347065, + "grad_norm": 2.370444025146522, + "learning_rate": 2.623792614675826e-06, + "loss": 0.3151, + "step": 25680, + "vit_learning_rate": 5.247585229351651e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.703, + "epoch": 1.3363503953391593, + "grad_norm": 1.4323532296767831, + "learning_rate": 2.620087570820212e-06, + "loss": 0.3283, + "step": 25690, + "vit_learning_rate": 5.240175141640424e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7184, + "epoch": 1.3368705784436121, + "grad_norm": 1.720481786844751, + "learning_rate": 2.616384215782267e-06, + "loss": 0.3164, + "step": 25700, + "vit_learning_rate": 5.232768431564533e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7406, + "epoch": 1.337390761548065, + "grad_norm": 1.714971111333433, + "learning_rate": 2.61268255218994e-06, + "loss": 0.3253, + "step": 25710, + "vit_learning_rate": 5.22536510437988e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7043, + "epoch": 1.3379109446525177, + "grad_norm": 1.5786877802465118, + "learning_rate": 2.6089825826699815e-06, + "loss": 0.3121, + "step": 25720, + "vit_learning_rate": 5.217965165339962e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7119, + "epoch": 1.3384311277569705, + "grad_norm": 1.4801723079164972, + "learning_rate": 2.6052843098479397e-06, + "loss": 0.3376, + "step": 25730, + "vit_learning_rate": 5.210568619695879e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6701, + "epoch": 1.3389513108614233, + "grad_norm": 1.8122909629669324, + "learning_rate": 2.6015877363481574e-06, + "loss": 0.3242, + "step": 25740, + "vit_learning_rate": 5.203175472696314e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6872, + "epoch": 1.339471493965876, + "grad_norm": 2.422301867226431, + "learning_rate": 2.597892864793773e-06, + "loss": 0.3231, + "step": 25750, + "vit_learning_rate": 5.195785729587545e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.78, + "epoch": 1.3399916770703286, + "grad_norm": 2.3234196587877376, + "learning_rate": 2.5941996978067173e-06, + "loss": 0.3263, + "step": 25760, + "vit_learning_rate": 5.188399395613434e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6457, + "epoch": 1.3405118601747814, + "grad_norm": 1.324296732507856, + "learning_rate": 2.5905082380077085e-06, + "loss": 0.3335, + "step": 25770, + "vit_learning_rate": 5.181016476015416e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6403, + "epoch": 1.3410320432792342, + "grad_norm": 1.2422046262832038, + "learning_rate": 2.586818488016262e-06, + "loss": 0.3236, + "step": 25780, + "vit_learning_rate": 5.173636976032522e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6691, + "epoch": 1.341552226383687, + "grad_norm": 1.8512105098861094, + "learning_rate": 2.5831304504506664e-06, + "loss": 0.321, + "step": 25790, + "vit_learning_rate": 5.166260900901332e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7164, + "epoch": 1.3420724094881398, + "grad_norm": 1.3798442257990402, + "learning_rate": 2.579444127928007e-06, + "loss": 0.3155, + "step": 25800, + "vit_learning_rate": 5.158888255856013e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6724, + "epoch": 1.3425925925925926, + "grad_norm": 1.9916959113981783, + "learning_rate": 2.575759523064143e-06, + "loss": 0.3335, + "step": 25810, + "vit_learning_rate": 5.151519046128285e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6692, + "epoch": 1.3431127756970453, + "grad_norm": 1.6559303640788985, + "learning_rate": 2.572076638473725e-06, + "loss": 0.297, + "step": 25820, + "vit_learning_rate": 5.144153276947449e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6777, + "epoch": 1.3436329588014981, + "grad_norm": 1.514628159641216, + "learning_rate": 2.568395476770175e-06, + "loss": 0.3471, + "step": 25830, + "vit_learning_rate": 5.136790953540348e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6567, + "epoch": 1.344153141905951, + "grad_norm": 2.299840379775126, + "learning_rate": 2.564716040565693e-06, + "loss": 0.3442, + "step": 25840, + "vit_learning_rate": 5.129432081131385e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6671, + "epoch": 1.3446733250104037, + "grad_norm": 1.2780875400640617, + "learning_rate": 2.561038332471257e-06, + "loss": 0.3242, + "step": 25850, + "vit_learning_rate": 5.122076664942514e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.627, + "epoch": 1.3451935081148565, + "grad_norm": 1.6538344396319276, + "learning_rate": 2.5573623550966187e-06, + "loss": 0.3286, + "step": 25860, + "vit_learning_rate": 5.114724710193237e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6734, + "epoch": 1.3457136912193093, + "grad_norm": 1.8076165962836537, + "learning_rate": 2.5536881110502998e-06, + "loss": 0.3025, + "step": 25870, + "vit_learning_rate": 5.107376222100599e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.656, + "epoch": 1.346233874323762, + "grad_norm": 1.5254784362174427, + "learning_rate": 2.5500156029395935e-06, + "loss": 0.3181, + "step": 25880, + "vit_learning_rate": 5.100031205879187e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6415, + "epoch": 1.3467540574282149, + "grad_norm": 2.0758507253017613, + "learning_rate": 2.54634483337056e-06, + "loss": 0.3142, + "step": 25890, + "vit_learning_rate": 5.092689666741119e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6771, + "epoch": 1.3472742405326674, + "grad_norm": 1.3984713351641656, + "learning_rate": 2.542675804948027e-06, + "loss": 0.3264, + "step": 25900, + "vit_learning_rate": 5.085351609896054e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6642, + "epoch": 1.3477944236371202, + "grad_norm": 1.482427710245937, + "learning_rate": 2.5390085202755844e-06, + "loss": 0.3242, + "step": 25910, + "vit_learning_rate": 5.078017040551168e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6674, + "epoch": 1.348314606741573, + "grad_norm": 1.7707511704738474, + "learning_rate": 2.535342981955591e-06, + "loss": 0.3145, + "step": 25920, + "vit_learning_rate": 5.070685963911181e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6542, + "epoch": 1.3488347898460258, + "grad_norm": 3.5146546097716738, + "learning_rate": 2.531679192589157e-06, + "loss": 0.3406, + "step": 25930, + "vit_learning_rate": 5.063358385178314e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7005, + "epoch": 1.3493549729504786, + "grad_norm": 1.8352059057706311, + "learning_rate": 2.528017154776158e-06, + "loss": 0.3432, + "step": 25940, + "vit_learning_rate": 5.056034309552315e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6957, + "epoch": 1.3498751560549314, + "grad_norm": 2.487888815524362, + "learning_rate": 2.5243568711152234e-06, + "loss": 0.3187, + "step": 25950, + "vit_learning_rate": 5.048713742230446e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6675, + "epoch": 1.3503953391593841, + "grad_norm": 2.606215552976002, + "learning_rate": 2.5206983442037425e-06, + "loss": 0.3272, + "step": 25960, + "vit_learning_rate": 5.041396688407485e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6556, + "epoch": 1.350915522263837, + "grad_norm": 1.9539064482587183, + "learning_rate": 2.517041576637854e-06, + "loss": 0.3176, + "step": 25970, + "vit_learning_rate": 5.034083153275707e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.63, + "epoch": 1.3514357053682897, + "grad_norm": 1.6078279418036956, + "learning_rate": 2.5133865710124485e-06, + "loss": 0.3176, + "step": 25980, + "vit_learning_rate": 5.026773142024896e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6973, + "epoch": 1.3519558884727423, + "grad_norm": 1.4706849081681468, + "learning_rate": 2.5097333299211657e-06, + "loss": 0.3501, + "step": 25990, + "vit_learning_rate": 5.019466659842331e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6355, + "epoch": 1.352476071577195, + "grad_norm": 1.4518412231768256, + "learning_rate": 2.506081855956396e-06, + "loss": 0.3415, + "step": 26000, + "vit_learning_rate": 5.012163711912792e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6471, + "epoch": 1.3529962546816479, + "grad_norm": 1.422694488186426, + "learning_rate": 2.5024321517092743e-06, + "loss": 0.3403, + "step": 26010, + "vit_learning_rate": 5.004864303418548e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6384, + "epoch": 1.3535164377861006, + "grad_norm": 0.9916817718826617, + "learning_rate": 2.498784219769678e-06, + "loss": 0.3202, + "step": 26020, + "vit_learning_rate": 4.997568439539355e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6306, + "epoch": 1.3540366208905534, + "grad_norm": 2.079595962012938, + "learning_rate": 2.495138062726229e-06, + "loss": 0.3283, + "step": 26030, + "vit_learning_rate": 4.990276125452457e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6484, + "epoch": 1.3545568039950062, + "grad_norm": 1.2938335533956709, + "learning_rate": 2.491493683166289e-06, + "loss": 0.3306, + "step": 26040, + "vit_learning_rate": 4.982987366332577e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6404, + "epoch": 1.355076987099459, + "grad_norm": 1.3945073795494107, + "learning_rate": 2.487851083675957e-06, + "loss": 0.3374, + "step": 26050, + "vit_learning_rate": 4.975702167351913e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6618, + "epoch": 1.3555971702039118, + "grad_norm": 1.5442598746379237, + "learning_rate": 2.484210266840076e-06, + "loss": 0.3329, + "step": 26060, + "vit_learning_rate": 4.968420533680151e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6998, + "epoch": 1.3561173533083646, + "grad_norm": 1.6450548937271259, + "learning_rate": 2.4805712352422116e-06, + "loss": 0.3444, + "step": 26070, + "vit_learning_rate": 4.961142470484423e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6624, + "epoch": 1.3566375364128174, + "grad_norm": 1.4294806035468828, + "learning_rate": 2.4769339914646722e-06, + "loss": 0.3095, + "step": 26080, + "vit_learning_rate": 4.953867982929344e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6357, + "epoch": 1.3571577195172702, + "grad_norm": 1.468453583278383, + "learning_rate": 2.473298538088492e-06, + "loss": 0.3204, + "step": 26090, + "vit_learning_rate": 4.946597076176984e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6548, + "epoch": 1.357677902621723, + "grad_norm": 1.5209178702957165, + "learning_rate": 2.469664877693442e-06, + "loss": 0.3561, + "step": 26100, + "vit_learning_rate": 4.939329755386883e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6355, + "epoch": 1.3581980857261757, + "grad_norm": 1.3795851367943524, + "learning_rate": 2.4660330128580135e-06, + "loss": 0.306, + "step": 26110, + "vit_learning_rate": 4.932066025716026e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6721, + "epoch": 1.3587182688306285, + "grad_norm": 1.441023531738021, + "learning_rate": 2.4624029461594262e-06, + "loss": 0.3169, + "step": 26120, + "vit_learning_rate": 4.924805892318852e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6508, + "epoch": 1.359238451935081, + "grad_norm": 1.1183480953389566, + "learning_rate": 2.458774680173625e-06, + "loss": 0.3479, + "step": 26130, + "vit_learning_rate": 4.91754936034725e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6738, + "epoch": 1.3597586350395339, + "grad_norm": 2.1702517275707174, + "learning_rate": 2.455148217475275e-06, + "loss": 0.342, + "step": 26140, + "vit_learning_rate": 4.91029643495055e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6489, + "epoch": 1.3602788181439867, + "grad_norm": 1.5996715202591134, + "learning_rate": 2.4515235606377625e-06, + "loss": 0.3395, + "step": 26150, + "vit_learning_rate": 4.903047121275524e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6524, + "epoch": 1.3607990012484394, + "grad_norm": 1.4669468504666523, + "learning_rate": 2.4479007122331928e-06, + "loss": 0.3224, + "step": 26160, + "vit_learning_rate": 4.895801424466385e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7891, + "epoch": 1.3613191843528922, + "grad_norm": 1.2770500736387032, + "learning_rate": 2.4442796748323875e-06, + "loss": 0.3098, + "step": 26170, + "vit_learning_rate": 4.888559349664775e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7159, + "epoch": 1.361839367457345, + "grad_norm": 1.4732389383471123, + "learning_rate": 2.4406604510048836e-06, + "loss": 0.3278, + "step": 26180, + "vit_learning_rate": 4.881320902009766e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6716, + "epoch": 1.3623595505617978, + "grad_norm": 1.807975849900544, + "learning_rate": 2.43704304331893e-06, + "loss": 0.3128, + "step": 26190, + "vit_learning_rate": 4.874086086637859e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7027, + "epoch": 1.3628797336662506, + "grad_norm": 1.465840395434687, + "learning_rate": 2.433427454341488e-06, + "loss": 0.3068, + "step": 26200, + "vit_learning_rate": 4.866854908682974e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6577, + "epoch": 1.3633999167707032, + "grad_norm": 2.1591800861784756, + "learning_rate": 2.429813686638227e-06, + "loss": 0.3344, + "step": 26210, + "vit_learning_rate": 4.859627373276454e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7131, + "epoch": 1.363920099875156, + "grad_norm": 1.0771755446812792, + "learning_rate": 2.426201742773526e-06, + "loss": 0.3145, + "step": 26220, + "vit_learning_rate": 4.852403485547052e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6774, + "epoch": 1.3644402829796087, + "grad_norm": 3.4378972222458364, + "learning_rate": 2.4225916253104663e-06, + "loss": 0.3402, + "step": 26230, + "vit_learning_rate": 4.845183250620932e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6399, + "epoch": 1.3649604660840615, + "grad_norm": 1.7698357457738716, + "learning_rate": 2.418983336810839e-06, + "loss": 0.3227, + "step": 26240, + "vit_learning_rate": 4.837966673621677e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6588, + "epoch": 1.3654806491885143, + "grad_norm": 1.582897419175609, + "learning_rate": 2.4153768798351324e-06, + "loss": 0.3254, + "step": 26250, + "vit_learning_rate": 4.830753759670264e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6318, + "epoch": 1.366000832292967, + "grad_norm": 2.1025194621977557, + "learning_rate": 2.4117722569425385e-06, + "loss": 0.3266, + "step": 26260, + "vit_learning_rate": 4.823544513885076e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6709, + "epoch": 1.3665210153974199, + "grad_norm": 1.7875336217159865, + "learning_rate": 2.4081694706909414e-06, + "loss": 0.3533, + "step": 26270, + "vit_learning_rate": 4.816338941381882e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6401, + "epoch": 1.3670411985018727, + "grad_norm": 1.819120549853206, + "learning_rate": 2.404568523636927e-06, + "loss": 0.3403, + "step": 26280, + "vit_learning_rate": 4.809137047273854e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6813, + "epoch": 1.3675613816063255, + "grad_norm": 2.424928018028091, + "learning_rate": 2.400969418335779e-06, + "loss": 0.3431, + "step": 26290, + "vit_learning_rate": 4.801938836671558e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6647, + "epoch": 1.3680815647107782, + "grad_norm": 1.539065682945252, + "learning_rate": 2.3973721573414693e-06, + "loss": 0.3321, + "step": 26300, + "vit_learning_rate": 4.794744314682938e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6702, + "epoch": 1.368601747815231, + "grad_norm": 1.7633761318250663, + "learning_rate": 2.3937767432066615e-06, + "loss": 0.3213, + "step": 26310, + "vit_learning_rate": 4.787553486413323e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6257, + "epoch": 1.3691219309196838, + "grad_norm": 1.5510008254921284, + "learning_rate": 2.3901831784827103e-06, + "loss": 0.3312, + "step": 26320, + "vit_learning_rate": 4.780366356965419e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6565, + "epoch": 1.3696421140241366, + "grad_norm": 1.9133705381164956, + "learning_rate": 2.3865914657196566e-06, + "loss": 0.3277, + "step": 26330, + "vit_learning_rate": 4.773182931439313e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7241, + "epoch": 1.3701622971285894, + "grad_norm": 1.5584581157831792, + "learning_rate": 2.3830016074662276e-06, + "loss": 0.3013, + "step": 26340, + "vit_learning_rate": 4.7660032149324546e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6399, + "epoch": 1.370682480233042, + "grad_norm": 1.7509536364023033, + "learning_rate": 2.379413606269836e-06, + "loss": 0.3431, + "step": 26350, + "vit_learning_rate": 4.7588272125396713e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6795, + "epoch": 1.3712026633374947, + "grad_norm": 1.478470308988547, + "learning_rate": 2.3758274646765738e-06, + "loss": 0.3371, + "step": 26360, + "vit_learning_rate": 4.7516549293531474e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.707, + "epoch": 1.3717228464419475, + "grad_norm": 1.6859698144838176, + "learning_rate": 2.3722431852312144e-06, + "loss": 0.3401, + "step": 26370, + "vit_learning_rate": 4.744486370462428e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6915, + "epoch": 1.3722430295464003, + "grad_norm": 1.6135521362132463, + "learning_rate": 2.3686607704772134e-06, + "loss": 0.3037, + "step": 26380, + "vit_learning_rate": 4.7373215409544264e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.671, + "epoch": 1.372763212650853, + "grad_norm": 1.3970300538177227, + "learning_rate": 2.365080222956699e-06, + "loss": 0.3161, + "step": 26390, + "vit_learning_rate": 4.730160445913397e-07 + }, + { + "avg_batch_load_time": 0.0038, + "avg_batch_processing_time": 0.7323, + "epoch": 1.373283395755306, + "grad_norm": 2.111868686221846, + "learning_rate": 2.361501545210477e-06, + "loss": 0.3404, + "step": 26400, + "vit_learning_rate": 4.723003090420953e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7288, + "epoch": 1.3738035788597587, + "grad_norm": 1.1539034421431367, + "learning_rate": 2.3579247397780205e-06, + "loss": 0.3147, + "step": 26410, + "vit_learning_rate": 4.71584947955604e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6908, + "epoch": 1.3743237619642115, + "grad_norm": 1.9346735319403632, + "learning_rate": 2.354349809197479e-06, + "loss": 0.3492, + "step": 26420, + "vit_learning_rate": 4.708699618394958e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6774, + "epoch": 1.3748439450686643, + "grad_norm": 1.68275695821912, + "learning_rate": 2.350776756005676e-06, + "loss": 0.3154, + "step": 26430, + "vit_learning_rate": 4.7015535120113517e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.713, + "epoch": 1.3753641281731168, + "grad_norm": 1.7524964925648883, + "learning_rate": 2.3472055827380942e-06, + "loss": 0.3092, + "step": 26440, + "vit_learning_rate": 4.6944111654761874e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6519, + "epoch": 1.3758843112775696, + "grad_norm": 1.5201734586836193, + "learning_rate": 2.3436362919288868e-06, + "loss": 0.3189, + "step": 26450, + "vit_learning_rate": 4.6872725838577734e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6488, + "epoch": 1.3764044943820224, + "grad_norm": 1.6646452707259185, + "learning_rate": 2.34006888611087e-06, + "loss": 0.3293, + "step": 26460, + "vit_learning_rate": 4.6801377722217395e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7127, + "epoch": 1.3769246774864752, + "grad_norm": 1.3914985859565938, + "learning_rate": 2.3365033678155225e-06, + "loss": 0.3378, + "step": 26470, + "vit_learning_rate": 4.6730067356310445e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.668, + "epoch": 1.377444860590928, + "grad_norm": 1.4062341332368324, + "learning_rate": 2.3329397395729843e-06, + "loss": 0.3102, + "step": 26480, + "vit_learning_rate": 4.6658794791459676e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6986, + "epoch": 1.3779650436953808, + "grad_norm": 1.8171577960003191, + "learning_rate": 2.329378003912052e-06, + "loss": 0.3416, + "step": 26490, + "vit_learning_rate": 4.658756007824104e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6514, + "epoch": 1.3784852267998335, + "grad_norm": 1.4451086738274803, + "learning_rate": 2.3258181633601836e-06, + "loss": 0.3127, + "step": 26500, + "vit_learning_rate": 4.6516363267203664e-07 + }, + { + "avg_batch_load_time": 0.0191, + "avg_batch_processing_time": 0.7178, + "epoch": 1.3790054099042863, + "grad_norm": 1.4761092823086879, + "learning_rate": 2.322260220443485e-06, + "loss": 0.3237, + "step": 26510, + "vit_learning_rate": 4.6445204408869696e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6528, + "epoch": 1.3795255930087391, + "grad_norm": 1.2827820684155062, + "learning_rate": 2.3187041776867253e-06, + "loss": 0.3261, + "step": 26520, + "vit_learning_rate": 4.63740835537345e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6908, + "epoch": 1.380045776113192, + "grad_norm": 1.155374651288378, + "learning_rate": 2.3151500376133177e-06, + "loss": 0.3039, + "step": 26530, + "vit_learning_rate": 4.6303000752266354e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6522, + "epoch": 1.3805659592176447, + "grad_norm": 1.2233673609532434, + "learning_rate": 2.3115978027453296e-06, + "loss": 0.3482, + "step": 26540, + "vit_learning_rate": 4.6231956054906583e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7232, + "epoch": 1.3810861423220975, + "grad_norm": 1.4544957729538788, + "learning_rate": 2.30804747560347e-06, + "loss": 0.3425, + "step": 26550, + "vit_learning_rate": 4.616094951206939e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6274, + "epoch": 1.3816063254265503, + "grad_norm": 1.4052451216740733, + "learning_rate": 2.304499058707099e-06, + "loss": 0.3251, + "step": 26560, + "vit_learning_rate": 4.608998117414198e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.637, + "epoch": 1.382126508531003, + "grad_norm": 2.1819398046814658, + "learning_rate": 2.300952554574224e-06, + "loss": 0.3276, + "step": 26570, + "vit_learning_rate": 4.6019051091484475e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6362, + "epoch": 1.3826466916354556, + "grad_norm": 1.4773463807980605, + "learning_rate": 2.29740796572149e-06, + "loss": 0.3308, + "step": 26580, + "vit_learning_rate": 4.5948159314429804e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6769, + "epoch": 1.3831668747399084, + "grad_norm": 2.0565077495438278, + "learning_rate": 2.2938652946641855e-06, + "loss": 0.3282, + "step": 26590, + "vit_learning_rate": 4.58773058932837e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6525, + "epoch": 1.3836870578443612, + "grad_norm": 1.2204031062035088, + "learning_rate": 2.2903245439162365e-06, + "loss": 0.3316, + "step": 26600, + "vit_learning_rate": 4.580649087832472e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7029, + "epoch": 1.384207240948814, + "grad_norm": 2.1187032703272473, + "learning_rate": 2.2867857159902063e-06, + "loss": 0.3127, + "step": 26610, + "vit_learning_rate": 4.573571431980412e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.732, + "epoch": 1.3847274240532668, + "grad_norm": 1.811374980466135, + "learning_rate": 2.2832488133972952e-06, + "loss": 0.3255, + "step": 26620, + "vit_learning_rate": 4.56649762679459e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6821, + "epoch": 1.3852476071577196, + "grad_norm": 1.2043569557720584, + "learning_rate": 2.2797138386473373e-06, + "loss": 0.3226, + "step": 26630, + "vit_learning_rate": 4.559427677294674e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6446, + "epoch": 1.3857677902621723, + "grad_norm": 1.831397525639736, + "learning_rate": 2.2761807942487964e-06, + "loss": 0.3175, + "step": 26640, + "vit_learning_rate": 4.552361588497592e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6333, + "epoch": 1.3862879733666251, + "grad_norm": 1.5231480140007867, + "learning_rate": 2.2726496827087676e-06, + "loss": 0.328, + "step": 26650, + "vit_learning_rate": 4.5452993654175345e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6707, + "epoch": 1.3868081564710777, + "grad_norm": 1.8346601160669676, + "learning_rate": 2.2691205065329773e-06, + "loss": 0.3303, + "step": 26660, + "vit_learning_rate": 4.538241013065954e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7073, + "epoch": 1.3873283395755305, + "grad_norm": 1.6563247601094049, + "learning_rate": 2.2655932682257755e-06, + "loss": 0.3338, + "step": 26670, + "vit_learning_rate": 4.53118653645155e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6685, + "epoch": 1.3878485226799833, + "grad_norm": 1.761631113909584, + "learning_rate": 2.2620679702901384e-06, + "loss": 0.3326, + "step": 26680, + "vit_learning_rate": 4.524135940580276e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6682, + "epoch": 1.388368705784436, + "grad_norm": 2.2397308591044265, + "learning_rate": 2.2585446152276615e-06, + "loss": 0.3259, + "step": 26690, + "vit_learning_rate": 4.5170892304553223e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7373, + "epoch": 1.3888888888888888, + "grad_norm": 1.571016469740784, + "learning_rate": 2.255023205538564e-06, + "loss": 0.3176, + "step": 26700, + "vit_learning_rate": 4.5100464110771274e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7125, + "epoch": 1.3894090719933416, + "grad_norm": 1.3914461550813177, + "learning_rate": 2.2515037437216895e-06, + "loss": 0.326, + "step": 26710, + "vit_learning_rate": 4.503007487443379e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.863, + "epoch": 1.3899292550977944, + "grad_norm": 2.050375144945346, + "learning_rate": 2.2479862322744937e-06, + "loss": 0.3417, + "step": 26720, + "vit_learning_rate": 4.495972464548987e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6838, + "epoch": 1.3904494382022472, + "grad_norm": 1.3497016783696747, + "learning_rate": 2.244470673693049e-06, + "loss": 0.3378, + "step": 26730, + "vit_learning_rate": 4.488941347386097e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6481, + "epoch": 1.3909696213067, + "grad_norm": 1.892347062085644, + "learning_rate": 2.2409570704720427e-06, + "loss": 0.3167, + "step": 26740, + "vit_learning_rate": 4.4819141409440853e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6829, + "epoch": 1.3914898044111528, + "grad_norm": 1.7771080645181763, + "learning_rate": 2.2374454251047757e-06, + "loss": 0.3184, + "step": 26750, + "vit_learning_rate": 4.474890850209551e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.9174, + "epoch": 1.3920099875156056, + "grad_norm": 1.3580740283629613, + "learning_rate": 2.233935740083157e-06, + "loss": 0.331, + "step": 26760, + "vit_learning_rate": 4.467871480166313e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7523, + "epoch": 1.3925301706200583, + "grad_norm": 2.8720767080601166, + "learning_rate": 2.230428017897707e-06, + "loss": 0.326, + "step": 26770, + "vit_learning_rate": 4.460856035795413e-07 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7086, + "epoch": 1.3930503537245111, + "grad_norm": 1.5629234070629632, + "learning_rate": 2.226922261037551e-06, + "loss": 0.3281, + "step": 26780, + "vit_learning_rate": 4.453844522075102e-07 + }, + { + "avg_batch_load_time": 0.0078, + "avg_batch_processing_time": 0.7672, + "epoch": 1.393570536828964, + "grad_norm": 1.7148763579621165, + "learning_rate": 2.223418471990421e-06, + "loss": 0.3259, + "step": 26790, + "vit_learning_rate": 4.446836943980842e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7293, + "epoch": 1.3940907199334165, + "grad_norm": 1.7951134103708932, + "learning_rate": 2.2199166532426553e-06, + "loss": 0.3425, + "step": 26800, + "vit_learning_rate": 4.43983330648531e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6074, + "epoch": 1.3946109030378693, + "grad_norm": 1.5786480029243608, + "learning_rate": 2.2164168072791886e-06, + "loss": 0.3202, + "step": 26810, + "vit_learning_rate": 4.4328336145583766e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6691, + "epoch": 1.395131086142322, + "grad_norm": 2.209789564340985, + "learning_rate": 2.2129189365835618e-06, + "loss": 0.3294, + "step": 26820, + "vit_learning_rate": 4.425837873167123e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6323, + "epoch": 1.3956512692467749, + "grad_norm": 1.3406977320068105, + "learning_rate": 2.2094230436379054e-06, + "loss": 0.3132, + "step": 26830, + "vit_learning_rate": 4.4188460872758105e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6777, + "epoch": 1.3961714523512276, + "grad_norm": 1.9401485659768787, + "learning_rate": 2.205929130922953e-06, + "loss": 0.321, + "step": 26840, + "vit_learning_rate": 4.4118582618459055e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.8647, + "epoch": 1.3966916354556804, + "grad_norm": 1.685408629738832, + "learning_rate": 2.202437200918035e-06, + "loss": 0.3326, + "step": 26850, + "vit_learning_rate": 4.4048744018360696e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6891, + "epoch": 1.3972118185601332, + "grad_norm": 1.3937789715872457, + "learning_rate": 2.19894725610107e-06, + "loss": 0.3527, + "step": 26860, + "vit_learning_rate": 4.397894512202139e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7292, + "epoch": 1.397732001664586, + "grad_norm": 1.5541513811245178, + "learning_rate": 2.195459298948569e-06, + "loss": 0.3195, + "step": 26870, + "vit_learning_rate": 4.3909185978971377e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6428, + "epoch": 1.3982521847690386, + "grad_norm": 1.6295128945771353, + "learning_rate": 2.1919733319356333e-06, + "loss": 0.3194, + "step": 26880, + "vit_learning_rate": 4.3839466638712665e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.674, + "epoch": 1.3987723678734914, + "grad_norm": 1.4867719066550222, + "learning_rate": 2.1884893575359518e-06, + "loss": 0.3504, + "step": 26890, + "vit_learning_rate": 4.3769787150719027e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6913, + "epoch": 1.3992925509779441, + "grad_norm": 1.3172289711312326, + "learning_rate": 2.1850073782217982e-06, + "loss": 0.3194, + "step": 26900, + "vit_learning_rate": 4.3700147564435963e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7065, + "epoch": 1.399812734082397, + "grad_norm": 1.6444720085879558, + "learning_rate": 2.181527396464034e-06, + "loss": 0.3353, + "step": 26910, + "vit_learning_rate": 4.363054792928067e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6722, + "epoch": 1.4003329171868497, + "grad_norm": 1.4722184536807608, + "learning_rate": 2.178049414732098e-06, + "loss": 0.3421, + "step": 26920, + "vit_learning_rate": 4.356098829464195e-07 + }, + { + "avg_batch_load_time": 0.0006, + "avg_batch_processing_time": 0.9156, + "epoch": 1.4008531002913025, + "grad_norm": 1.5836301019461652, + "learning_rate": 2.1745734354940144e-06, + "loss": 0.3283, + "step": 26930, + "vit_learning_rate": 4.349146870988029e-07 + }, + { + "avg_batch_load_time": 0.1292, + "avg_batch_processing_time": 0.7009, + "epoch": 1.4013732833957553, + "grad_norm": 2.087277379199312, + "learning_rate": 2.171099461216382e-06, + "loss": 0.3326, + "step": 26940, + "vit_learning_rate": 4.342198922432764e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.8102, + "epoch": 1.401893466500208, + "grad_norm": 2.250586405995666, + "learning_rate": 2.167627494364385e-06, + "loss": 0.3187, + "step": 26950, + "vit_learning_rate": 4.3352549887287695e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6835, + "epoch": 1.4024136496046609, + "grad_norm": 1.4899645119213623, + "learning_rate": 2.164157537401772e-06, + "loss": 0.3123, + "step": 26960, + "vit_learning_rate": 4.3283150748035435e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6586, + "epoch": 1.4029338327091136, + "grad_norm": 1.2788736227440416, + "learning_rate": 2.160689592790873e-06, + "loss": 0.3232, + "step": 26970, + "vit_learning_rate": 4.321379185581745e-07 + }, + { + "avg_batch_load_time": 0.0008, + "avg_batch_processing_time": 0.6663, + "epoch": 1.4034540158135664, + "grad_norm": 2.678792787007878, + "learning_rate": 2.1572236629925845e-06, + "loss": 0.3433, + "step": 26980, + "vit_learning_rate": 4.314447325985169e-07 + }, + { + "avg_batch_load_time": 0.0009, + "avg_batch_processing_time": 0.6633, + "epoch": 1.4039741989180192, + "grad_norm": 1.7020799897129928, + "learning_rate": 2.153759750466382e-06, + "loss": 0.3349, + "step": 26990, + "vit_learning_rate": 4.307519500932764e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6506, + "epoch": 1.404494382022472, + "grad_norm": 1.430486297136629, + "learning_rate": 2.150297857670302e-06, + "loss": 0.3439, + "step": 27000, + "vit_learning_rate": 4.3005957153406037e-07 + }, + { + "avg_batch_load_time": 0.0009, + "avg_batch_processing_time": 0.6979, + "epoch": 1.4050145651269248, + "grad_norm": 1.8924288339197521, + "learning_rate": 2.1468379870609497e-06, + "loss": 0.342, + "step": 27010, + "vit_learning_rate": 4.2936759741218985e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7554, + "epoch": 1.4055347482313774, + "grad_norm": 1.479990352036297, + "learning_rate": 2.143380141093495e-06, + "loss": 0.3136, + "step": 27020, + "vit_learning_rate": 4.2867602821869897e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6538, + "epoch": 1.4060549313358301, + "grad_norm": 1.097045945772355, + "learning_rate": 2.139924322221672e-06, + "loss": 0.3254, + "step": 27030, + "vit_learning_rate": 4.2798486444433433e-07 + }, + { + "avg_batch_load_time": 0.001, + "avg_batch_processing_time": 0.6971, + "epoch": 1.406575114440283, + "grad_norm": 1.7654262928368116, + "learning_rate": 2.1364705328977758e-06, + "loss": 0.3271, + "step": 27040, + "vit_learning_rate": 4.272941065795551e-07 + }, + { + "avg_batch_load_time": 0.0008, + "avg_batch_processing_time": 0.7778, + "epoch": 1.4070952975447357, + "grad_norm": 1.9980911690627974, + "learning_rate": 2.1330187755726622e-06, + "loss": 0.3323, + "step": 27050, + "vit_learning_rate": 4.266037551145324e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6549, + "epoch": 1.4076154806491885, + "grad_norm": 1.3507792221572803, + "learning_rate": 2.1295690526957434e-06, + "loss": 0.3162, + "step": 27060, + "vit_learning_rate": 4.2591381053914863e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.788, + "epoch": 1.4081356637536413, + "grad_norm": 1.5137028538317967, + "learning_rate": 2.12612136671499e-06, + "loss": 0.3141, + "step": 27070, + "vit_learning_rate": 4.252242733429979e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7058, + "epoch": 1.408655846858094, + "grad_norm": 1.264252893669838, + "learning_rate": 2.1226757200769225e-06, + "loss": 0.3423, + "step": 27080, + "vit_learning_rate": 4.245351440153845e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6849, + "epoch": 1.4091760299625469, + "grad_norm": 1.3907708348522592, + "learning_rate": 2.119232115226625e-06, + "loss": 0.3359, + "step": 27090, + "vit_learning_rate": 4.238464230453249e-07 + }, + { + "avg_batch_load_time": 0.0038, + "avg_batch_processing_time": 0.733, + "epoch": 1.4096962130669997, + "grad_norm": 1.2857666241299852, + "learning_rate": 2.115790554607719e-06, + "loss": 0.3192, + "step": 27100, + "vit_learning_rate": 4.2315811092154375e-07 + }, + { + "avg_batch_load_time": 0.0009, + "avg_batch_processing_time": 0.7144, + "epoch": 1.4102163961714522, + "grad_norm": 1.6040098204215203, + "learning_rate": 2.1123510406623837e-06, + "loss": 0.3195, + "step": 27110, + "vit_learning_rate": 4.224702081324767e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6629, + "epoch": 1.410736579275905, + "grad_norm": 1.6266646829482745, + "learning_rate": 2.108913575831343e-06, + "loss": 0.334, + "step": 27120, + "vit_learning_rate": 4.217827151662685e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6142, + "epoch": 1.4112567623803578, + "grad_norm": 1.9965240261816055, + "learning_rate": 2.1054781625538712e-06, + "loss": 0.355, + "step": 27130, + "vit_learning_rate": 4.2109563251077417e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.7818, + "epoch": 1.4117769454848106, + "grad_norm": 1.3225249729194115, + "learning_rate": 2.1020448032677822e-06, + "loss": 0.3414, + "step": 27140, + "vit_learning_rate": 4.2040896065355636e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6975, + "epoch": 1.4122971285892634, + "grad_norm": 2.9470869485387894, + "learning_rate": 2.098613500409432e-06, + "loss": 0.3108, + "step": 27150, + "vit_learning_rate": 4.197227000818864e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7367, + "epoch": 1.4128173116937162, + "grad_norm": 1.3575866353251496, + "learning_rate": 2.095184256413721e-06, + "loss": 0.3448, + "step": 27160, + "vit_learning_rate": 4.1903685128274413e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7043, + "epoch": 1.413337494798169, + "grad_norm": 1.2774826453313426, + "learning_rate": 2.0917570737140857e-06, + "loss": 0.3171, + "step": 27170, + "vit_learning_rate": 4.183514147428171e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6775, + "epoch": 1.4138576779026217, + "grad_norm": 1.564318438414721, + "learning_rate": 2.0883319547425002e-06, + "loss": 0.334, + "step": 27180, + "vit_learning_rate": 4.1766639094849996e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6539, + "epoch": 1.4143778610070745, + "grad_norm": 1.9038509934294494, + "learning_rate": 2.0849089019294757e-06, + "loss": 0.3418, + "step": 27190, + "vit_learning_rate": 4.1698178038589505e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7095, + "epoch": 1.4148980441115273, + "grad_norm": 1.4292637489022966, + "learning_rate": 2.0814879177040555e-06, + "loss": 0.3294, + "step": 27200, + "vit_learning_rate": 4.16297583540811e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6834, + "epoch": 1.41541822721598, + "grad_norm": 1.5778581461805556, + "learning_rate": 2.078069004493816e-06, + "loss": 0.3329, + "step": 27210, + "vit_learning_rate": 4.1561380089876307e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.648, + "epoch": 1.4159384103204329, + "grad_norm": 1.477061628250616, + "learning_rate": 2.0746521647248615e-06, + "loss": 0.3305, + "step": 27220, + "vit_learning_rate": 4.149304329449722e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7724, + "epoch": 1.4164585934248857, + "grad_norm": 1.345533887688298, + "learning_rate": 2.071237400821833e-06, + "loss": 0.3329, + "step": 27230, + "vit_learning_rate": 4.142474801643666e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6694, + "epoch": 1.4169787765293385, + "grad_norm": 1.559309239584505, + "learning_rate": 2.067824715207888e-06, + "loss": 0.3158, + "step": 27240, + "vit_learning_rate": 4.135649430415775e-07 + }, + { + "avg_batch_load_time": 0.0331, + "avg_batch_processing_time": 0.7159, + "epoch": 1.417498959633791, + "grad_norm": 2.2140186992418074, + "learning_rate": 2.064414110304714e-06, + "loss": 0.3215, + "step": 27250, + "vit_learning_rate": 4.128828220609427e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 1.0323, + "epoch": 1.4180191427382438, + "grad_norm": 1.544463571142167, + "learning_rate": 2.061005588532521e-06, + "loss": 0.3342, + "step": 27260, + "vit_learning_rate": 4.122011177065041e-07 + }, + { + "avg_batch_load_time": 0.0057, + "avg_batch_processing_time": 0.7822, + "epoch": 1.4185393258426966, + "grad_norm": 2.255590281715965, + "learning_rate": 2.057599152310045e-06, + "loss": 0.318, + "step": 27270, + "vit_learning_rate": 4.1151983046200887e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.719, + "epoch": 1.4190595089471494, + "grad_norm": 1.766203652243497, + "learning_rate": 2.0541948040545366e-06, + "loss": 0.3334, + "step": 27280, + "vit_learning_rate": 4.108389608109073e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7591, + "epoch": 1.4195796920516022, + "grad_norm": 1.583349469901559, + "learning_rate": 2.0507925461817675e-06, + "loss": 0.3212, + "step": 27290, + "vit_learning_rate": 4.1015850923635343e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6732, + "epoch": 1.420099875156055, + "grad_norm": 1.4718839840013054, + "learning_rate": 2.0473923811060247e-06, + "loss": 0.3273, + "step": 27300, + "vit_learning_rate": 4.094784762212049e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7048, + "epoch": 1.4206200582605077, + "grad_norm": 1.3198024920496871, + "learning_rate": 2.043994311240111e-06, + "loss": 0.3398, + "step": 27310, + "vit_learning_rate": 4.0879886224802216e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.8807, + "epoch": 1.4211402413649605, + "grad_norm": 1.7535298985507717, + "learning_rate": 2.0405983389953426e-06, + "loss": 0.3284, + "step": 27320, + "vit_learning_rate": 4.081196677990685e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.8198, + "epoch": 1.421660424469413, + "grad_norm": 1.6054873500219886, + "learning_rate": 2.037204466781546e-06, + "loss": 0.3289, + "step": 27330, + "vit_learning_rate": 4.0744089335630913e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.752, + "epoch": 1.4221806075738659, + "grad_norm": 1.3880823107882954, + "learning_rate": 2.033812697007059e-06, + "loss": 0.3568, + "step": 27340, + "vit_learning_rate": 4.067625394014117e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6404, + "epoch": 1.4227007906783187, + "grad_norm": 1.353476954192583, + "learning_rate": 2.030423032078726e-06, + "loss": 0.342, + "step": 27350, + "vit_learning_rate": 4.0608460641574513e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6566, + "epoch": 1.4232209737827715, + "grad_norm": 1.2485207663190931, + "learning_rate": 2.0270354744018976e-06, + "loss": 0.3148, + "step": 27360, + "vit_learning_rate": 4.054070948803795e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6351, + "epoch": 1.4237411568872242, + "grad_norm": 1.3802042279776585, + "learning_rate": 2.0236500263804355e-06, + "loss": 0.3284, + "step": 27370, + "vit_learning_rate": 4.04730005276087e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6418, + "epoch": 1.424261339991677, + "grad_norm": 1.8872703426349577, + "learning_rate": 2.0202666904166924e-06, + "loss": 0.314, + "step": 27380, + "vit_learning_rate": 4.040533380833384e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6558, + "epoch": 1.4247815230961298, + "grad_norm": 1.856764890875366, + "learning_rate": 2.0168854689115306e-06, + "loss": 0.3262, + "step": 27390, + "vit_learning_rate": 4.033770937823061e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6931, + "epoch": 1.4253017062005826, + "grad_norm": 1.728555224272831, + "learning_rate": 2.01350636426431e-06, + "loss": 0.3042, + "step": 27400, + "vit_learning_rate": 4.0270127285286194e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7019, + "epoch": 1.4258218893050354, + "grad_norm": 1.6160217584197687, + "learning_rate": 2.01012937887289e-06, + "loss": 0.3107, + "step": 27410, + "vit_learning_rate": 4.0202587577457793e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.699, + "epoch": 1.4263420724094882, + "grad_norm": 1.380394820068909, + "learning_rate": 2.0067545151336235e-06, + "loss": 0.3424, + "step": 27420, + "vit_learning_rate": 4.013509030267247e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7406, + "epoch": 1.426862255513941, + "grad_norm": 1.545796514941782, + "learning_rate": 2.0033817754413595e-06, + "loss": 0.3235, + "step": 27430, + "vit_learning_rate": 4.006763550882718e-07 + }, + { + "avg_batch_load_time": 0.0035, + "avg_batch_processing_time": 0.6757, + "epoch": 1.4273824386183938, + "grad_norm": 1.5330068776818508, + "learning_rate": 2.000011162189438e-06, + "loss": 0.3262, + "step": 27440, + "vit_learning_rate": 4.0000223243788753e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6707, + "epoch": 1.4279026217228465, + "grad_norm": 1.9491796394048002, + "learning_rate": 1.996642677769692e-06, + "loss": 0.3407, + "step": 27450, + "vit_learning_rate": 3.993285355539383e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6787, + "epoch": 1.4284228048272993, + "grad_norm": 1.327580901800108, + "learning_rate": 1.9932763245724428e-06, + "loss": 0.3356, + "step": 27460, + "vit_learning_rate": 3.9865526491448856e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7594, + "epoch": 1.428942987931752, + "grad_norm": 2.296845541181574, + "learning_rate": 1.9899121049865004e-06, + "loss": 0.3261, + "step": 27470, + "vit_learning_rate": 3.9798242099730004e-07 + }, + { + "avg_batch_load_time": 0.0641, + "avg_batch_processing_time": 3.4585, + "epoch": 1.4294631710362047, + "grad_norm": 1.4942692486342957, + "learning_rate": 1.986550021399159e-06, + "loss": 0.3289, + "step": 27480, + "vit_learning_rate": 3.9731000427983175e-07 + }, + { + "avg_batch_load_time": 0.0043, + "avg_batch_processing_time": 0.7771, + "epoch": 1.4299833541406575, + "grad_norm": 1.4115083393812862, + "learning_rate": 1.983190076196198e-06, + "loss": 0.3013, + "step": 27490, + "vit_learning_rate": 3.9663801523923957e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6795, + "epoch": 1.4305035372451103, + "grad_norm": 1.3865781443084348, + "learning_rate": 1.9798322717618785e-06, + "loss": 0.313, + "step": 27500, + "vit_learning_rate": 3.959664543523756e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6721, + "epoch": 1.431023720349563, + "grad_norm": 1.1880863225334006, + "learning_rate": 1.9764766104789477e-06, + "loss": 0.3211, + "step": 27510, + "vit_learning_rate": 3.9529532209578955e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7124, + "epoch": 1.4315439034540158, + "grad_norm": 1.5703199579393323, + "learning_rate": 1.9731230947286236e-06, + "loss": 0.321, + "step": 27520, + "vit_learning_rate": 3.946246189457246e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6879, + "epoch": 1.4320640865584686, + "grad_norm": 2.1222773856922923, + "learning_rate": 1.9697717268906063e-06, + "loss": 0.3105, + "step": 27530, + "vit_learning_rate": 3.939543453781212e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.705, + "epoch": 1.4325842696629214, + "grad_norm": 1.7002587343566118, + "learning_rate": 1.9664225093430705e-06, + "loss": 0.3398, + "step": 27540, + "vit_learning_rate": 3.932845018686141e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7589, + "epoch": 1.4331044527673742, + "grad_norm": 2.8900718821263136, + "learning_rate": 1.963075444462668e-06, + "loss": 0.3094, + "step": 27550, + "vit_learning_rate": 3.926150888925336e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 1.0782, + "epoch": 1.4336246358718268, + "grad_norm": 3.4529520446501123, + "learning_rate": 1.959730534624521e-06, + "loss": 0.3244, + "step": 27560, + "vit_learning_rate": 3.9194610692490415e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7333, + "epoch": 1.4341448189762795, + "grad_norm": 1.8902439266041797, + "learning_rate": 1.95638778220222e-06, + "loss": 0.3388, + "step": 27570, + "vit_learning_rate": 3.91277556440444e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7397, + "epoch": 1.4346650020807323, + "grad_norm": 1.526417131089836, + "learning_rate": 1.953047189567828e-06, + "loss": 0.3075, + "step": 27580, + "vit_learning_rate": 3.906094379135656e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7747, + "epoch": 1.4351851851851851, + "grad_norm": 2.552422455645319, + "learning_rate": 1.9497087590918745e-06, + "loss": 0.3194, + "step": 27590, + "vit_learning_rate": 3.8994175181837487e-07 + }, + { + "avg_batch_load_time": 0.0047, + "avg_batch_processing_time": 2.0762, + "epoch": 1.435705368289638, + "grad_norm": 1.759389857765656, + "learning_rate": 1.9463724931433537e-06, + "loss": 0.3287, + "step": 27600, + "vit_learning_rate": 3.8927449862867066e-07 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7106, + "epoch": 1.4362255513940907, + "grad_norm": 1.5028829876729557, + "learning_rate": 1.9430383940897245e-06, + "loss": 0.3108, + "step": 27610, + "vit_learning_rate": 3.8860767881794486e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6836, + "epoch": 1.4367457344985435, + "grad_norm": 3.2474582405613126, + "learning_rate": 1.9397064642969087e-06, + "loss": 0.3387, + "step": 27620, + "vit_learning_rate": 3.8794129285938173e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6883, + "epoch": 1.4372659176029963, + "grad_norm": 1.7375182459579888, + "learning_rate": 1.9363767061292875e-06, + "loss": 0.337, + "step": 27630, + "vit_learning_rate": 3.8727534122585747e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6817, + "epoch": 1.437786100707449, + "grad_norm": 1.5942415836177348, + "learning_rate": 1.933049121949703e-06, + "loss": 0.3329, + "step": 27640, + "vit_learning_rate": 3.866098243899405e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6232, + "epoch": 1.4383062838119018, + "grad_norm": 2.145722268796097, + "learning_rate": 1.9297237141194513e-06, + "loss": 0.342, + "step": 27650, + "vit_learning_rate": 3.8594474282389023e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6446, + "epoch": 1.4388264669163546, + "grad_norm": 1.6525463517501606, + "learning_rate": 1.926400484998289e-06, + "loss": 0.3463, + "step": 27660, + "vit_learning_rate": 3.852800969996577e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6514, + "epoch": 1.4393466500208074, + "grad_norm": 1.665051765005399, + "learning_rate": 1.923079436944421e-06, + "loss": 0.3233, + "step": 27670, + "vit_learning_rate": 3.846158873888842e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6794, + "epoch": 1.4398668331252602, + "grad_norm": 1.4295780212519482, + "learning_rate": 1.9197605723145075e-06, + "loss": 0.3345, + "step": 27680, + "vit_learning_rate": 3.8395211446290143e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6367, + "epoch": 1.440387016229713, + "grad_norm": 4.453766405047779, + "learning_rate": 1.9164438934636627e-06, + "loss": 0.3174, + "step": 27690, + "vit_learning_rate": 3.8328877869273245e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6839, + "epoch": 1.4409071993341656, + "grad_norm": 2.197297882002249, + "learning_rate": 1.9131294027454433e-06, + "loss": 0.3435, + "step": 27700, + "vit_learning_rate": 3.826258805490886e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6774, + "epoch": 1.4414273824386183, + "grad_norm": 1.661223313319095, + "learning_rate": 1.9098171025118594e-06, + "loss": 0.3087, + "step": 27710, + "vit_learning_rate": 3.8196342050237183e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6549, + "epoch": 1.4419475655430711, + "grad_norm": 2.1715610874626106, + "learning_rate": 1.9065069951133575e-06, + "loss": 0.3254, + "step": 27720, + "vit_learning_rate": 3.813013990226714e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6196, + "epoch": 1.442467748647524, + "grad_norm": 1.3451649292183232, + "learning_rate": 1.9031990828988389e-06, + "loss": 0.3054, + "step": 27730, + "vit_learning_rate": 3.8063981657976776e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6853, + "epoch": 1.4429879317519767, + "grad_norm": 1.7195268296308592, + "learning_rate": 1.8998933682156418e-06, + "loss": 0.3293, + "step": 27740, + "vit_learning_rate": 3.799786736431283e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7678, + "epoch": 1.4435081148564295, + "grad_norm": 1.4965309442385288, + "learning_rate": 1.896589853409544e-06, + "loss": 0.32, + "step": 27750, + "vit_learning_rate": 3.793179706819087e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7718, + "epoch": 1.4440282979608823, + "grad_norm": 1.5462300096809614, + "learning_rate": 1.8932885408247647e-06, + "loss": 0.3204, + "step": 27760, + "vit_learning_rate": 3.786577081649529e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6698, + "epoch": 1.444548481065335, + "grad_norm": 1.534053426833435, + "learning_rate": 1.8899894328039597e-06, + "loss": 0.3121, + "step": 27770, + "vit_learning_rate": 3.779978865607919e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6638, + "epoch": 1.4450686641697876, + "grad_norm": 1.2851509186003092, + "learning_rate": 1.886692531688219e-06, + "loss": 0.3399, + "step": 27780, + "vit_learning_rate": 3.7733850633764374e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6748, + "epoch": 1.4455888472742404, + "grad_norm": 1.6926497787215529, + "learning_rate": 1.8833978398170689e-06, + "loss": 0.3226, + "step": 27790, + "vit_learning_rate": 3.766795679634137e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6752, + "epoch": 1.4461090303786932, + "grad_norm": 1.8914878088924294, + "learning_rate": 1.8801053595284663e-06, + "loss": 0.3239, + "step": 27800, + "vit_learning_rate": 3.7602107190569323e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.8391, + "epoch": 1.446629213483146, + "grad_norm": 3.031442298759916, + "learning_rate": 1.8768150931587992e-06, + "loss": 0.3303, + "step": 27810, + "vit_learning_rate": 3.7536301863175977e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7463, + "epoch": 1.4471493965875988, + "grad_norm": 1.2425144370511965, + "learning_rate": 1.8735270430428826e-06, + "loss": 0.3372, + "step": 27820, + "vit_learning_rate": 3.7470540860857646e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6835, + "epoch": 1.4476695796920516, + "grad_norm": 1.406258892188088, + "learning_rate": 1.8702412115139646e-06, + "loss": 0.3097, + "step": 27830, + "vit_learning_rate": 3.7404824230279286e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7233, + "epoch": 1.4481897627965044, + "grad_norm": 1.3454980288309362, + "learning_rate": 1.8669576009037139e-06, + "loss": 0.3177, + "step": 27840, + "vit_learning_rate": 3.733915201807427e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6883, + "epoch": 1.4487099459009571, + "grad_norm": 2.009486995735215, + "learning_rate": 1.8636762135422248e-06, + "loss": 0.34, + "step": 27850, + "vit_learning_rate": 3.727352427084449e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6929, + "epoch": 1.44923012900541, + "grad_norm": 1.6867520870793054, + "learning_rate": 1.86039705175801e-06, + "loss": 0.3267, + "step": 27860, + "vit_learning_rate": 3.7207941035160196e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7511, + "epoch": 1.4497503121098627, + "grad_norm": 1.748953655542545, + "learning_rate": 1.8571201178780106e-06, + "loss": 0.3556, + "step": 27870, + "vit_learning_rate": 3.714240235756021e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7451, + "epoch": 1.4502704952143155, + "grad_norm": 1.4182208129620821, + "learning_rate": 1.853845414227582e-06, + "loss": 0.3121, + "step": 27880, + "vit_learning_rate": 3.707690828455163e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7376, + "epoch": 1.4507906783187683, + "grad_norm": 1.5697972620259546, + "learning_rate": 1.8505729431304963e-06, + "loss": 0.3464, + "step": 27890, + "vit_learning_rate": 3.701145886260992e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7549, + "epoch": 1.451310861423221, + "grad_norm": 1.2298551263363382, + "learning_rate": 1.8473027069089438e-06, + "loss": 0.3343, + "step": 27900, + "vit_learning_rate": 3.6946054138178875e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7186, + "epoch": 1.4518310445276739, + "grad_norm": 1.3183216384723249, + "learning_rate": 1.844034707883528e-06, + "loss": 0.3452, + "step": 27910, + "vit_learning_rate": 3.6880694157670556e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6997, + "epoch": 1.4523512276321264, + "grad_norm": 1.6427224257811368, + "learning_rate": 1.8407689483732637e-06, + "loss": 0.3169, + "step": 27920, + "vit_learning_rate": 3.681537896746527e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6747, + "epoch": 1.4528714107365792, + "grad_norm": 1.8369344796945917, + "learning_rate": 1.8375054306955781e-06, + "loss": 0.3462, + "step": 27930, + "vit_learning_rate": 3.675010861391156e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6584, + "epoch": 1.453391593841032, + "grad_norm": 2.8032787119112657, + "learning_rate": 1.8342441571663066e-06, + "loss": 0.3257, + "step": 27940, + "vit_learning_rate": 3.668488314332613e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7067, + "epoch": 1.4539117769454848, + "grad_norm": 1.2850017380897623, + "learning_rate": 1.8309851300996934e-06, + "loss": 0.3348, + "step": 27950, + "vit_learning_rate": 3.661970260199386e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.804, + "epoch": 1.4544319600499376, + "grad_norm": 1.3796205785243671, + "learning_rate": 1.8277283518083849e-06, + "loss": 0.3347, + "step": 27960, + "vit_learning_rate": 3.6554567036167693e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.643, + "epoch": 1.4549521431543904, + "grad_norm": 1.3779110910632848, + "learning_rate": 1.8244738246034383e-06, + "loss": 0.3452, + "step": 27970, + "vit_learning_rate": 3.648947649206876e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6669, + "epoch": 1.4554723262588432, + "grad_norm": 1.4446232061208217, + "learning_rate": 1.8212215507943082e-06, + "loss": 0.3243, + "step": 27980, + "vit_learning_rate": 3.642443101588616e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7185, + "epoch": 1.455992509363296, + "grad_norm": 1.3416403042880993, + "learning_rate": 1.8179715326888526e-06, + "loss": 0.3327, + "step": 27990, + "vit_learning_rate": 3.6359430653777046e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6734, + "epoch": 1.4565126924677487, + "grad_norm": 1.5903414841122971, + "learning_rate": 1.8147237725933248e-06, + "loss": 0.3498, + "step": 28000, + "vit_learning_rate": 3.6294475451866493e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.698, + "epoch": 1.4570328755722013, + "grad_norm": 1.7078923583882397, + "learning_rate": 1.8114782728123792e-06, + "loss": 0.3205, + "step": 28010, + "vit_learning_rate": 3.622956545624758e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6615, + "epoch": 1.457553058676654, + "grad_norm": 1.8146421727119146, + "learning_rate": 1.8082350356490685e-06, + "loss": 0.3196, + "step": 28020, + "vit_learning_rate": 3.616470071298137e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6728, + "epoch": 1.4580732417811069, + "grad_norm": 1.3906437245014807, + "learning_rate": 1.8049940634048358e-06, + "loss": 0.3439, + "step": 28030, + "vit_learning_rate": 3.609988126809671e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6881, + "epoch": 1.4585934248855597, + "grad_norm": 1.6187024007007624, + "learning_rate": 1.8017553583795177e-06, + "loss": 0.3458, + "step": 28040, + "vit_learning_rate": 3.603510716759035e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7638, + "epoch": 1.4591136079900124, + "grad_norm": 1.409968427342468, + "learning_rate": 1.7985189228713435e-06, + "loss": 0.3111, + "step": 28050, + "vit_learning_rate": 3.5970378457426863e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6713, + "epoch": 1.4596337910944652, + "grad_norm": 1.634629869749181, + "learning_rate": 1.7952847591769306e-06, + "loss": 0.3214, + "step": 28060, + "vit_learning_rate": 3.590569518353861e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7559, + "epoch": 1.460153974198918, + "grad_norm": 3.169007015050034, + "learning_rate": 1.7920528695912853e-06, + "loss": 0.3296, + "step": 28070, + "vit_learning_rate": 3.5841057391825704e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6657, + "epoch": 1.4606741573033708, + "grad_norm": 1.5000472943370862, + "learning_rate": 1.788823256407799e-06, + "loss": 0.3531, + "step": 28080, + "vit_learning_rate": 3.5776465128155977e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6849, + "epoch": 1.4611943404078236, + "grad_norm": 1.823678505063384, + "learning_rate": 1.7855959219182483e-06, + "loss": 0.3211, + "step": 28090, + "vit_learning_rate": 3.571191843836496e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6939, + "epoch": 1.4617145235122764, + "grad_norm": 1.408214157540923, + "learning_rate": 1.7823708684127916e-06, + "loss": 0.3205, + "step": 28100, + "vit_learning_rate": 3.5647417368255827e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6474, + "epoch": 1.4622347066167292, + "grad_norm": 1.3518484077112132, + "learning_rate": 1.7791480981799736e-06, + "loss": 0.3225, + "step": 28110, + "vit_learning_rate": 3.5582961963599466e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6616, + "epoch": 1.462754889721182, + "grad_norm": 1.8420510338548124, + "learning_rate": 1.7759276135067128e-06, + "loss": 0.3198, + "step": 28120, + "vit_learning_rate": 3.551855227013425e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6802, + "epoch": 1.4632750728256347, + "grad_norm": 1.3364707116824204, + "learning_rate": 1.77270941667831e-06, + "loss": 0.3268, + "step": 28130, + "vit_learning_rate": 3.5454188333566195e-07 + }, + { + "avg_batch_load_time": 0.0047, + "avg_batch_processing_time": 0.6614, + "epoch": 1.4637952559300875, + "grad_norm": 1.1789833942989476, + "learning_rate": 1.7694935099784376e-06, + "loss": 0.3111, + "step": 28140, + "vit_learning_rate": 3.538987019956875e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6927, + "epoch": 1.46431543903454, + "grad_norm": 1.8122638770050337, + "learning_rate": 1.7662798956891457e-06, + "loss": 0.3166, + "step": 28150, + "vit_learning_rate": 3.532559791378291e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6792, + "epoch": 1.4648356221389929, + "grad_norm": 1.9184719837579476, + "learning_rate": 1.7630685760908623e-06, + "loss": 0.3206, + "step": 28160, + "vit_learning_rate": 3.526137152181724e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6835, + "epoch": 1.4653558052434457, + "grad_norm": 1.4920877332931173, + "learning_rate": 1.7598595534623792e-06, + "loss": 0.3228, + "step": 28170, + "vit_learning_rate": 3.519719106924758e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6963, + "epoch": 1.4658759883478985, + "grad_norm": 1.531363206266077, + "learning_rate": 1.7566528300808633e-06, + "loss": 0.321, + "step": 28180, + "vit_learning_rate": 3.513305660161726e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.679, + "epoch": 1.4663961714523512, + "grad_norm": 1.5070943165481463, + "learning_rate": 1.7534484082218473e-06, + "loss": 0.31, + "step": 28190, + "vit_learning_rate": 3.506896816443694e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6735, + "epoch": 1.466916354556804, + "grad_norm": 1.3207791651126546, + "learning_rate": 1.7502462901592326e-06, + "loss": 0.3255, + "step": 28200, + "vit_learning_rate": 3.500492580318465e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6265, + "epoch": 1.4674365376612568, + "grad_norm": 1.6713813003777624, + "learning_rate": 1.7470464781652847e-06, + "loss": 0.3303, + "step": 28210, + "vit_learning_rate": 3.494092956330569e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6506, + "epoch": 1.4679567207657096, + "grad_norm": 1.7382742931864323, + "learning_rate": 1.7438489745106325e-06, + "loss": 0.3181, + "step": 28220, + "vit_learning_rate": 3.487697949021264e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6788, + "epoch": 1.4684769038701622, + "grad_norm": 1.3838303133470156, + "learning_rate": 1.7406537814642681e-06, + "loss": 0.3401, + "step": 28230, + "vit_learning_rate": 3.481307562928536e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6678, + "epoch": 1.468997086974615, + "grad_norm": 1.6076782797068292, + "learning_rate": 1.7374609012935412e-06, + "loss": 0.3254, + "step": 28240, + "vit_learning_rate": 3.474921802587082e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.695, + "epoch": 1.4695172700790677, + "grad_norm": 13.872515558764679, + "learning_rate": 1.734270336264165e-06, + "loss": 0.3341, + "step": 28250, + "vit_learning_rate": 3.468540672528329e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7006, + "epoch": 1.4700374531835205, + "grad_norm": 2.1412909208080144, + "learning_rate": 1.7310820886402064e-06, + "loss": 0.3197, + "step": 28260, + "vit_learning_rate": 3.4621641772804124e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.703, + "epoch": 1.4705576362879733, + "grad_norm": 1.1603250579159798, + "learning_rate": 1.72789616068409e-06, + "loss": 0.3119, + "step": 28270, + "vit_learning_rate": 3.455792321368179e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6917, + "epoch": 1.471077819392426, + "grad_norm": 1.5665062782455308, + "learning_rate": 1.724712554656589e-06, + "loss": 0.3381, + "step": 28280, + "vit_learning_rate": 3.4494251093131777e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7985, + "epoch": 1.4715980024968789, + "grad_norm": 1.8498465449757044, + "learning_rate": 1.7215312728168327e-06, + "loss": 0.3356, + "step": 28290, + "vit_learning_rate": 3.443062545633665e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.8296, + "epoch": 1.4721181856013317, + "grad_norm": 1.088981174267521, + "learning_rate": 1.7183523174223055e-06, + "loss": 0.3182, + "step": 28300, + "vit_learning_rate": 3.4367046348446107e-07 + }, + { + "avg_batch_load_time": 0.0052, + "avg_batch_processing_time": 0.8808, + "epoch": 1.4726383687057845, + "grad_norm": 1.7777170986070097, + "learning_rate": 1.7151756907288342e-06, + "loss": 0.3362, + "step": 28310, + "vit_learning_rate": 3.4303513814576677e-07 + }, + { + "avg_batch_load_time": 0.0088, + "avg_batch_processing_time": 0.9043, + "epoch": 1.4731585518102372, + "grad_norm": 1.355446338897722, + "learning_rate": 1.7120013949905956e-06, + "loss": 0.3226, + "step": 28320, + "vit_learning_rate": 3.4240027899811906e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6411, + "epoch": 1.47367873491469, + "grad_norm": 1.9308033380949534, + "learning_rate": 1.7088294324601124e-06, + "loss": 0.3407, + "step": 28330, + "vit_learning_rate": 3.417658864920224e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6799, + "epoch": 1.4741989180191428, + "grad_norm": 1.579369188691499, + "learning_rate": 1.7056598053882518e-06, + "loss": 0.3201, + "step": 28340, + "vit_learning_rate": 3.411319610776503e-07 + }, + { + "avg_batch_load_time": 0.076, + "avg_batch_processing_time": 0.6811, + "epoch": 1.4747191011235956, + "grad_norm": 1.9066970526606983, + "learning_rate": 1.7024925160242234e-06, + "loss": 0.3121, + "step": 28350, + "vit_learning_rate": 3.4049850320484463e-07 + }, + { + "avg_batch_load_time": 0.0034, + "avg_batch_processing_time": 0.6857, + "epoch": 1.4752392842280484, + "grad_norm": 1.2856309616252535, + "learning_rate": 1.6993275666155785e-06, + "loss": 0.3266, + "step": 28360, + "vit_learning_rate": 3.3986551332311566e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.736, + "epoch": 1.475759467332501, + "grad_norm": 1.5162819113702823, + "learning_rate": 1.696164959408207e-06, + "loss": 0.332, + "step": 28370, + "vit_learning_rate": 3.3923299188164134e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7037, + "epoch": 1.4762796504369537, + "grad_norm": 1.507908818029592, + "learning_rate": 1.6930046966463353e-06, + "loss": 0.3307, + "step": 28380, + "vit_learning_rate": 3.3860093932926706e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.688, + "epoch": 1.4767998335414065, + "grad_norm": 1.7588627330757587, + "learning_rate": 1.6898467805725322e-06, + "loss": 0.2991, + "step": 28390, + "vit_learning_rate": 3.379693561145064e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7062, + "epoch": 1.4773200166458593, + "grad_norm": 1.7464677822725532, + "learning_rate": 1.6866912134276969e-06, + "loss": 0.3131, + "step": 28400, + "vit_learning_rate": 3.3733824268553934e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7729, + "epoch": 1.4778401997503121, + "grad_norm": 1.5698892356518324, + "learning_rate": 1.6835379974510585e-06, + "loss": 0.2953, + "step": 28410, + "vit_learning_rate": 3.3670759949021166e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.774, + "epoch": 1.478360382854765, + "grad_norm": 1.881045717965767, + "learning_rate": 1.6803871348801826e-06, + "loss": 0.31, + "step": 28420, + "vit_learning_rate": 3.3607742697603644e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7311, + "epoch": 1.4788805659592177, + "grad_norm": 1.5204558960829253, + "learning_rate": 1.6772386279509622e-06, + "loss": 0.3316, + "step": 28430, + "vit_learning_rate": 3.354477255901924e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6522, + "epoch": 1.4794007490636705, + "grad_norm": 1.3046880742522577, + "learning_rate": 1.674092478897623e-06, + "loss": 0.341, + "step": 28440, + "vit_learning_rate": 3.348184957795246e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6461, + "epoch": 1.4799209321681233, + "grad_norm": 1.4474299360157348, + "learning_rate": 1.6709486899527133e-06, + "loss": 0.3267, + "step": 28450, + "vit_learning_rate": 3.341897379905426e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7154, + "epoch": 1.4804411152725758, + "grad_norm": 1.1326377890452508, + "learning_rate": 1.667807263347107e-06, + "loss": 0.332, + "step": 28460, + "vit_learning_rate": 3.3356145266942136e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.8035, + "epoch": 1.4809612983770286, + "grad_norm": 1.6869865605588978, + "learning_rate": 1.664668201310003e-06, + "loss": 0.3355, + "step": 28470, + "vit_learning_rate": 3.3293364026200055e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7446, + "epoch": 1.4814814814814814, + "grad_norm": 1.7979776558240852, + "learning_rate": 1.6615315060689208e-06, + "loss": 0.3272, + "step": 28480, + "vit_learning_rate": 3.323063012137841e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7127, + "epoch": 1.4820016645859342, + "grad_norm": 1.5105649205151677, + "learning_rate": 1.6583971798497023e-06, + "loss": 0.3309, + "step": 28490, + "vit_learning_rate": 3.316794359699404e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6611, + "epoch": 1.482521847690387, + "grad_norm": 1.7002772127100196, + "learning_rate": 1.6552652248765066e-06, + "loss": 0.3461, + "step": 28500, + "vit_learning_rate": 3.310530449753013e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7064, + "epoch": 1.4830420307948398, + "grad_norm": 1.2332798568713879, + "learning_rate": 1.652135643371811e-06, + "loss": 0.3164, + "step": 28510, + "vit_learning_rate": 3.304271286743622e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7051, + "epoch": 1.4835622138992925, + "grad_norm": 2.0027122837285276, + "learning_rate": 1.6490084375564069e-06, + "loss": 0.334, + "step": 28520, + "vit_learning_rate": 3.2980168751128135e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6898, + "epoch": 1.4840823970037453, + "grad_norm": 1.6724994627470944, + "learning_rate": 1.6458836096494046e-06, + "loss": 0.2941, + "step": 28530, + "vit_learning_rate": 3.2917672192988087e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.701, + "epoch": 1.4846025801081981, + "grad_norm": 1.1014198954812744, + "learning_rate": 1.6427611618682238e-06, + "loss": 0.3294, + "step": 28540, + "vit_learning_rate": 3.285522323736447e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7095, + "epoch": 1.485122763212651, + "grad_norm": 1.5575963470916836, + "learning_rate": 1.6396410964285914e-06, + "loss": 0.3111, + "step": 28550, + "vit_learning_rate": 3.2792821928571824e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6804, + "epoch": 1.4856429463171037, + "grad_norm": 1.5600217365073468, + "learning_rate": 1.6365234155445493e-06, + "loss": 0.3299, + "step": 28560, + "vit_learning_rate": 3.273046831089098e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.686, + "epoch": 1.4861631294215565, + "grad_norm": 1.4750731898004432, + "learning_rate": 1.6334081214284441e-06, + "loss": 0.2947, + "step": 28570, + "vit_learning_rate": 3.266816242856888e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6911, + "epoch": 1.4866833125260093, + "grad_norm": 2.0235561124491115, + "learning_rate": 1.6302952162909335e-06, + "loss": 0.342, + "step": 28580, + "vit_learning_rate": 3.2605904325818667e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7158, + "epoch": 1.487203495630462, + "grad_norm": 1.8012597825923256, + "learning_rate": 1.6271847023409753e-06, + "loss": 0.3094, + "step": 28590, + "vit_learning_rate": 3.25436940468195e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7363, + "epoch": 1.4877236787349146, + "grad_norm": 1.705828404220498, + "learning_rate": 1.6240765817858307e-06, + "loss": 0.3361, + "step": 28600, + "vit_learning_rate": 3.248153163571661e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6529, + "epoch": 1.4882438618393674, + "grad_norm": 1.614260647107634, + "learning_rate": 1.6209708568310644e-06, + "loss": 0.2956, + "step": 28610, + "vit_learning_rate": 3.241941713662129e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6374, + "epoch": 1.4887640449438202, + "grad_norm": 1.2839460412391026, + "learning_rate": 1.6178675296805406e-06, + "loss": 0.3241, + "step": 28620, + "vit_learning_rate": 3.235735059361081e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6832, + "epoch": 1.489284228048273, + "grad_norm": 1.4999600603496481, + "learning_rate": 1.6147666025364206e-06, + "loss": 0.3426, + "step": 28630, + "vit_learning_rate": 3.2295332050728405e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7432, + "epoch": 1.4898044111527258, + "grad_norm": 1.5718527861991647, + "learning_rate": 1.611668077599165e-06, + "loss": 0.3274, + "step": 28640, + "vit_learning_rate": 3.2233361551983295e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7101, + "epoch": 1.4903245942571786, + "grad_norm": 1.72090919029112, + "learning_rate": 1.608571957067528e-06, + "loss": 0.314, + "step": 28650, + "vit_learning_rate": 3.2171439141350553e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6738, + "epoch": 1.4908447773616313, + "grad_norm": 1.6053720827495264, + "learning_rate": 1.6054782431385556e-06, + "loss": 0.3323, + "step": 28660, + "vit_learning_rate": 3.210956486277111e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6847, + "epoch": 1.4913649604660841, + "grad_norm": 1.471105178868476, + "learning_rate": 1.6023869380075934e-06, + "loss": 0.3059, + "step": 28670, + "vit_learning_rate": 3.2047738760151867e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6749, + "epoch": 1.4918851435705367, + "grad_norm": 1.917280715121081, + "learning_rate": 1.5992980438682715e-06, + "loss": 0.3171, + "step": 28680, + "vit_learning_rate": 3.1985960877365423e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.684, + "epoch": 1.4924053266749895, + "grad_norm": 1.6858348495452735, + "learning_rate": 1.596211562912508e-06, + "loss": 0.328, + "step": 28690, + "vit_learning_rate": 3.1924231258250155e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6658, + "epoch": 1.4929255097794423, + "grad_norm": 1.6426547422110465, + "learning_rate": 1.5931274973305128e-06, + "loss": 0.3071, + "step": 28700, + "vit_learning_rate": 3.186254994661025e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6943, + "epoch": 1.493445692883895, + "grad_norm": 1.5170150969838558, + "learning_rate": 1.5900458493107779e-06, + "loss": 0.3337, + "step": 28710, + "vit_learning_rate": 3.1800916986215553e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7519, + "epoch": 1.4939658759883478, + "grad_norm": 1.6893829492715648, + "learning_rate": 1.5869666210400854e-06, + "loss": 0.3142, + "step": 28720, + "vit_learning_rate": 3.17393324208017e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6912, + "epoch": 1.4944860590928006, + "grad_norm": 1.6544577813925803, + "learning_rate": 1.5838898147034948e-06, + "loss": 0.3541, + "step": 28730, + "vit_learning_rate": 3.1677796294069894e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6675, + "epoch": 1.4950062421972534, + "grad_norm": 1.3692962538625832, + "learning_rate": 1.5808154324843494e-06, + "loss": 0.3423, + "step": 28740, + "vit_learning_rate": 3.1616308649686986e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.678, + "epoch": 1.4955264253017062, + "grad_norm": 1.3583229296321846, + "learning_rate": 1.5777434765642719e-06, + "loss": 0.3148, + "step": 28750, + "vit_learning_rate": 3.1554869531285434e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.718, + "epoch": 1.496046608406159, + "grad_norm": 1.65779631313366, + "learning_rate": 1.5746739491231632e-06, + "loss": 0.3404, + "step": 28760, + "vit_learning_rate": 3.149347898246326e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7784, + "epoch": 1.4965667915106118, + "grad_norm": 1.3444600868430692, + "learning_rate": 1.5716068523392013e-06, + "loss": 0.3273, + "step": 28770, + "vit_learning_rate": 3.1432137046784024e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6924, + "epoch": 1.4970869746150646, + "grad_norm": 1.4315863365704764, + "learning_rate": 1.5685421883888386e-06, + "loss": 0.3366, + "step": 28780, + "vit_learning_rate": 3.1370843767776767e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6958, + "epoch": 1.4976071577195174, + "grad_norm": 1.5046398767529623, + "learning_rate": 1.5654799594468017e-06, + "loss": 0.3312, + "step": 28790, + "vit_learning_rate": 3.1309599188936033e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6646, + "epoch": 1.4981273408239701, + "grad_norm": 1.201983399300302, + "learning_rate": 1.56242016768609e-06, + "loss": 0.3186, + "step": 28800, + "vit_learning_rate": 3.1248403353721796e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7324, + "epoch": 1.498647523928423, + "grad_norm": 1.8968380339916855, + "learning_rate": 1.5593628152779695e-06, + "loss": 0.3336, + "step": 28810, + "vit_learning_rate": 3.118725630555938e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7305, + "epoch": 1.4991677070328755, + "grad_norm": 2.085812699432292, + "learning_rate": 1.5563079043919843e-06, + "loss": 0.3451, + "step": 28820, + "vit_learning_rate": 3.1126158087839684e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7225, + "epoch": 1.4996878901373283, + "grad_norm": 1.7899000402421918, + "learning_rate": 1.5532554371959353e-06, + "loss": 0.3241, + "step": 28830, + "vit_learning_rate": 3.10651087439187e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6747, + "epoch": 1.500208073241781, + "grad_norm": 1.7519694873620915, + "learning_rate": 1.5502054158558944e-06, + "loss": 0.304, + "step": 28840, + "vit_learning_rate": 3.1004108317117885e-07 + }, + { + "avg_batch_load_time": 2.7467, + "avg_batch_processing_time": 0.6951, + "epoch": 1.5007282563462339, + "grad_norm": 1.699143113584975, + "learning_rate": 1.5471578425361972e-06, + "loss": 0.3337, + "step": 28850, + "vit_learning_rate": 3.094315685072394e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7025, + "epoch": 1.5012484394506866, + "grad_norm": 1.9876807756087174, + "learning_rate": 1.544112719399445e-06, + "loss": 0.3348, + "step": 28860, + "vit_learning_rate": 3.0882254387988893e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6865, + "epoch": 1.5017686225551394, + "grad_norm": 1.9170669390878676, + "learning_rate": 1.541070048606496e-06, + "loss": 0.3116, + "step": 28870, + "vit_learning_rate": 3.082140097212992e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7183, + "epoch": 1.5022888056595922, + "grad_norm": 1.4486955582083478, + "learning_rate": 1.5380298323164705e-06, + "loss": 0.3041, + "step": 28880, + "vit_learning_rate": 3.0760596646329406e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7453, + "epoch": 1.5028089887640448, + "grad_norm": 1.5329122733096792, + "learning_rate": 1.5349920726867474e-06, + "loss": 0.3145, + "step": 28890, + "vit_learning_rate": 3.0699841453734944e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.6897, + "epoch": 1.5033291718684976, + "grad_norm": 1.310286495967884, + "learning_rate": 1.5319567718729599e-06, + "loss": 0.3113, + "step": 28900, + "vit_learning_rate": 3.06391354374592e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6843, + "epoch": 1.5038493549729504, + "grad_norm": 1.6418406131374774, + "learning_rate": 1.5289239320289995e-06, + "loss": 0.3368, + "step": 28910, + "vit_learning_rate": 3.0578478640579985e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6733, + "epoch": 1.5043695380774031, + "grad_norm": 1.9177033562214023, + "learning_rate": 1.5258935553070087e-06, + "loss": 0.3268, + "step": 28920, + "vit_learning_rate": 3.051787110614017e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6652, + "epoch": 1.504889721181856, + "grad_norm": 1.1586901903384705, + "learning_rate": 1.5228656438573842e-06, + "loss": 0.3242, + "step": 28930, + "vit_learning_rate": 3.0457312877147676e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.707, + "epoch": 1.5054099042863087, + "grad_norm": 1.5074986372782773, + "learning_rate": 1.519840199828772e-06, + "loss": 0.3211, + "step": 28940, + "vit_learning_rate": 3.039680399657544e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7238, + "epoch": 1.5059300873907615, + "grad_norm": 1.6526693498248457, + "learning_rate": 1.5168172253680658e-06, + "loss": 0.338, + "step": 28950, + "vit_learning_rate": 3.0336344507361313e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7111, + "epoch": 1.5064502704952143, + "grad_norm": 2.185455984975489, + "learning_rate": 1.5137967226204141e-06, + "loss": 0.3175, + "step": 28960, + "vit_learning_rate": 3.027593445240828e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6879, + "epoch": 1.506970453599667, + "grad_norm": 1.9022816916485743, + "learning_rate": 1.5107786937292012e-06, + "loss": 0.3247, + "step": 28970, + "vit_learning_rate": 3.021557387458402e-07 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.6721, + "epoch": 1.5074906367041199, + "grad_norm": 1.8515269169426363, + "learning_rate": 1.5077631408360617e-06, + "loss": 0.344, + "step": 28980, + "vit_learning_rate": 3.015526281672123e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6768, + "epoch": 1.5080108198085727, + "grad_norm": 1.5672219857552812, + "learning_rate": 1.5047500660808712e-06, + "loss": 0.3361, + "step": 28990, + "vit_learning_rate": 3.009500132161742e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7206, + "epoch": 1.5085310029130254, + "grad_norm": 1.9526697565098095, + "learning_rate": 1.5017394716017503e-06, + "loss": 0.3205, + "step": 29000, + "vit_learning_rate": 3.0034789432035003e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6684, + "epoch": 1.5090511860174782, + "grad_norm": 1.2053390934549673, + "learning_rate": 1.4987313595350566e-06, + "loss": 0.3149, + "step": 29010, + "vit_learning_rate": 2.997462719070113e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7855, + "epoch": 1.509571369121931, + "grad_norm": 1.2916563547058841, + "learning_rate": 1.4957257320153856e-06, + "loss": 0.3154, + "step": 29020, + "vit_learning_rate": 2.991451464030771e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7074, + "epoch": 1.5100915522263838, + "grad_norm": 1.605690440714375, + "learning_rate": 1.492722591175571e-06, + "loss": 0.3289, + "step": 29030, + "vit_learning_rate": 2.9854451823511417e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6956, + "epoch": 1.5106117353308366, + "grad_norm": 1.8037639280825948, + "learning_rate": 1.4897219391466828e-06, + "loss": 0.3281, + "step": 29040, + "vit_learning_rate": 2.979443878293365e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7054, + "epoch": 1.5111319184352894, + "grad_norm": 1.3643789496797891, + "learning_rate": 1.4867237780580217e-06, + "loss": 0.3115, + "step": 29050, + "vit_learning_rate": 2.973447556116043e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7213, + "epoch": 1.511652101539742, + "grad_norm": 1.584336781875156, + "learning_rate": 1.4837281100371248e-06, + "loss": 0.3222, + "step": 29060, + "vit_learning_rate": 2.9674562200742494e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6838, + "epoch": 1.5121722846441947, + "grad_norm": 1.7621802240181392, + "learning_rate": 1.4807349372097574e-06, + "loss": 0.3222, + "step": 29070, + "vit_learning_rate": 2.9614698744195145e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6885, + "epoch": 1.5126924677486475, + "grad_norm": 1.767864599133926, + "learning_rate": 1.4777442616999143e-06, + "loss": 0.3544, + "step": 29080, + "vit_learning_rate": 2.955488523399828e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7722, + "epoch": 1.5132126508531003, + "grad_norm": 1.6936321687797222, + "learning_rate": 1.47475608562982e-06, + "loss": 0.3217, + "step": 29090, + "vit_learning_rate": 2.9495121712596396e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7302, + "epoch": 1.513732833957553, + "grad_norm": 1.7808593135414523, + "learning_rate": 1.4717704111199233e-06, + "loss": 0.3276, + "step": 29100, + "vit_learning_rate": 2.943540822239846e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.733, + "epoch": 1.5142530170620059, + "grad_norm": 1.9969452853883112, + "learning_rate": 1.4687872402888991e-06, + "loss": 0.3228, + "step": 29110, + "vit_learning_rate": 2.937574480577798e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6874, + "epoch": 1.5147732001664584, + "grad_norm": 1.5570081882777485, + "learning_rate": 1.4658065752536455e-06, + "loss": 0.3447, + "step": 29120, + "vit_learning_rate": 2.9316131505072903e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6609, + "epoch": 1.5152933832709112, + "grad_norm": 1.8406502757573182, + "learning_rate": 1.46282841812928e-06, + "loss": 0.3556, + "step": 29130, + "vit_learning_rate": 2.92565683625856e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7815, + "epoch": 1.515813566375364, + "grad_norm": 1.5083179352640574, + "learning_rate": 1.4598527710291465e-06, + "loss": 0.3337, + "step": 29140, + "vit_learning_rate": 2.9197055420582926e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6795, + "epoch": 1.5163337494798168, + "grad_norm": 1.7628162058587336, + "learning_rate": 1.4568796360648025e-06, + "loss": 0.3272, + "step": 29150, + "vit_learning_rate": 2.9137592721296044e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6847, + "epoch": 1.5168539325842696, + "grad_norm": 1.2511892649027214, + "learning_rate": 1.453909015346025e-06, + "loss": 0.302, + "step": 29160, + "vit_learning_rate": 2.9078180306920496e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7083, + "epoch": 1.5173741156887224, + "grad_norm": 1.6153660975246202, + "learning_rate": 1.450940910980802e-06, + "loss": 0.313, + "step": 29170, + "vit_learning_rate": 2.901881821961604e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7453, + "epoch": 1.5178942987931752, + "grad_norm": 1.8688644791864415, + "learning_rate": 1.4479753250753448e-06, + "loss": 0.3286, + "step": 29180, + "vit_learning_rate": 2.895950650150689e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6787, + "epoch": 1.518414481897628, + "grad_norm": 1.4467186328285229, + "learning_rate": 1.4450122597340716e-06, + "loss": 0.3158, + "step": 29190, + "vit_learning_rate": 2.890024519468143e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.629, + "epoch": 1.5189346650020807, + "grad_norm": 1.5694221301998577, + "learning_rate": 1.442051717059612e-06, + "loss": 0.3305, + "step": 29200, + "vit_learning_rate": 2.884103434119224e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.638, + "epoch": 1.5194548481065335, + "grad_norm": 1.6725161769335226, + "learning_rate": 1.4390936991528075e-06, + "loss": 0.3273, + "step": 29210, + "vit_learning_rate": 2.8781873983056146e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7126, + "epoch": 1.5199750312109863, + "grad_norm": 1.4232561329546374, + "learning_rate": 1.4361382081127073e-06, + "loss": 0.3168, + "step": 29220, + "vit_learning_rate": 2.8722764162254145e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6651, + "epoch": 1.520495214315439, + "grad_norm": 1.8793952725526195, + "learning_rate": 1.433185246036567e-06, + "loss": 0.3399, + "step": 29230, + "vit_learning_rate": 2.8663704920731335e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6488, + "epoch": 1.521015397419892, + "grad_norm": 1.5733382649597567, + "learning_rate": 1.4302348150198482e-06, + "loss": 0.3484, + "step": 29240, + "vit_learning_rate": 2.8604696300396956e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.8238, + "epoch": 1.5215355805243447, + "grad_norm": 1.7112670741862581, + "learning_rate": 1.4272869171562153e-06, + "loss": 0.3417, + "step": 29250, + "vit_learning_rate": 2.8545738343124305e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.64, + "epoch": 1.5220557636287975, + "grad_norm": 1.866876266364403, + "learning_rate": 1.424341554537537e-06, + "loss": 0.332, + "step": 29260, + "vit_learning_rate": 2.848683109075074e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7581, + "epoch": 1.5225759467332503, + "grad_norm": 1.8448333968589927, + "learning_rate": 1.4213987292538806e-06, + "loss": 0.3107, + "step": 29270, + "vit_learning_rate": 2.8427974585077607e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6381, + "epoch": 1.5230961298377028, + "grad_norm": 1.3709018829787192, + "learning_rate": 1.4184584433935167e-06, + "loss": 0.2997, + "step": 29280, + "vit_learning_rate": 2.836916886787033e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6636, + "epoch": 1.5236163129421556, + "grad_norm": 1.256286859917721, + "learning_rate": 1.41552069904291e-06, + "loss": 0.3398, + "step": 29290, + "vit_learning_rate": 2.8310413980858196e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6856, + "epoch": 1.5241364960466084, + "grad_norm": 1.4587914500463244, + "learning_rate": 1.4125854982867248e-06, + "loss": 0.3127, + "step": 29300, + "vit_learning_rate": 2.825170996573449e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7061, + "epoch": 1.5246566791510612, + "grad_norm": 1.660420440547655, + "learning_rate": 1.4096528432078138e-06, + "loss": 0.3088, + "step": 29310, + "vit_learning_rate": 2.8193056864156275e-07 + }, + { + "avg_batch_load_time": 0.0034, + "avg_batch_processing_time": 0.6478, + "epoch": 1.525176862255514, + "grad_norm": 1.2347531832045284, + "learning_rate": 1.4067227358872338e-06, + "loss": 0.3269, + "step": 29320, + "vit_learning_rate": 2.813445471774467e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7051, + "epoch": 1.5256970453599668, + "grad_norm": 1.6009513820074808, + "learning_rate": 1.4037951784042258e-06, + "loss": 0.314, + "step": 29330, + "vit_learning_rate": 2.807590356808451e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6762, + "epoch": 1.5262172284644193, + "grad_norm": 1.4139148175408742, + "learning_rate": 1.400870172836224e-06, + "loss": 0.3242, + "step": 29340, + "vit_learning_rate": 2.8017403456724475e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7027, + "epoch": 1.526737411568872, + "grad_norm": 2.296100947772307, + "learning_rate": 1.3979477212588511e-06, + "loss": 0.3112, + "step": 29350, + "vit_learning_rate": 2.795895442517702e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7002, + "epoch": 1.527257594673325, + "grad_norm": 1.479389475541869, + "learning_rate": 1.3950278257459176e-06, + "loss": 0.2958, + "step": 29360, + "vit_learning_rate": 2.790055651491835e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7525, + "epoch": 1.5277777777777777, + "grad_norm": 1.7653080322650079, + "learning_rate": 1.3921104883694215e-06, + "loss": 0.3448, + "step": 29370, + "vit_learning_rate": 2.7842209767388423e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7386, + "epoch": 1.5282979608822305, + "grad_norm": 1.5212881232222608, + "learning_rate": 1.3891957111995436e-06, + "loss": 0.308, + "step": 29380, + "vit_learning_rate": 2.7783914223990865e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7268, + "epoch": 1.5288181439866833, + "grad_norm": 1.6131115222758536, + "learning_rate": 1.386283496304649e-06, + "loss": 0.3075, + "step": 29390, + "vit_learning_rate": 2.7725669926092974e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7649, + "epoch": 1.529338327091136, + "grad_norm": 1.5603158685729857, + "learning_rate": 1.3833738457512842e-06, + "loss": 0.3321, + "step": 29400, + "vit_learning_rate": 2.766747691502568e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6634, + "epoch": 1.5298585101955888, + "grad_norm": 1.4981356248722333, + "learning_rate": 1.380466761604174e-06, + "loss": 0.306, + "step": 29410, + "vit_learning_rate": 2.7609335232083476e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.687, + "epoch": 1.5303786933000416, + "grad_norm": 1.380997745429647, + "learning_rate": 1.3775622459262283e-06, + "loss": 0.3118, + "step": 29420, + "vit_learning_rate": 2.7551244918524563e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7328, + "epoch": 1.5308988764044944, + "grad_norm": 1.6016243826454433, + "learning_rate": 1.374660300778528e-06, + "loss": 0.3105, + "step": 29430, + "vit_learning_rate": 2.7493206015570556e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6272, + "epoch": 1.5314190595089472, + "grad_norm": 1.472856534409772, + "learning_rate": 1.3717609282203343e-06, + "loss": 0.3364, + "step": 29440, + "vit_learning_rate": 2.743521856440668e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6267, + "epoch": 1.5319392426134, + "grad_norm": 1.5102542264539032, + "learning_rate": 1.368864130309075e-06, + "loss": 0.3327, + "step": 29450, + "vit_learning_rate": 2.7377282606181496e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6199, + "epoch": 1.5324594257178528, + "grad_norm": 1.31809116937891, + "learning_rate": 1.3659699091003625e-06, + "loss": 0.3426, + "step": 29460, + "vit_learning_rate": 2.731939818200725e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6619, + "epoch": 1.5329796088223056, + "grad_norm": 1.3763217447793243, + "learning_rate": 1.3630782666479724e-06, + "loss": 0.325, + "step": 29470, + "vit_learning_rate": 2.7261565332959447e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6484, + "epoch": 1.5334997919267583, + "grad_norm": 6.88016251949961, + "learning_rate": 1.3601892050038534e-06, + "loss": 0.3197, + "step": 29480, + "vit_learning_rate": 2.7203784100077064e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.632, + "epoch": 1.5340199750312111, + "grad_norm": 1.715674705184469, + "learning_rate": 1.357302726218122e-06, + "loss": 0.3192, + "step": 29490, + "vit_learning_rate": 2.714605452436244e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6476, + "epoch": 1.534540158135664, + "grad_norm": 1.2678133978736492, + "learning_rate": 1.3544188323390622e-06, + "loss": 0.341, + "step": 29500, + "vit_learning_rate": 2.7088376646781244e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6202, + "epoch": 1.5350603412401165, + "grad_norm": 1.6915229482638405, + "learning_rate": 1.3515375254131236e-06, + "loss": 0.3318, + "step": 29510, + "vit_learning_rate": 2.7030750508262467e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6535, + "epoch": 1.5355805243445693, + "grad_norm": 1.619462664056714, + "learning_rate": 1.3486588074849199e-06, + "loss": 0.3375, + "step": 29520, + "vit_learning_rate": 2.6973176149698393e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6134, + "epoch": 1.536100707449022, + "grad_norm": 1.796132323147783, + "learning_rate": 1.3457826805972285e-06, + "loss": 0.34, + "step": 29530, + "vit_learning_rate": 2.6915653611944566e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6427, + "epoch": 1.5366208905534748, + "grad_norm": 1.595843713651909, + "learning_rate": 1.3429091467909867e-06, + "loss": 0.3033, + "step": 29540, + "vit_learning_rate": 2.6858182935819727e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6942, + "epoch": 1.5371410736579276, + "grad_norm": 1.2946360546644997, + "learning_rate": 1.3400382081052915e-06, + "loss": 0.3082, + "step": 29550, + "vit_learning_rate": 2.6800764162105826e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6471, + "epoch": 1.5376612567623804, + "grad_norm": 1.8914638317302435, + "learning_rate": 1.3371698665774019e-06, + "loss": 0.3194, + "step": 29560, + "vit_learning_rate": 2.6743397331548035e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6522, + "epoch": 1.538181439866833, + "grad_norm": 1.8169127437175447, + "learning_rate": 1.3343041242427302e-06, + "loss": 0.3316, + "step": 29570, + "vit_learning_rate": 2.66860824848546e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6763, + "epoch": 1.5387016229712858, + "grad_norm": 2.665741434927105, + "learning_rate": 1.3314409831348468e-06, + "loss": 0.3261, + "step": 29580, + "vit_learning_rate": 2.6628819662696935e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6793, + "epoch": 1.5392218060757386, + "grad_norm": 1.7889701245360807, + "learning_rate": 1.3285804452854706e-06, + "loss": 0.3348, + "step": 29590, + "vit_learning_rate": 2.6571608905709407e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7224, + "epoch": 1.5397419891801913, + "grad_norm": 2.1146674989171435, + "learning_rate": 1.325722512724482e-06, + "loss": 0.3143, + "step": 29600, + "vit_learning_rate": 2.651445025448964e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6671, + "epoch": 1.5402621722846441, + "grad_norm": 3.0260272956893735, + "learning_rate": 1.3228671874799066e-06, + "loss": 0.3352, + "step": 29610, + "vit_learning_rate": 2.645734374959813e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6217, + "epoch": 1.540782355389097, + "grad_norm": 1.5703261240688715, + "learning_rate": 1.3200144715779212e-06, + "loss": 0.3122, + "step": 29620, + "vit_learning_rate": 2.640028943155842e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.8563, + "epoch": 1.5413025384935497, + "grad_norm": 1.3560431005444946, + "learning_rate": 1.3171643670428513e-06, + "loss": 0.2984, + "step": 29630, + "vit_learning_rate": 2.634328734085702e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6524, + "epoch": 1.5418227215980025, + "grad_norm": 2.1496680136480566, + "learning_rate": 1.3143168758971692e-06, + "loss": 0.3302, + "step": 29640, + "vit_learning_rate": 2.628633751794338e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6637, + "epoch": 1.5423429047024553, + "grad_norm": 1.690755184812029, + "learning_rate": 1.3114720001614923e-06, + "loss": 0.3185, + "step": 29650, + "vit_learning_rate": 2.622944000322984e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.742, + "epoch": 1.542863087806908, + "grad_norm": 1.5996138746258133, + "learning_rate": 1.3086297418545828e-06, + "loss": 0.307, + "step": 29660, + "vit_learning_rate": 2.6172594837091654e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6439, + "epoch": 1.5433832709113608, + "grad_norm": 1.4236106205226002, + "learning_rate": 1.3057901029933455e-06, + "loss": 0.3375, + "step": 29670, + "vit_learning_rate": 2.611580205986691e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7219, + "epoch": 1.5439034540158136, + "grad_norm": 2.708464182828744, + "learning_rate": 1.3029530855928253e-06, + "loss": 0.344, + "step": 29680, + "vit_learning_rate": 2.60590617118565e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6534, + "epoch": 1.5444236371202664, + "grad_norm": 3.660919023249171, + "learning_rate": 1.3001186916662066e-06, + "loss": 0.324, + "step": 29690, + "vit_learning_rate": 2.600237383332413e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6769, + "epoch": 1.5449438202247192, + "grad_norm": 1.6410326785749352, + "learning_rate": 1.2972869232248165e-06, + "loss": 0.2975, + "step": 29700, + "vit_learning_rate": 2.5945738464496324e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6851, + "epoch": 1.545464003329172, + "grad_norm": 1.435103493993094, + "learning_rate": 1.2944577822781135e-06, + "loss": 0.3224, + "step": 29710, + "vit_learning_rate": 2.5889155645562267e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.639, + "epoch": 1.5459841864336248, + "grad_norm": 1.4316602443381667, + "learning_rate": 1.291631270833696e-06, + "loss": 0.329, + "step": 29720, + "vit_learning_rate": 2.5832625416673915e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.636, + "epoch": 1.5465043695380774, + "grad_norm": 2.177149094513981, + "learning_rate": 1.2888073908972893e-06, + "loss": 0.3326, + "step": 29730, + "vit_learning_rate": 2.577614781794578e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6513, + "epoch": 1.5470245526425301, + "grad_norm": 1.5126408142777783, + "learning_rate": 1.2859861444727606e-06, + "loss": 0.3388, + "step": 29740, + "vit_learning_rate": 2.571972288945521e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6276, + "epoch": 1.547544735746983, + "grad_norm": 1.8771707220913447, + "learning_rate": 1.2831675335621041e-06, + "loss": 0.3094, + "step": 29750, + "vit_learning_rate": 2.566335067124208e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8021, + "epoch": 1.5480649188514357, + "grad_norm": 1.3342150830699055, + "learning_rate": 1.280351560165442e-06, + "loss": 0.3283, + "step": 29760, + "vit_learning_rate": 2.560703120330884e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7167, + "epoch": 1.5485851019558885, + "grad_norm": 1.7403186448294508, + "learning_rate": 1.2775382262810275e-06, + "loss": 0.3153, + "step": 29770, + "vit_learning_rate": 2.555076452562055e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6353, + "epoch": 1.5491052850603413, + "grad_norm": 1.5840141770971512, + "learning_rate": 1.27472753390524e-06, + "loss": 0.3167, + "step": 29780, + "vit_learning_rate": 2.5494550678104796e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6243, + "epoch": 1.5496254681647939, + "grad_norm": 1.1761956353014218, + "learning_rate": 1.271919485032584e-06, + "loss": 0.3162, + "step": 29790, + "vit_learning_rate": 2.5438389700651673e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7489, + "epoch": 1.5501456512692466, + "grad_norm": 1.6094294608786688, + "learning_rate": 1.269114081655689e-06, + "loss": 0.2953, + "step": 29800, + "vit_learning_rate": 2.5382281633113777e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6647, + "epoch": 1.5506658343736994, + "grad_norm": 1.4222456597463322, + "learning_rate": 1.2663113257653059e-06, + "loss": 0.3235, + "step": 29810, + "vit_learning_rate": 2.5326226515306117e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6504, + "epoch": 1.5511860174781522, + "grad_norm": 1.2723654349013733, + "learning_rate": 1.2635112193503085e-06, + "loss": 0.3228, + "step": 29820, + "vit_learning_rate": 2.5270224387006167e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7893, + "epoch": 1.551706200582605, + "grad_norm": 1.6119167627939786, + "learning_rate": 1.2607137643976885e-06, + "loss": 0.3334, + "step": 29830, + "vit_learning_rate": 2.521427528795377e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6987, + "epoch": 1.5522263836870578, + "grad_norm": 1.9480590952829515, + "learning_rate": 1.2579189628925593e-06, + "loss": 0.3409, + "step": 29840, + "vit_learning_rate": 2.5158379257851183e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6761, + "epoch": 1.5527465667915106, + "grad_norm": 1.6248322353836786, + "learning_rate": 1.2551268168181501e-06, + "loss": 0.3428, + "step": 29850, + "vit_learning_rate": 2.5102536336363e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6674, + "epoch": 1.5532667498959634, + "grad_norm": 1.502158414073093, + "learning_rate": 1.2523373281558015e-06, + "loss": 0.3203, + "step": 29860, + "vit_learning_rate": 2.5046746563116026e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6266, + "epoch": 1.5537869330004161, + "grad_norm": 1.8210313663906377, + "learning_rate": 1.2495504988849744e-06, + "loss": 0.3162, + "step": 29870, + "vit_learning_rate": 2.4991009977699484e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6576, + "epoch": 1.554307116104869, + "grad_norm": 2.2718227940730293, + "learning_rate": 1.2467663309832379e-06, + "loss": 0.3253, + "step": 29880, + "vit_learning_rate": 2.493532661966475e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.761, + "epoch": 1.5548272992093217, + "grad_norm": 1.3739841824936585, + "learning_rate": 1.2439848264262777e-06, + "loss": 0.3323, + "step": 29890, + "vit_learning_rate": 2.487969652852555e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.8291, + "epoch": 1.5553474823137745, + "grad_norm": 1.2396607268180908, + "learning_rate": 1.2412059871878851e-06, + "loss": 0.3036, + "step": 29900, + "vit_learning_rate": 2.4824119743757696e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6215, + "epoch": 1.5558676654182273, + "grad_norm": 1.9460693082473732, + "learning_rate": 1.238429815239962e-06, + "loss": 0.3314, + "step": 29910, + "vit_learning_rate": 2.476859630479924e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6542, + "epoch": 1.55638784852268, + "grad_norm": 1.6524397592320488, + "learning_rate": 1.2356563125525173e-06, + "loss": 0.3403, + "step": 29920, + "vit_learning_rate": 2.471312625105034e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.689, + "epoch": 1.5569080316271329, + "grad_norm": 1.2970911389514004, + "learning_rate": 1.2328854810936647e-06, + "loss": 0.3152, + "step": 29930, + "vit_learning_rate": 2.465770962187329e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7032, + "epoch": 1.5574282147315857, + "grad_norm": 1.5245596830943915, + "learning_rate": 1.2301173228296237e-06, + "loss": 0.3257, + "step": 29940, + "vit_learning_rate": 2.4602346456592473e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6337, + "epoch": 1.5579483978360384, + "grad_norm": 1.3878425835470412, + "learning_rate": 1.2273518397247163e-06, + "loss": 0.3271, + "step": 29950, + "vit_learning_rate": 2.4547036794494323e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6774, + "epoch": 1.558468580940491, + "grad_norm": 1.6860865809309127, + "learning_rate": 1.2245890337413669e-06, + "loss": 0.3526, + "step": 29960, + "vit_learning_rate": 2.4491780674827333e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6567, + "epoch": 1.5589887640449438, + "grad_norm": 1.3932980024063988, + "learning_rate": 1.2218289068400967e-06, + "loss": 0.3298, + "step": 29970, + "vit_learning_rate": 2.443657813680193e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.662, + "epoch": 1.5595089471493966, + "grad_norm": 2.9692595351943973, + "learning_rate": 1.2190714609795334e-06, + "loss": 0.3229, + "step": 29980, + "vit_learning_rate": 2.4381429219590667e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6777, + "epoch": 1.5600291302538494, + "grad_norm": 1.740609208646177, + "learning_rate": 1.216316698116396e-06, + "loss": 0.3167, + "step": 29990, + "vit_learning_rate": 2.4326333962327915e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6762, + "epoch": 1.5605493133583022, + "grad_norm": 1.7125823675077814, + "learning_rate": 1.2135646202054986e-06, + "loss": 0.3347, + "step": 30000, + "vit_learning_rate": 2.427129240410997e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6469, + "epoch": 1.5610694964627547, + "grad_norm": 1.580879639363882, + "learning_rate": 1.2108152291997539e-06, + "loss": 0.3307, + "step": 30010, + "vit_learning_rate": 2.421630458399507e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6917, + "epoch": 1.5615896795672075, + "grad_norm": 1.0549238763500084, + "learning_rate": 1.2080685270501647e-06, + "loss": 0.3387, + "step": 30020, + "vit_learning_rate": 2.4161370541003287e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6736, + "epoch": 1.5621098626716603, + "grad_norm": 1.6360013451888218, + "learning_rate": 1.2053245157058313e-06, + "loss": 0.3317, + "step": 30030, + "vit_learning_rate": 2.410649031411662e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7544, + "epoch": 1.562630045776113, + "grad_norm": 1.1566479480787606, + "learning_rate": 1.2025831971139385e-06, + "loss": 0.3328, + "step": 30040, + "vit_learning_rate": 2.405166394227877e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7516, + "epoch": 1.5631502288805659, + "grad_norm": 1.7442965499549947, + "learning_rate": 1.1998445732197633e-06, + "loss": 0.3314, + "step": 30050, + "vit_learning_rate": 2.3996891464395264e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6406, + "epoch": 1.5636704119850187, + "grad_norm": 1.5043450752725749, + "learning_rate": 1.1971086459666699e-06, + "loss": 0.3354, + "step": 30060, + "vit_learning_rate": 2.3942172919333393e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6195, + "epoch": 1.5641905950894714, + "grad_norm": 1.610523260276546, + "learning_rate": 1.1943754172961075e-06, + "loss": 0.3221, + "step": 30070, + "vit_learning_rate": 2.388750834592215e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6712, + "epoch": 1.5647107781939242, + "grad_norm": 2.2564057294134843, + "learning_rate": 1.1916448891476135e-06, + "loss": 0.3205, + "step": 30080, + "vit_learning_rate": 2.3832897782952267e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6754, + "epoch": 1.565230961298377, + "grad_norm": 1.5584844956558652, + "learning_rate": 1.188917063458806e-06, + "loss": 0.3284, + "step": 30090, + "vit_learning_rate": 2.3778341269176116e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6288, + "epoch": 1.5657511444028298, + "grad_norm": 1.4210212331241918, + "learning_rate": 1.1861919421653872e-06, + "loss": 0.3069, + "step": 30100, + "vit_learning_rate": 2.372383884330774e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6421, + "epoch": 1.5662713275072826, + "grad_norm": 1.6473321688396778, + "learning_rate": 1.1834695272011376e-06, + "loss": 0.3262, + "step": 30110, + "vit_learning_rate": 2.366939054402275e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6569, + "epoch": 1.5667915106117354, + "grad_norm": 1.5774691128758034, + "learning_rate": 1.1807498204979222e-06, + "loss": 0.321, + "step": 30120, + "vit_learning_rate": 2.3614996409958443e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.68, + "epoch": 1.5673116937161882, + "grad_norm": 1.6928323824373148, + "learning_rate": 1.1780328239856814e-06, + "loss": 0.3323, + "step": 30130, + "vit_learning_rate": 2.3560656479713625e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.674, + "epoch": 1.567831876820641, + "grad_norm": 2.7845067107848074, + "learning_rate": 1.1753185395924299e-06, + "loss": 0.3321, + "step": 30140, + "vit_learning_rate": 2.3506370791848596e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6491, + "epoch": 1.5683520599250937, + "grad_norm": 1.6234805105628523, + "learning_rate": 1.1726069692442604e-06, + "loss": 0.3128, + "step": 30150, + "vit_learning_rate": 2.3452139384885207e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6763, + "epoch": 1.5688722430295465, + "grad_norm": 1.874455178660875, + "learning_rate": 1.1698981148653394e-06, + "loss": 0.3269, + "step": 30160, + "vit_learning_rate": 2.3397962297306784e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6765, + "epoch": 1.5693924261339993, + "grad_norm": 1.491954773089241, + "learning_rate": 1.167191978377908e-06, + "loss": 0.3195, + "step": 30170, + "vit_learning_rate": 2.3343839567558155e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6691, + "epoch": 1.5699126092384519, + "grad_norm": 1.5609595682914483, + "learning_rate": 1.1644885617022756e-06, + "loss": 0.3082, + "step": 30180, + "vit_learning_rate": 2.3289771234045509e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6781, + "epoch": 1.5704327923429047, + "grad_norm": 1.4705904573383937, + "learning_rate": 1.1617878667568234e-06, + "loss": 0.3351, + "step": 30190, + "vit_learning_rate": 2.3235757335136463e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6405, + "epoch": 1.5709529754473575, + "grad_norm": 1.2441789757520518, + "learning_rate": 1.1590898954579993e-06, + "loss": 0.337, + "step": 30200, + "vit_learning_rate": 2.3181797909159983e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7882, + "epoch": 1.5714731585518102, + "grad_norm": 1.5498755402164486, + "learning_rate": 1.1563946497203215e-06, + "loss": 0.3044, + "step": 30210, + "vit_learning_rate": 2.3127892994406427e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6326, + "epoch": 1.571993341656263, + "grad_norm": 1.6116798275337385, + "learning_rate": 1.1537021314563713e-06, + "loss": 0.3412, + "step": 30220, + "vit_learning_rate": 2.3074042629127422e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6583, + "epoch": 1.5725135247607158, + "grad_norm": 1.8088976997235282, + "learning_rate": 1.1510123425767961e-06, + "loss": 0.3115, + "step": 30230, + "vit_learning_rate": 2.3020246851535919e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6737, + "epoch": 1.5730337078651684, + "grad_norm": 1.6758615174721516, + "learning_rate": 1.1483252849903053e-06, + "loss": 0.3353, + "step": 30240, + "vit_learning_rate": 2.2966505699806104e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6796, + "epoch": 1.5735538909696212, + "grad_norm": 1.2475284150167765, + "learning_rate": 1.1456409606036707e-06, + "loss": 0.3377, + "step": 30250, + "vit_learning_rate": 2.2912819212073408e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7027, + "epoch": 1.574074074074074, + "grad_norm": 1.4732876988480825, + "learning_rate": 1.1429593713217262e-06, + "loss": 0.3242, + "step": 30260, + "vit_learning_rate": 2.2859187426434522e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6861, + "epoch": 1.5745942571785267, + "grad_norm": 1.6751608544926766, + "learning_rate": 1.1402805190473649e-06, + "loss": 0.3214, + "step": 30270, + "vit_learning_rate": 2.2805610380947293e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6792, + "epoch": 1.5751144402829795, + "grad_norm": 1.4659292556955739, + "learning_rate": 1.1376044056815321e-06, + "loss": 0.3398, + "step": 30280, + "vit_learning_rate": 2.275208811363064e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.647, + "epoch": 1.5756346233874323, + "grad_norm": 1.5468292282613405, + "learning_rate": 1.1349310331232354e-06, + "loss": 0.3003, + "step": 30290, + "vit_learning_rate": 2.2698620662464707e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7311, + "epoch": 1.576154806491885, + "grad_norm": 1.9336734354332212, + "learning_rate": 1.1322604032695345e-06, + "loss": 0.3243, + "step": 30300, + "vit_learning_rate": 2.2645208065390686e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6712, + "epoch": 1.576674989596338, + "grad_norm": 1.748941359420825, + "learning_rate": 1.1295925180155466e-06, + "loss": 0.3243, + "step": 30310, + "vit_learning_rate": 2.2591850360310927e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6628, + "epoch": 1.5771951727007907, + "grad_norm": 1.8557024319996038, + "learning_rate": 1.1269273792544378e-06, + "loss": 0.3374, + "step": 30320, + "vit_learning_rate": 2.253854758508875e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6103, + "epoch": 1.5777153558052435, + "grad_norm": 1.1887632567010094, + "learning_rate": 1.1242649888774254e-06, + "loss": 0.3163, + "step": 30330, + "vit_learning_rate": 2.2485299777548505e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6886, + "epoch": 1.5782355389096963, + "grad_norm": 1.4847028175815202, + "learning_rate": 1.1216053487737776e-06, + "loss": 0.3282, + "step": 30340, + "vit_learning_rate": 2.243210697547555e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7139, + "epoch": 1.578755722014149, + "grad_norm": 1.8990255940914365, + "learning_rate": 1.118948460830811e-06, + "loss": 0.3127, + "step": 30350, + "vit_learning_rate": 2.2378969216616218e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6317, + "epoch": 1.5792759051186018, + "grad_norm": 1.2120401034874584, + "learning_rate": 1.1162943269338883e-06, + "loss": 0.322, + "step": 30360, + "vit_learning_rate": 2.2325886538677762e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7063, + "epoch": 1.5797960882230546, + "grad_norm": 1.7042146277611576, + "learning_rate": 1.113642948966418e-06, + "loss": 0.3334, + "step": 30370, + "vit_learning_rate": 2.2272858979328357e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6497, + "epoch": 1.5803162713275074, + "grad_norm": 1.5157601087016632, + "learning_rate": 1.1109943288098546e-06, + "loss": 0.3388, + "step": 30380, + "vit_learning_rate": 2.2219886576197088e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6439, + "epoch": 1.5808364544319602, + "grad_norm": 1.4438009776810874, + "learning_rate": 1.1083484683436918e-06, + "loss": 0.3132, + "step": 30390, + "vit_learning_rate": 2.2166969366873833e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6304, + "epoch": 1.581356637536413, + "grad_norm": 1.649174345963161, + "learning_rate": 1.1057053694454707e-06, + "loss": 0.3369, + "step": 30400, + "vit_learning_rate": 2.211410738890941e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.642, + "epoch": 1.5818768206408655, + "grad_norm": 1.8779658004230064, + "learning_rate": 1.1030650339907695e-06, + "loss": 0.3375, + "step": 30410, + "vit_learning_rate": 2.2061300679815387e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7257, + "epoch": 1.5823970037453183, + "grad_norm": 1.8690984569474651, + "learning_rate": 1.1004274638532026e-06, + "loss": 0.3224, + "step": 30420, + "vit_learning_rate": 2.200854927706405e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7269, + "epoch": 1.5829171868497711, + "grad_norm": 1.4741849676768293, + "learning_rate": 1.097792660904427e-06, + "loss": 0.3012, + "step": 30430, + "vit_learning_rate": 2.1955853218088538e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6422, + "epoch": 1.583437369954224, + "grad_norm": 1.6673839870357476, + "learning_rate": 1.0951606270141324e-06, + "loss": 0.3379, + "step": 30440, + "vit_learning_rate": 2.1903212540282645e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7012, + "epoch": 1.5839575530586767, + "grad_norm": 1.8108869803050949, + "learning_rate": 1.0925313640500484e-06, + "loss": 0.3161, + "step": 30450, + "vit_learning_rate": 2.1850627281000966e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6726, + "epoch": 1.5844777361631293, + "grad_norm": 1.299735841830634, + "learning_rate": 1.0899048738779328e-06, + "loss": 0.3165, + "step": 30460, + "vit_learning_rate": 2.1798097477558652e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.656, + "epoch": 1.584997919267582, + "grad_norm": 1.3633895000338274, + "learning_rate": 1.0872811583615789e-06, + "loss": 0.332, + "step": 30470, + "vit_learning_rate": 2.1745623167231576e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7548, + "epoch": 1.5855181023720348, + "grad_norm": 1.7954522857435111, + "learning_rate": 1.0846602193628098e-06, + "loss": 0.3115, + "step": 30480, + "vit_learning_rate": 2.169320438725619e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.658, + "epoch": 1.5860382854764876, + "grad_norm": 1.889765541411907, + "learning_rate": 1.0820420587414794e-06, + "loss": 0.3426, + "step": 30490, + "vit_learning_rate": 2.1640841174829582e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6711, + "epoch": 1.5865584685809404, + "grad_norm": 1.417469147312735, + "learning_rate": 1.079426678355469e-06, + "loss": 0.3401, + "step": 30500, + "vit_learning_rate": 2.158853356710938e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6464, + "epoch": 1.5870786516853932, + "grad_norm": 1.4348986892720137, + "learning_rate": 1.0768140800606874e-06, + "loss": 0.3189, + "step": 30510, + "vit_learning_rate": 2.1536281601213747e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6324, + "epoch": 1.587598834789846, + "grad_norm": 1.5174627716572338, + "learning_rate": 1.0742042657110701e-06, + "loss": 0.3056, + "step": 30520, + "vit_learning_rate": 2.14840853142214e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7433, + "epoch": 1.5881190178942988, + "grad_norm": 1.6567605094788023, + "learning_rate": 1.0715972371585752e-06, + "loss": 0.3209, + "step": 30530, + "vit_learning_rate": 2.1431944743171505e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6855, + "epoch": 1.5886392009987516, + "grad_norm": 2.0876220183128855, + "learning_rate": 1.0689929962531841e-06, + "loss": 0.3163, + "step": 30540, + "vit_learning_rate": 2.137985992506368e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.644, + "epoch": 1.5891593841032043, + "grad_norm": 2.1385587445319705, + "learning_rate": 1.0663915448429057e-06, + "loss": 0.3274, + "step": 30550, + "vit_learning_rate": 2.1327830896858113e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.657, + "epoch": 1.5896795672076571, + "grad_norm": 1.1832484403752774, + "learning_rate": 1.0637928847737594e-06, + "loss": 0.3493, + "step": 30560, + "vit_learning_rate": 2.1275857695475185e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6181, + "epoch": 1.59019975031211, + "grad_norm": 1.7135660167555573, + "learning_rate": 1.0611970178897911e-06, + "loss": 0.3315, + "step": 30570, + "vit_learning_rate": 2.1223940357795822e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6905, + "epoch": 1.5907199334165627, + "grad_norm": 1.9027674597100843, + "learning_rate": 1.058603946033061e-06, + "loss": 0.3356, + "step": 30580, + "vit_learning_rate": 2.117207892066122e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6597, + "epoch": 1.5912401165210155, + "grad_norm": 1.43370075997424, + "learning_rate": 1.0560136710436509e-06, + "loss": 0.3047, + "step": 30590, + "vit_learning_rate": 2.1120273420873015e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.628, + "epoch": 1.5917602996254683, + "grad_norm": 1.65777987209005, + "learning_rate": 1.0534261947596525e-06, + "loss": 0.3503, + "step": 30600, + "vit_learning_rate": 2.1068523895193046e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6809, + "epoch": 1.592280482729921, + "grad_norm": 1.7307982152907333, + "learning_rate": 1.0508415190171756e-06, + "loss": 0.3219, + "step": 30610, + "vit_learning_rate": 2.101683038034351e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.8153, + "epoch": 1.5928006658343739, + "grad_norm": 1.3854604450450474, + "learning_rate": 1.0482596456503358e-06, + "loss": 0.321, + "step": 30620, + "vit_learning_rate": 2.0965192913006713e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6187, + "epoch": 1.5933208489388264, + "grad_norm": 1.459018707429984, + "learning_rate": 1.0456805764912692e-06, + "loss": 0.3308, + "step": 30630, + "vit_learning_rate": 2.0913611529825382e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6625, + "epoch": 1.5938410320432792, + "grad_norm": 1.5381889576319194, + "learning_rate": 1.0431043133701169e-06, + "loss": 0.3303, + "step": 30640, + "vit_learning_rate": 2.0862086267402335e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6437, + "epoch": 1.594361215147732, + "grad_norm": 1.3760450864591474, + "learning_rate": 1.0405308581150291e-06, + "loss": 0.3164, + "step": 30650, + "vit_learning_rate": 2.0810617162300582e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6536, + "epoch": 1.5948813982521848, + "grad_norm": 1.422948422089582, + "learning_rate": 1.0379602125521642e-06, + "loss": 0.3209, + "step": 30660, + "vit_learning_rate": 2.0759204251043282e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6527, + "epoch": 1.5954015813566376, + "grad_norm": 1.3014595274927747, + "learning_rate": 1.0353923785056869e-06, + "loss": 0.3156, + "step": 30670, + "vit_learning_rate": 2.0707847570113734e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7476, + "epoch": 1.5959217644610904, + "grad_norm": 1.8033169870503183, + "learning_rate": 1.0328273577977665e-06, + "loss": 0.2973, + "step": 30680, + "vit_learning_rate": 2.0656547155955329e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6867, + "epoch": 1.596441947565543, + "grad_norm": 1.241129729634838, + "learning_rate": 1.0302651522485757e-06, + "loss": 0.3177, + "step": 30690, + "vit_learning_rate": 2.0605303044971513e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7109, + "epoch": 1.5969621306699957, + "grad_norm": 1.4372948118341762, + "learning_rate": 1.0277057636762893e-06, + "loss": 0.3458, + "step": 30700, + "vit_learning_rate": 2.0554115273525784e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 3.7227, + "epoch": 1.5974823137744485, + "grad_norm": 1.55423995214843, + "learning_rate": 1.0251491938970854e-06, + "loss": 0.3326, + "step": 30710, + "vit_learning_rate": 2.0502983877941703e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7365, + "epoch": 1.5980024968789013, + "grad_norm": 1.5153401596112135, + "learning_rate": 1.0225954447251368e-06, + "loss": 0.3289, + "step": 30720, + "vit_learning_rate": 2.0451908894502734e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6423, + "epoch": 1.598522679983354, + "grad_norm": 2.0098676285783115, + "learning_rate": 1.020044517972622e-06, + "loss": 0.3239, + "step": 30730, + "vit_learning_rate": 2.0400890359452438e-07 + }, + { + "avg_batch_load_time": 0.0034, + "avg_batch_processing_time": 0.6722, + "epoch": 1.5990428630878069, + "grad_norm": 2.2459946142416674, + "learning_rate": 1.0174964154497108e-06, + "loss": 0.3254, + "step": 30740, + "vit_learning_rate": 2.0349928308994212e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7073, + "epoch": 1.5995630461922596, + "grad_norm": 2.9262621259428543, + "learning_rate": 1.0149511389645721e-06, + "loss": 0.3512, + "step": 30750, + "vit_learning_rate": 2.0299022779291442e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6867, + "epoch": 1.6000832292967124, + "grad_norm": 1.3391656322778647, + "learning_rate": 1.0124086903233644e-06, + "loss": 0.3253, + "step": 30760, + "vit_learning_rate": 2.0248173806467285e-07 + }, + { + "avg_batch_load_time": 2.671, + "avg_batch_processing_time": 0.6729, + "epoch": 1.6006034124011652, + "grad_norm": 1.265501206860842, + "learning_rate": 1.0098690713302473e-06, + "loss": 0.3079, + "step": 30770, + "vit_learning_rate": 2.0197381426604942e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7253, + "epoch": 1.601123595505618, + "grad_norm": 1.851325619085451, + "learning_rate": 1.0073322837873667e-06, + "loss": 0.3339, + "step": 30780, + "vit_learning_rate": 2.0146645675747332e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7101, + "epoch": 1.6016437786100708, + "grad_norm": 2.7078618366400495, + "learning_rate": 1.004798329494861e-06, + "loss": 0.3246, + "step": 30790, + "vit_learning_rate": 2.0095966589897218e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.749, + "epoch": 1.6021639617145236, + "grad_norm": 1.9316292851830459, + "learning_rate": 1.0022672102508579e-06, + "loss": 0.3135, + "step": 30800, + "vit_learning_rate": 2.0045344205017155e-07 + }, + { + "avg_batch_load_time": 0.0034, + "avg_batch_processing_time": 0.7344, + "epoch": 1.6026841448189764, + "grad_norm": 1.4261710844354278, + "learning_rate": 9.997389278514742e-07, + "loss": 0.3349, + "step": 30810, + "vit_learning_rate": 1.999477855702948e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.737, + "epoch": 1.6032043279234292, + "grad_norm": 1.5381014965420874, + "learning_rate": 9.97213484090812e-07, + "loss": 0.323, + "step": 30820, + "vit_learning_rate": 1.994426968181624e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6743, + "epoch": 1.603724511027882, + "grad_norm": 1.4244336494209056, + "learning_rate": 9.946908807609606e-07, + "loss": 0.3215, + "step": 30830, + "vit_learning_rate": 1.989381761521921e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7276, + "epoch": 1.6042446941323347, + "grad_norm": 1.7248627057340244, + "learning_rate": 9.921711196519928e-07, + "loss": 0.3152, + "step": 30840, + "vit_learning_rate": 1.9843422393039854e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6624, + "epoch": 1.6047648772367875, + "grad_norm": 1.7673993768462766, + "learning_rate": 9.896542025519645e-07, + "loss": 0.3096, + "step": 30850, + "vit_learning_rate": 1.979308405103929e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6595, + "epoch": 1.60528506034124, + "grad_norm": 2.5840574851897014, + "learning_rate": 9.87140131246913e-07, + "loss": 0.3378, + "step": 30860, + "vit_learning_rate": 1.974280262493826e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4803, + "epoch": 1.6058052434456929, + "grad_norm": 1.6810126443591853, + "learning_rate": 9.846289075208588e-07, + "loss": 0.3236, + "step": 30870, + "vit_learning_rate": 1.9692578150417172e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6432, + "epoch": 1.6063254265501457, + "grad_norm": 1.2340619625830898, + "learning_rate": 9.821205331557987e-07, + "loss": 0.3192, + "step": 30880, + "vit_learning_rate": 1.9642410663115972e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.7882, + "epoch": 1.6068456096545984, + "grad_norm": 1.556550892248362, + "learning_rate": 9.796150099317102e-07, + "loss": 0.3275, + "step": 30890, + "vit_learning_rate": 1.9592300198634205e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6646, + "epoch": 1.6073657927590512, + "grad_norm": 2.0665758793214404, + "learning_rate": 9.771123396265425e-07, + "loss": 0.3086, + "step": 30900, + "vit_learning_rate": 1.9542246792530847e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6633, + "epoch": 1.6078859758635038, + "grad_norm": 1.7776972654425398, + "learning_rate": 9.746125240162269e-07, + "loss": 0.3434, + "step": 30910, + "vit_learning_rate": 1.9492250480324535e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.8285, + "epoch": 1.6084061589679566, + "grad_norm": 1.7075477646988027, + "learning_rate": 9.721155648746656e-07, + "loss": 0.3398, + "step": 30920, + "vit_learning_rate": 1.9442311297493307e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6496, + "epoch": 1.6089263420724094, + "grad_norm": 1.3743707139141308, + "learning_rate": 9.696214639737334e-07, + "loss": 0.3374, + "step": 30930, + "vit_learning_rate": 1.9392429279474665e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 2.8061, + "epoch": 1.6094465251768622, + "grad_norm": 1.9064964567857454, + "learning_rate": 9.67130223083278e-07, + "loss": 0.3494, + "step": 30940, + "vit_learning_rate": 1.9342604461665558e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.752, + "epoch": 1.609966708281315, + "grad_norm": 1.318374421025002, + "learning_rate": 9.646418439711175e-07, + "loss": 0.3061, + "step": 30950, + "vit_learning_rate": 1.9292836879422346e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6576, + "epoch": 1.6104868913857677, + "grad_norm": 1.8006693125617415, + "learning_rate": 9.621563284030382e-07, + "loss": 0.3141, + "step": 30960, + "vit_learning_rate": 1.9243126568060764e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6741, + "epoch": 1.6110070744902205, + "grad_norm": 1.4315539023671149, + "learning_rate": 9.596736781427957e-07, + "loss": 0.3335, + "step": 30970, + "vit_learning_rate": 1.919347356285591e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6437, + "epoch": 1.6115272575946733, + "grad_norm": 1.603589097602998, + "learning_rate": 9.57193894952112e-07, + "loss": 0.3171, + "step": 30980, + "vit_learning_rate": 1.9143877899042238e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6497, + "epoch": 1.612047440699126, + "grad_norm": 1.593925450776074, + "learning_rate": 9.54716980590674e-07, + "loss": 0.3144, + "step": 30990, + "vit_learning_rate": 1.909433961181348e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 5.0209, + "epoch": 1.6125676238035789, + "grad_norm": 1.6190311241496076, + "learning_rate": 9.522429368161323e-07, + "loss": 0.3441, + "step": 31000, + "vit_learning_rate": 1.9044858736322644e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6693, + "epoch": 1.6130878069080317, + "grad_norm": 1.5806948234432672, + "learning_rate": 9.497717653841038e-07, + "loss": 0.3122, + "step": 31010, + "vit_learning_rate": 1.8995435307682072e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6572, + "epoch": 1.6136079900124844, + "grad_norm": 1.4281956048588091, + "learning_rate": 9.473034680481629e-07, + "loss": 0.3107, + "step": 31020, + "vit_learning_rate": 1.8946069360963256e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.7962, + "epoch": 1.6141281731169372, + "grad_norm": 1.3106412507822933, + "learning_rate": 9.448380465598488e-07, + "loss": 0.3352, + "step": 31030, + "vit_learning_rate": 1.8896760931196976e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 4.9051, + "epoch": 1.61464835622139, + "grad_norm": 1.4950555044913694, + "learning_rate": 9.423755026686531e-07, + "loss": 0.3147, + "step": 31040, + "vit_learning_rate": 1.8847510053373062e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 2.6338, + "epoch": 1.6151685393258428, + "grad_norm": 2.1190241613417724, + "learning_rate": 9.399158381220341e-07, + "loss": 0.3274, + "step": 31050, + "vit_learning_rate": 1.879831676244068e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7192, + "epoch": 1.6156887224302956, + "grad_norm": 1.4728972997756273, + "learning_rate": 9.374590546654005e-07, + "loss": 0.3182, + "step": 31060, + "vit_learning_rate": 1.8749181093308008e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6859, + "epoch": 1.6162089055347484, + "grad_norm": 1.3103134394898368, + "learning_rate": 9.35005154042119e-07, + "loss": 0.3229, + "step": 31070, + "vit_learning_rate": 1.8700103080842378e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6631, + "epoch": 1.616729088639201, + "grad_norm": 1.2989274926285828, + "learning_rate": 9.32554137993511e-07, + "loss": 0.3239, + "step": 31080, + "vit_learning_rate": 1.8651082759870219e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6807, + "epoch": 1.6172492717436537, + "grad_norm": 1.8042344781259012, + "learning_rate": 9.301060082588492e-07, + "loss": 0.3244, + "step": 31090, + "vit_learning_rate": 1.8602120165176981e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 2.8619, + "epoch": 1.6177694548481065, + "grad_norm": 1.3142752321219016, + "learning_rate": 9.276607665753601e-07, + "loss": 0.3308, + "step": 31100, + "vit_learning_rate": 1.85532153315072e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 6.7326, + "epoch": 1.6182896379525593, + "grad_norm": 1.5254362505523027, + "learning_rate": 9.252184146782189e-07, + "loss": 0.3196, + "step": 31110, + "vit_learning_rate": 1.8504368293564377e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 4.8422, + "epoch": 1.618809821057012, + "grad_norm": 1.9511337956842627, + "learning_rate": 9.227789543005522e-07, + "loss": 0.3447, + "step": 31120, + "vit_learning_rate": 1.8455579086011043e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6602, + "epoch": 1.6193300041614649, + "grad_norm": 1.5405175302437764, + "learning_rate": 9.203423871734335e-07, + "loss": 0.3325, + "step": 31130, + "vit_learning_rate": 1.8406847743468668e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.747, + "epoch": 1.6198501872659175, + "grad_norm": 1.8134046703626183, + "learning_rate": 9.179087150258814e-07, + "loss": 0.3372, + "step": 31140, + "vit_learning_rate": 1.8358174300517625e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 5.2049, + "epoch": 1.6203703703703702, + "grad_norm": 1.527668307396066, + "learning_rate": 9.154779395848651e-07, + "loss": 0.3139, + "step": 31150, + "vit_learning_rate": 1.8309558791697299e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 2.8914, + "epoch": 1.620890553474823, + "grad_norm": 1.3150189197616207, + "learning_rate": 9.130500625752942e-07, + "loss": 0.3288, + "step": 31160, + "vit_learning_rate": 1.826100125150588e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6436, + "epoch": 1.6214107365792758, + "grad_norm": 2.1888647613146457, + "learning_rate": 9.106250857200238e-07, + "loss": 0.3345, + "step": 31170, + "vit_learning_rate": 1.8212501714400474e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6319, + "epoch": 1.6219309196837286, + "grad_norm": 1.3226771259728338, + "learning_rate": 9.08203010739846e-07, + "loss": 0.3222, + "step": 31180, + "vit_learning_rate": 1.8164060214796916e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 4.655, + "epoch": 1.6224511027881814, + "grad_norm": 1.7970293066575793, + "learning_rate": 9.057838393535018e-07, + "loss": 0.3158, + "step": 31190, + "vit_learning_rate": 1.8115676787070033e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6778, + "epoch": 1.6229712858926342, + "grad_norm": 1.5528936065857066, + "learning_rate": 9.033675732776664e-07, + "loss": 0.3132, + "step": 31200, + "vit_learning_rate": 1.8067351465553325e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.669, + "epoch": 1.623491468997087, + "grad_norm": 1.498387741546567, + "learning_rate": 9.009542142269534e-07, + "loss": 0.3336, + "step": 31210, + "vit_learning_rate": 1.8019084284539067e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7043, + "epoch": 1.6240116521015397, + "grad_norm": 2.0773838476548327, + "learning_rate": 8.985437639139155e-07, + "loss": 0.3197, + "step": 31220, + "vit_learning_rate": 1.7970875278278307e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.5504, + "epoch": 1.6245318352059925, + "grad_norm": 1.7678572602266296, + "learning_rate": 8.961362240490407e-07, + "loss": 0.3234, + "step": 31230, + "vit_learning_rate": 1.792272448098081e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6273, + "epoch": 1.6250520183104453, + "grad_norm": 1.7870776566605122, + "learning_rate": 8.937315963407506e-07, + "loss": 0.3102, + "step": 31240, + "vit_learning_rate": 1.787463192681501e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 11.2239, + "epoch": 1.625572201414898, + "grad_norm": 1.5242529016599606, + "learning_rate": 8.91329882495402e-07, + "loss": 0.3145, + "step": 31250, + "vit_learning_rate": 1.7826597649908037e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6519, + "epoch": 1.626092384519351, + "grad_norm": 1.5825905144405394, + "learning_rate": 8.889310842172827e-07, + "loss": 0.3317, + "step": 31260, + "vit_learning_rate": 1.7778621684345653e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.7017, + "epoch": 1.6266125676238037, + "grad_norm": 2.097787515147294, + "learning_rate": 8.865352032086122e-07, + "loss": 0.3348, + "step": 31270, + "vit_learning_rate": 1.773070406417224e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6556, + "epoch": 1.6271327507282565, + "grad_norm": 1.4345334524674664, + "learning_rate": 8.841422411695383e-07, + "loss": 0.3146, + "step": 31280, + "vit_learning_rate": 1.7682844823390763e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7569, + "epoch": 1.6276529338327093, + "grad_norm": 1.479738306691059, + "learning_rate": 8.817521997981409e-07, + "loss": 0.3432, + "step": 31290, + "vit_learning_rate": 1.7635043995962818e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.9892, + "epoch": 1.628173116937162, + "grad_norm": 1.5514358147565679, + "learning_rate": 8.793650807904263e-07, + "loss": 0.3188, + "step": 31300, + "vit_learning_rate": 1.7587301615808526e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6899, + "epoch": 1.6286933000416146, + "grad_norm": 1.2751603441883828, + "learning_rate": 8.769808858403228e-07, + "loss": 0.3077, + "step": 31310, + "vit_learning_rate": 1.7539617716806454e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6703, + "epoch": 1.6292134831460674, + "grad_norm": 1.6245716181996637, + "learning_rate": 8.745996166396875e-07, + "loss": 0.3145, + "step": 31320, + "vit_learning_rate": 1.7491992332793748e-07 + }, + { + "avg_batch_load_time": 0.0039, + "avg_batch_processing_time": 0.7408, + "epoch": 1.6297336662505202, + "grad_norm": 2.342064182337807, + "learning_rate": 8.722212748783033e-07, + "loss": 0.3195, + "step": 31330, + "vit_learning_rate": 1.744442549756606e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6493, + "epoch": 1.630253849354973, + "grad_norm": 1.7915796296622197, + "learning_rate": 8.69845862243871e-07, + "loss": 0.3191, + "step": 31340, + "vit_learning_rate": 1.7396917244877418e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6513, + "epoch": 1.6307740324594258, + "grad_norm": 1.7238726169470597, + "learning_rate": 8.67473380422017e-07, + "loss": 0.3264, + "step": 31350, + "vit_learning_rate": 1.734946760844034e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6458, + "epoch": 1.6312942155638783, + "grad_norm": 1.8559091700946528, + "learning_rate": 8.651038310962845e-07, + "loss": 0.3254, + "step": 31360, + "vit_learning_rate": 1.7302076621925688e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7386, + "epoch": 1.6318143986683311, + "grad_norm": 1.3423916644946723, + "learning_rate": 8.627372159481379e-07, + "loss": 0.3092, + "step": 31370, + "vit_learning_rate": 1.7254744318962755e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.689, + "epoch": 1.632334581772784, + "grad_norm": 2.0365318009408275, + "learning_rate": 8.603735366569594e-07, + "loss": 0.3411, + "step": 31380, + "vit_learning_rate": 1.7207470733139185e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 2.5899, + "epoch": 1.6328547648772367, + "grad_norm": 1.5832435341878095, + "learning_rate": 8.580127949000466e-07, + "loss": 0.3397, + "step": 31390, + "vit_learning_rate": 1.716025589800093e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7123, + "epoch": 1.6333749479816895, + "grad_norm": 1.4884417917611081, + "learning_rate": 8.556549923526142e-07, + "loss": 0.327, + "step": 31400, + "vit_learning_rate": 1.7113099847052282e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6565, + "epoch": 1.6338951310861423, + "grad_norm": 1.8778883993457223, + "learning_rate": 8.533001306877897e-07, + "loss": 0.3374, + "step": 31410, + "vit_learning_rate": 1.7066002613755792e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6609, + "epoch": 1.634415314190595, + "grad_norm": 1.2451951093226576, + "learning_rate": 8.509482115766138e-07, + "loss": 0.3318, + "step": 31420, + "vit_learning_rate": 1.7018964231532275e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.8796, + "epoch": 1.6349354972950478, + "grad_norm": 1.5744419479700074, + "learning_rate": 8.485992366880419e-07, + "loss": 0.3011, + "step": 31430, + "vit_learning_rate": 1.6971984733760837e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7292, + "epoch": 1.6354556803995006, + "grad_norm": 1.2384051803226612, + "learning_rate": 8.462532076889379e-07, + "loss": 0.3115, + "step": 31440, + "vit_learning_rate": 1.6925064153778758e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7278, + "epoch": 1.6359758635039534, + "grad_norm": 3.3902268561158793, + "learning_rate": 8.439101262440729e-07, + "loss": 0.3376, + "step": 31450, + "vit_learning_rate": 1.6878202524881457e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6691, + "epoch": 1.6364960466084062, + "grad_norm": 2.157584966283915, + "learning_rate": 8.415699940161292e-07, + "loss": 0.3127, + "step": 31460, + "vit_learning_rate": 1.6831399880322583e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6891, + "epoch": 1.637016229712859, + "grad_norm": 1.9858558354637812, + "learning_rate": 8.392328126656979e-07, + "loss": 0.3278, + "step": 31470, + "vit_learning_rate": 1.6784656253313956e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6686, + "epoch": 1.6375364128173118, + "grad_norm": 1.9304664144683137, + "learning_rate": 8.368985838512728e-07, + "loss": 0.3121, + "step": 31480, + "vit_learning_rate": 1.6737971677025453e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6595, + "epoch": 1.6380565959217646, + "grad_norm": 1.4562003874722274, + "learning_rate": 8.345673092292545e-07, + "loss": 0.3418, + "step": 31490, + "vit_learning_rate": 1.6691346184585087e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.657, + "epoch": 1.6385767790262173, + "grad_norm": 1.8052518019073733, + "learning_rate": 8.322389904539457e-07, + "loss": 0.2996, + "step": 31500, + "vit_learning_rate": 1.6644779809078914e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6752, + "epoch": 1.6390969621306701, + "grad_norm": 1.6074523737483395, + "learning_rate": 8.299136291775533e-07, + "loss": 0.3082, + "step": 31510, + "vit_learning_rate": 1.6598272583551065e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.7376, + "epoch": 1.639617145235123, + "grad_norm": 1.5216113598354373, + "learning_rate": 8.275912270501845e-07, + "loss": 0.3214, + "step": 31520, + "vit_learning_rate": 1.655182454100369e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6576, + "epoch": 1.6401373283395755, + "grad_norm": 1.8586828941661289, + "learning_rate": 8.252717857198472e-07, + "loss": 0.3554, + "step": 31530, + "vit_learning_rate": 1.6505435714396944e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6432, + "epoch": 1.6406575114440283, + "grad_norm": 2.0715047903759305, + "learning_rate": 8.229553068324469e-07, + "loss": 0.3372, + "step": 31540, + "vit_learning_rate": 1.6459106136648938e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6547, + "epoch": 1.641177694548481, + "grad_norm": 4.581942305134531, + "learning_rate": 8.206417920317894e-07, + "loss": 0.3316, + "step": 31550, + "vit_learning_rate": 1.6412835840635786e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6528, + "epoch": 1.6416978776529338, + "grad_norm": 1.5493162675982814, + "learning_rate": 8.183312429595736e-07, + "loss": 0.3327, + "step": 31560, + "vit_learning_rate": 1.636662485919147e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6241, + "epoch": 1.6422180607573866, + "grad_norm": 1.3717799145016063, + "learning_rate": 8.16023661255399e-07, + "loss": 0.3233, + "step": 31570, + "vit_learning_rate": 1.6320473225107977e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6534, + "epoch": 1.6427382438618394, + "grad_norm": 1.8121720011190858, + "learning_rate": 8.137190485567553e-07, + "loss": 0.3336, + "step": 31580, + "vit_learning_rate": 1.6274380971135104e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6754, + "epoch": 1.643258426966292, + "grad_norm": 1.367011247017669, + "learning_rate": 8.114174064990254e-07, + "loss": 0.3176, + "step": 31590, + "vit_learning_rate": 1.6228348129980506e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6597, + "epoch": 1.6437786100707448, + "grad_norm": 1.569964434018555, + "learning_rate": 8.091187367154846e-07, + "loss": 0.3169, + "step": 31600, + "vit_learning_rate": 1.618237473430969e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6179, + "epoch": 1.6442987931751976, + "grad_norm": 1.5019804360417313, + "learning_rate": 8.068230408373023e-07, + "loss": 0.3249, + "step": 31610, + "vit_learning_rate": 1.6136460816746045e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.666, + "epoch": 1.6448189762796503, + "grad_norm": 1.4174969429428028, + "learning_rate": 8.04530320493534e-07, + "loss": 0.3266, + "step": 31620, + "vit_learning_rate": 1.6090606409870678e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.636, + "epoch": 1.6453391593841031, + "grad_norm": 1.6393657665596204, + "learning_rate": 8.022405773111236e-07, + "loss": 0.3337, + "step": 31630, + "vit_learning_rate": 1.604481154622247e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6318, + "epoch": 1.645859342488556, + "grad_norm": 2.8265227976072325, + "learning_rate": 7.999538129149048e-07, + "loss": 0.3382, + "step": 31640, + "vit_learning_rate": 1.5999076258298094e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6443, + "epoch": 1.6463795255930087, + "grad_norm": 1.929618634894764, + "learning_rate": 7.976700289275958e-07, + "loss": 0.3051, + "step": 31650, + "vit_learning_rate": 1.5953400578551913e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6393, + "epoch": 1.6468997086974615, + "grad_norm": 2.069859169864933, + "learning_rate": 7.953892269697994e-07, + "loss": 0.3473, + "step": 31660, + "vit_learning_rate": 1.5907784539395985e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6679, + "epoch": 1.6474198918019143, + "grad_norm": 1.6498765264637878, + "learning_rate": 7.931114086600044e-07, + "loss": 0.323, + "step": 31670, + "vit_learning_rate": 1.5862228173200089e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7487, + "epoch": 1.647940074906367, + "grad_norm": 1.8091625724655434, + "learning_rate": 7.908365756145802e-07, + "loss": 0.3497, + "step": 31680, + "vit_learning_rate": 1.5816731512291604e-07 + }, + { + "avg_batch_load_time": 0.0228, + "avg_batch_processing_time": 0.6687, + "epoch": 1.6484602580108199, + "grad_norm": 1.854594206172758, + "learning_rate": 7.885647294477794e-07, + "loss": 0.3455, + "step": 31690, + "vit_learning_rate": 1.5771294588955587e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.747, + "epoch": 1.6489804411152726, + "grad_norm": 1.5845644162936852, + "learning_rate": 7.862958717717328e-07, + "loss": 0.3099, + "step": 31700, + "vit_learning_rate": 1.5725917435434653e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6883, + "epoch": 1.6495006242197254, + "grad_norm": 1.4243615836883, + "learning_rate": 7.840300041964544e-07, + "loss": 0.3152, + "step": 31710, + "vit_learning_rate": 1.5680600083929086e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.652, + "epoch": 1.6500208073241782, + "grad_norm": 1.511316482323583, + "learning_rate": 7.817671283298345e-07, + "loss": 0.3237, + "step": 31720, + "vit_learning_rate": 1.5635342566596687e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.654, + "epoch": 1.650540990428631, + "grad_norm": 1.2210228628554947, + "learning_rate": 7.795072457776376e-07, + "loss": 0.3281, + "step": 31730, + "vit_learning_rate": 1.559014491555275e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6682, + "epoch": 1.6510611735330838, + "grad_norm": 1.9394285169417378, + "learning_rate": 7.772503581435076e-07, + "loss": 0.3447, + "step": 31740, + "vit_learning_rate": 1.554500716287015e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.8181, + "epoch": 1.6515813566375364, + "grad_norm": 1.5301763756560538, + "learning_rate": 7.749964670289606e-07, + "loss": 0.3262, + "step": 31750, + "vit_learning_rate": 1.549992934057921e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6921, + "epoch": 1.6521015397419891, + "grad_norm": 1.498352854842812, + "learning_rate": 7.727455740333912e-07, + "loss": 0.3303, + "step": 31760, + "vit_learning_rate": 1.545491148066782e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6776, + "epoch": 1.652621722846442, + "grad_norm": 1.516711658544688, + "learning_rate": 7.704976807540609e-07, + "loss": 0.3434, + "step": 31770, + "vit_learning_rate": 1.5409953615081217e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6603, + "epoch": 1.6531419059508947, + "grad_norm": 1.323703661723804, + "learning_rate": 7.682527887861057e-07, + "loss": 0.3207, + "step": 31780, + "vit_learning_rate": 1.5365055775722113e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.646, + "epoch": 1.6536620890553475, + "grad_norm": 1.5280457348633156, + "learning_rate": 7.660108997225302e-07, + "loss": 0.3489, + "step": 31790, + "vit_learning_rate": 1.5320217994450602e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7219, + "epoch": 1.6541822721598003, + "grad_norm": 2.41127246562675, + "learning_rate": 7.637720151542094e-07, + "loss": 0.3371, + "step": 31800, + "vit_learning_rate": 1.5275440303084187e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6701, + "epoch": 1.6547024552642529, + "grad_norm": 1.891528739200391, + "learning_rate": 7.615361366698854e-07, + "loss": 0.3102, + "step": 31810, + "vit_learning_rate": 1.5230722733397706e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6762, + "epoch": 1.6552226383687056, + "grad_norm": 1.1671867201047297, + "learning_rate": 7.593032658561677e-07, + "loss": 0.317, + "step": 31820, + "vit_learning_rate": 1.5186065317123353e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7371, + "epoch": 1.6557428214731584, + "grad_norm": 1.8381966368808258, + "learning_rate": 7.570734042975314e-07, + "loss": 0.3051, + "step": 31830, + "vit_learning_rate": 1.5141468085950626e-07 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7385, + "epoch": 1.6562630045776112, + "grad_norm": 1.4092806599439176, + "learning_rate": 7.548465535763144e-07, + "loss": 0.3207, + "step": 31840, + "vit_learning_rate": 1.5096931071526286e-07 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7714, + "epoch": 1.656783187682064, + "grad_norm": 1.776281981608011, + "learning_rate": 7.526227152727222e-07, + "loss": 0.3351, + "step": 31850, + "vit_learning_rate": 1.5052454305454444e-07 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7098, + "epoch": 1.6573033707865168, + "grad_norm": 2.005069737212877, + "learning_rate": 7.504018909648203e-07, + "loss": 0.3379, + "step": 31860, + "vit_learning_rate": 1.5008037819296403e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6296, + "epoch": 1.6578235538909696, + "grad_norm": 1.6154962788464675, + "learning_rate": 7.48184082228533e-07, + "loss": 0.3193, + "step": 31870, + "vit_learning_rate": 1.496368164457066e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6644, + "epoch": 1.6583437369954224, + "grad_norm": 1.2902729983014194, + "learning_rate": 7.459692906376481e-07, + "loss": 0.344, + "step": 31880, + "vit_learning_rate": 1.491938581275296e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6487, + "epoch": 1.6588639200998752, + "grad_norm": 1.4349125156519815, + "learning_rate": 7.437575177638101e-07, + "loss": 0.3326, + "step": 31890, + "vit_learning_rate": 1.48751503552762e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6708, + "epoch": 1.659384103204328, + "grad_norm": 1.2640791084636045, + "learning_rate": 7.415487651765251e-07, + "loss": 0.314, + "step": 31900, + "vit_learning_rate": 1.48309753035305e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7458, + "epoch": 1.6599042863087807, + "grad_norm": 1.430341716683243, + "learning_rate": 7.393430344431524e-07, + "loss": 0.3105, + "step": 31910, + "vit_learning_rate": 1.4786860688863046e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7111, + "epoch": 1.6604244694132335, + "grad_norm": 1.537439778021885, + "learning_rate": 7.371403271289079e-07, + "loss": 0.3043, + "step": 31920, + "vit_learning_rate": 1.4742806542578156e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6289, + "epoch": 1.6609446525176863, + "grad_norm": 1.2882045671463063, + "learning_rate": 7.349406447968621e-07, + "loss": 0.3208, + "step": 31930, + "vit_learning_rate": 1.469881289593724e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6664, + "epoch": 1.661464835622139, + "grad_norm": 1.3945052774724451, + "learning_rate": 7.327439890079385e-07, + "loss": 0.3543, + "step": 31940, + "vit_learning_rate": 1.465487978015877e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7544, + "epoch": 1.6619850187265919, + "grad_norm": 2.2827453126824597, + "learning_rate": 7.305503613209142e-07, + "loss": 0.3321, + "step": 31950, + "vit_learning_rate": 1.4611007226418283e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6868, + "epoch": 1.6625052018310447, + "grad_norm": 1.6850451500509593, + "learning_rate": 7.283597632924167e-07, + "loss": 0.3296, + "step": 31960, + "vit_learning_rate": 1.456719526584833e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7229, + "epoch": 1.6630253849354975, + "grad_norm": 1.4560079645991773, + "learning_rate": 7.261721964769225e-07, + "loss": 0.3188, + "step": 31970, + "vit_learning_rate": 1.4523443929538447e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.638, + "epoch": 1.66354556803995, + "grad_norm": 1.720383894738399, + "learning_rate": 7.239876624267589e-07, + "loss": 0.3172, + "step": 31980, + "vit_learning_rate": 1.4479753248535175e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.646, + "epoch": 1.6640657511444028, + "grad_norm": 1.7854148564091066, + "learning_rate": 7.218061626921008e-07, + "loss": 0.318, + "step": 31990, + "vit_learning_rate": 1.4436123253842015e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.641, + "epoch": 1.6645859342488556, + "grad_norm": 1.4980125050288584, + "learning_rate": 7.196276988209711e-07, + "loss": 0.3271, + "step": 32000, + "vit_learning_rate": 1.439255397641942e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.662, + "epoch": 1.6651061173533084, + "grad_norm": 1.3620968096060255, + "learning_rate": 7.174522723592342e-07, + "loss": 0.2896, + "step": 32010, + "vit_learning_rate": 1.4349045447184683e-07 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.6523, + "epoch": 1.6656263004577612, + "grad_norm": 1.741665840196707, + "learning_rate": 7.152798848506026e-07, + "loss": 0.3367, + "step": 32020, + "vit_learning_rate": 1.4305597697012052e-07 + }, + { + "avg_batch_load_time": 0.0292, + "avg_batch_processing_time": 0.623, + "epoch": 1.666146483562214, + "grad_norm": 1.629011848873107, + "learning_rate": 7.131105378366304e-07, + "loss": 0.3161, + "step": 32030, + "vit_learning_rate": 1.4262210756732605e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7068, + "epoch": 1.6666666666666665, + "grad_norm": 1.680599704156932, + "learning_rate": 7.109442328567179e-07, + "loss": 0.3268, + "step": 32040, + "vit_learning_rate": 1.4218884657134355e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.625, + "epoch": 1.6671868497711193, + "grad_norm": 1.2138374442859916, + "learning_rate": 7.087809714481026e-07, + "loss": 0.3271, + "step": 32050, + "vit_learning_rate": 1.417561942896205e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6336, + "epoch": 1.667707032875572, + "grad_norm": 1.80907659370555, + "learning_rate": 7.066207551458648e-07, + "loss": 0.3241, + "step": 32060, + "vit_learning_rate": 1.4132415102917295e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7845, + "epoch": 1.6682272159800249, + "grad_norm": 1.638807966844505, + "learning_rate": 7.044635854829196e-07, + "loss": 0.341, + "step": 32070, + "vit_learning_rate": 1.408927170965839e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6751, + "epoch": 1.6687473990844777, + "grad_norm": 1.4100089730960155, + "learning_rate": 7.023094639900269e-07, + "loss": 0.3055, + "step": 32080, + "vit_learning_rate": 1.4046189279800537e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6535, + "epoch": 1.6692675821889305, + "grad_norm": 1.2151721842137595, + "learning_rate": 7.001583921957789e-07, + "loss": 0.3381, + "step": 32090, + "vit_learning_rate": 1.4003167843915576e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6832, + "epoch": 1.6697877652933832, + "grad_norm": 1.901731623114297, + "learning_rate": 6.980103716266051e-07, + "loss": 0.3249, + "step": 32100, + "vit_learning_rate": 1.3960207432532101e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7222, + "epoch": 1.670307948397836, + "grad_norm": 1.6511821265579738, + "learning_rate": 6.958654038067692e-07, + "loss": 0.3203, + "step": 32110, + "vit_learning_rate": 1.3917308076135381e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6264, + "epoch": 1.6708281315022888, + "grad_norm": 1.300532980395704, + "learning_rate": 6.937234902583678e-07, + "loss": 0.3155, + "step": 32120, + "vit_learning_rate": 1.3874469805167354e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7784, + "epoch": 1.6713483146067416, + "grad_norm": 1.7577678625744104, + "learning_rate": 6.915846325013353e-07, + "loss": 0.3102, + "step": 32130, + "vit_learning_rate": 1.3831692650026706e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6329, + "epoch": 1.6718684977111944, + "grad_norm": 1.529285831995983, + "learning_rate": 6.894488320534304e-07, + "loss": 0.3315, + "step": 32140, + "vit_learning_rate": 1.3788976641068607e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.8548, + "epoch": 1.6723886808156472, + "grad_norm": 1.127083517995878, + "learning_rate": 6.873160904302461e-07, + "loss": 0.3312, + "step": 32150, + "vit_learning_rate": 1.3746321808604922e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6599, + "epoch": 1.6729088639201, + "grad_norm": 1.5741194495232695, + "learning_rate": 6.851864091452054e-07, + "loss": 0.3236, + "step": 32160, + "vit_learning_rate": 1.3703728182904107e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6507, + "epoch": 1.6734290470245528, + "grad_norm": 1.7817929116548432, + "learning_rate": 6.830597897095559e-07, + "loss": 0.3347, + "step": 32170, + "vit_learning_rate": 1.3661195794191115e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6581, + "epoch": 1.6739492301290055, + "grad_norm": 1.4240445444437542, + "learning_rate": 6.809362336323789e-07, + "loss": 0.3465, + "step": 32180, + "vit_learning_rate": 1.3618724672647575e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6399, + "epoch": 1.6744694132334583, + "grad_norm": 1.817436889923108, + "learning_rate": 6.788157424205765e-07, + "loss": 0.313, + "step": 32190, + "vit_learning_rate": 1.357631484841153e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7511, + "epoch": 1.674989596337911, + "grad_norm": 2.8514662986902506, + "learning_rate": 6.766983175788788e-07, + "loss": 0.3238, + "step": 32200, + "vit_learning_rate": 1.3533966351577574e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6014, + "epoch": 1.6755097794423637, + "grad_norm": 2.3334315292527474, + "learning_rate": 6.745839606098354e-07, + "loss": 0.3248, + "step": 32210, + "vit_learning_rate": 1.3491679212196706e-07 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6893, + "epoch": 1.6760299625468165, + "grad_norm": 2.1993719831480893, + "learning_rate": 6.724726730138254e-07, + "loss": 0.3244, + "step": 32220, + "vit_learning_rate": 1.3449453460276505e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.657, + "epoch": 1.6765501456512693, + "grad_norm": 1.7092105857454665, + "learning_rate": 6.703644562890454e-07, + "loss": 0.2895, + "step": 32230, + "vit_learning_rate": 1.3407289125780908e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7336, + "epoch": 1.677070328755722, + "grad_norm": 1.4372466418015026, + "learning_rate": 6.682593119315151e-07, + "loss": 0.3139, + "step": 32240, + "vit_learning_rate": 1.33651862386303e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6225, + "epoch": 1.6775905118601748, + "grad_norm": 1.706992276701781, + "learning_rate": 6.66157241435072e-07, + "loss": 0.3107, + "step": 32250, + "vit_learning_rate": 1.3323144828701438e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6627, + "epoch": 1.6781106949646274, + "grad_norm": 1.8578047553381183, + "learning_rate": 6.640582462913731e-07, + "loss": 0.309, + "step": 32260, + "vit_learning_rate": 1.328116492582746e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6537, + "epoch": 1.6786308780690802, + "grad_norm": 1.567210568377292, + "learning_rate": 6.619623279898963e-07, + "loss": 0.3496, + "step": 32270, + "vit_learning_rate": 1.3239246559797922e-07 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6371, + "epoch": 1.679151061173533, + "grad_norm": 1.19636401293207, + "learning_rate": 6.598694880179302e-07, + "loss": 0.3184, + "step": 32280, + "vit_learning_rate": 1.3197389760358603e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6227, + "epoch": 1.6796712442779858, + "grad_norm": 1.6539969729775794, + "learning_rate": 6.57779727860583e-07, + "loss": 0.3244, + "step": 32290, + "vit_learning_rate": 1.3155594557211657e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6807, + "epoch": 1.6801914273824385, + "grad_norm": 1.3106325406525365, + "learning_rate": 6.556930490007762e-07, + "loss": 0.3148, + "step": 32300, + "vit_learning_rate": 1.3113860980015524e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7358, + "epoch": 1.6807116104868913, + "grad_norm": 1.8601829562539118, + "learning_rate": 6.536094529192438e-07, + "loss": 0.3452, + "step": 32310, + "vit_learning_rate": 1.3072189058384874e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6885, + "epoch": 1.6812317935913441, + "grad_norm": 1.228788491577828, + "learning_rate": 6.515289410945364e-07, + "loss": 0.326, + "step": 32320, + "vit_learning_rate": 1.3030578821890725e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.641, + "epoch": 1.681751976695797, + "grad_norm": 1.8709814894344312, + "learning_rate": 6.494515150030117e-07, + "loss": 0.3271, + "step": 32330, + "vit_learning_rate": 1.2989030300060234e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7206, + "epoch": 1.6822721598002497, + "grad_norm": 2.3743075815694494, + "learning_rate": 6.473771761188391e-07, + "loss": 0.3257, + "step": 32340, + "vit_learning_rate": 1.2947543522376782e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6493, + "epoch": 1.6827923429047025, + "grad_norm": 1.7626547435433695, + "learning_rate": 6.453059259139954e-07, + "loss": 0.3114, + "step": 32350, + "vit_learning_rate": 1.2906118518279908e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6625, + "epoch": 1.6833125260091553, + "grad_norm": 1.4785147277017994, + "learning_rate": 6.432377658582695e-07, + "loss": 0.301, + "step": 32360, + "vit_learning_rate": 1.2864755317165387e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6616, + "epoch": 1.683832709113608, + "grad_norm": 1.5261079213228586, + "learning_rate": 6.41172697419255e-07, + "loss": 0.2951, + "step": 32370, + "vit_learning_rate": 1.2823453948385098e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6852, + "epoch": 1.6843528922180608, + "grad_norm": 1.1777481345975032, + "learning_rate": 6.391107220623516e-07, + "loss": 0.3435, + "step": 32380, + "vit_learning_rate": 1.278221444124703e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6988, + "epoch": 1.6848730753225136, + "grad_norm": 1.2968501776819188, + "learning_rate": 6.370518412507654e-07, + "loss": 0.3273, + "step": 32390, + "vit_learning_rate": 1.2741036825015306e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7048, + "epoch": 1.6853932584269664, + "grad_norm": 1.6800083308655522, + "learning_rate": 6.349960564455049e-07, + "loss": 0.3446, + "step": 32400, + "vit_learning_rate": 1.2699921128910096e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6418, + "epoch": 1.6859134415314192, + "grad_norm": 1.823015229950209, + "learning_rate": 6.329433691053826e-07, + "loss": 0.3109, + "step": 32410, + "vit_learning_rate": 1.2658867382107652e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.729, + "epoch": 1.686433624635872, + "grad_norm": 1.536511020351412, + "learning_rate": 6.30893780687013e-07, + "loss": 0.3298, + "step": 32420, + "vit_learning_rate": 1.2617875613740258e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6417, + "epoch": 1.6869538077403246, + "grad_norm": 1.2828077028598441, + "learning_rate": 6.288472926448114e-07, + "loss": 0.3356, + "step": 32430, + "vit_learning_rate": 1.2576945852896227e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6471, + "epoch": 1.6874739908447773, + "grad_norm": 1.7966141855158435, + "learning_rate": 6.268039064309933e-07, + "loss": 0.3303, + "step": 32440, + "vit_learning_rate": 1.2536078128619865e-07 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6335, + "epoch": 1.6879941739492301, + "grad_norm": 1.4003799428425996, + "learning_rate": 6.247636234955718e-07, + "loss": 0.3431, + "step": 32450, + "vit_learning_rate": 1.2495272469911434e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6412, + "epoch": 1.688514357053683, + "grad_norm": 1.4030627010257857, + "learning_rate": 6.227264452863608e-07, + "loss": 0.3156, + "step": 32460, + "vit_learning_rate": 1.2454528905727213e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6661, + "epoch": 1.6890345401581357, + "grad_norm": 1.6540811406402536, + "learning_rate": 6.206923732489678e-07, + "loss": 0.3321, + "step": 32470, + "vit_learning_rate": 1.2413847464979354e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6762, + "epoch": 1.6895547232625883, + "grad_norm": 1.5984246678408416, + "learning_rate": 6.186614088267994e-07, + "loss": 0.3343, + "step": 32480, + "vit_learning_rate": 1.2373228176535989e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.713, + "epoch": 1.690074906367041, + "grad_norm": 1.4217937169390578, + "learning_rate": 6.166335534610513e-07, + "loss": 0.3426, + "step": 32490, + "vit_learning_rate": 1.2332671069221024e-07 + }, + { + "avg_batch_load_time": 0.0011, + "avg_batch_processing_time": 0.6237, + "epoch": 1.6905950894714938, + "grad_norm": 1.624749676933033, + "learning_rate": 6.146088085907203e-07, + "loss": 0.3395, + "step": 32500, + "vit_learning_rate": 1.2292176171814405e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6464, + "epoch": 1.6911152725759466, + "grad_norm": 1.7051978948592506, + "learning_rate": 6.125871756525914e-07, + "loss": 0.3096, + "step": 32510, + "vit_learning_rate": 1.2251743513051828e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6282, + "epoch": 1.6916354556803994, + "grad_norm": 1.9422880627326147, + "learning_rate": 6.105686560812418e-07, + "loss": 0.3037, + "step": 32520, + "vit_learning_rate": 1.2211373121624834e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6433, + "epoch": 1.6921556387848522, + "grad_norm": 1.4497142535824266, + "learning_rate": 6.085532513090408e-07, + "loss": 0.316, + "step": 32530, + "vit_learning_rate": 1.2171065026180817e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6392, + "epoch": 1.692675821889305, + "grad_norm": 1.7021220468516398, + "learning_rate": 6.065409627661456e-07, + "loss": 0.3267, + "step": 32540, + "vit_learning_rate": 1.213081925532291e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.704, + "epoch": 1.6931960049937578, + "grad_norm": 1.6805357864350092, + "learning_rate": 6.045317918805038e-07, + "loss": 0.3246, + "step": 32550, + "vit_learning_rate": 1.2090635837610075e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6952, + "epoch": 1.6937161880982106, + "grad_norm": 1.375270010554012, + "learning_rate": 6.025257400778495e-07, + "loss": 0.3219, + "step": 32560, + "vit_learning_rate": 1.2050514801556988e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6235, + "epoch": 1.6942363712026633, + "grad_norm": 1.6881793464618635, + "learning_rate": 6.005228087817039e-07, + "loss": 0.3132, + "step": 32570, + "vit_learning_rate": 1.2010456175634076e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6778, + "epoch": 1.6947565543071161, + "grad_norm": 1.7131248432789192, + "learning_rate": 5.985229994133746e-07, + "loss": 0.3247, + "step": 32580, + "vit_learning_rate": 1.197045998826749e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.633, + "epoch": 1.695276737411569, + "grad_norm": 1.859901459221443, + "learning_rate": 5.965263133919508e-07, + "loss": 0.3589, + "step": 32590, + "vit_learning_rate": 1.1930526267839014e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6402, + "epoch": 1.6957969205160217, + "grad_norm": 1.1152458406865153, + "learning_rate": 5.945327521343103e-07, + "loss": 0.3223, + "step": 32600, + "vit_learning_rate": 1.1890655042686204e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6704, + "epoch": 1.6963171036204745, + "grad_norm": 1.5015928193540555, + "learning_rate": 5.925423170551098e-07, + "loss": 0.3296, + "step": 32610, + "vit_learning_rate": 1.1850846341102195e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6774, + "epoch": 1.6968372867249273, + "grad_norm": 1.8583307731255336, + "learning_rate": 5.905550095667894e-07, + "loss": 0.3356, + "step": 32620, + "vit_learning_rate": 1.1811100191335787e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6483, + "epoch": 1.69735746982938, + "grad_norm": 1.6084617052651538, + "learning_rate": 5.885708310795663e-07, + "loss": 0.34, + "step": 32630, + "vit_learning_rate": 1.1771416621591324e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6508, + "epoch": 1.6978776529338329, + "grad_norm": 1.6532858048176553, + "learning_rate": 5.865897830014427e-07, + "loss": 0.299, + "step": 32640, + "vit_learning_rate": 1.1731795660028854e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.634, + "epoch": 1.6983978360382854, + "grad_norm": 2.0359382478117207, + "learning_rate": 5.846118667381961e-07, + "loss": 0.3369, + "step": 32650, + "vit_learning_rate": 1.1692237334763922e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7733, + "epoch": 1.6989180191427382, + "grad_norm": 2.158932127535204, + "learning_rate": 5.826370836933825e-07, + "loss": 0.3394, + "step": 32660, + "vit_learning_rate": 1.165274167386765e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7044, + "epoch": 1.699438202247191, + "grad_norm": 1.4058048154255098, + "learning_rate": 5.806654352683344e-07, + "loss": 0.3312, + "step": 32670, + "vit_learning_rate": 1.1613308705366687e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6274, + "epoch": 1.6999583853516438, + "grad_norm": 1.4328324338905352, + "learning_rate": 5.786969228621592e-07, + "loss": 0.3538, + "step": 32680, + "vit_learning_rate": 1.1573938457243182e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6766, + "epoch": 1.7004785684560966, + "grad_norm": 1.547617471604846, + "learning_rate": 5.767315478717401e-07, + "loss": 0.3411, + "step": 32690, + "vit_learning_rate": 1.1534630957434799e-07 + }, + { + "avg_batch_load_time": 2.4481, + "avg_batch_processing_time": 0.6737, + "epoch": 1.7009987515605494, + "grad_norm": 1.7192261722723852, + "learning_rate": 5.74769311691733e-07, + "loss": 0.3165, + "step": 32700, + "vit_learning_rate": 1.1495386233834658e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7463, + "epoch": 1.701518934665002, + "grad_norm": 1.7803664614740777, + "learning_rate": 5.728102157145676e-07, + "loss": 0.3348, + "step": 32710, + "vit_learning_rate": 1.1456204314291352e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6817, + "epoch": 1.7020391177694547, + "grad_norm": 2.0931234060444717, + "learning_rate": 5.708542613304446e-07, + "loss": 0.3284, + "step": 32720, + "vit_learning_rate": 1.1417085226608892e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6684, + "epoch": 1.7025593008739075, + "grad_norm": 1.467361656609037, + "learning_rate": 5.689014499273337e-07, + "loss": 0.3286, + "step": 32730, + "vit_learning_rate": 1.1378028998546674e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7575, + "epoch": 1.7030794839783603, + "grad_norm": 1.6396929830687228, + "learning_rate": 5.669517828909782e-07, + "loss": 0.3165, + "step": 32740, + "vit_learning_rate": 1.1339035657819563e-07 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7581, + "epoch": 1.703599667082813, + "grad_norm": 1.4747345143391943, + "learning_rate": 5.650052616048873e-07, + "loss": 0.3086, + "step": 32750, + "vit_learning_rate": 1.1300105232097745e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.72, + "epoch": 1.7041198501872659, + "grad_norm": 1.3717911427995593, + "learning_rate": 5.630618874503391e-07, + "loss": 0.3225, + "step": 32760, + "vit_learning_rate": 1.126123774900678e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6226, + "epoch": 1.7046400332917186, + "grad_norm": 4.463636718476704, + "learning_rate": 5.611216618063748e-07, + "loss": 0.3069, + "step": 32770, + "vit_learning_rate": 1.1222433236127493e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7017, + "epoch": 1.7051602163961714, + "grad_norm": 1.930272231082434, + "learning_rate": 5.591845860498079e-07, + "loss": 0.3482, + "step": 32780, + "vit_learning_rate": 1.1183691720996157e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.657, + "epoch": 1.7056803995006242, + "grad_norm": 1.9620393442738668, + "learning_rate": 5.57250661555212e-07, + "loss": 0.3454, + "step": 32790, + "vit_learning_rate": 1.114501323110424e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.662, + "epoch": 1.706200582605077, + "grad_norm": 1.7522793142604132, + "learning_rate": 5.553198896949258e-07, + "loss": 0.3043, + "step": 32800, + "vit_learning_rate": 1.1106397793898514e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6711, + "epoch": 1.7067207657095298, + "grad_norm": 1.5727633103211203, + "learning_rate": 5.533922718390505e-07, + "loss": 0.3314, + "step": 32810, + "vit_learning_rate": 1.106784543678101e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6318, + "epoch": 1.7072409488139826, + "grad_norm": 1.2015554720802666, + "learning_rate": 5.514678093554499e-07, + "loss": 0.3143, + "step": 32820, + "vit_learning_rate": 1.1029356187108996e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6149, + "epoch": 1.7077611319184354, + "grad_norm": 1.7116321464725583, + "learning_rate": 5.49546503609748e-07, + "loss": 0.3283, + "step": 32830, + "vit_learning_rate": 1.0990930072194959e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6564, + "epoch": 1.7082813150228882, + "grad_norm": 1.4748503529839625, + "learning_rate": 5.476283559653295e-07, + "loss": 0.3226, + "step": 32840, + "vit_learning_rate": 1.0952567119306588e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7285, + "epoch": 1.708801498127341, + "grad_norm": 1.8703914679071436, + "learning_rate": 5.457133677833376e-07, + "loss": 0.3321, + "step": 32850, + "vit_learning_rate": 1.091426735566675e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6424, + "epoch": 1.7093216812317937, + "grad_norm": 1.5692086614098066, + "learning_rate": 5.438015404226727e-07, + "loss": 0.3193, + "step": 32860, + "vit_learning_rate": 1.0876030808453452e-07 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6645, + "epoch": 1.7098418643362465, + "grad_norm": 2.1562548674102193, + "learning_rate": 5.418928752399926e-07, + "loss": 0.327, + "step": 32870, + "vit_learning_rate": 1.0837857504799853e-07 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6834, + "epoch": 1.710362047440699, + "grad_norm": 1.5074189074951196, + "learning_rate": 5.399873735897137e-07, + "loss": 0.3112, + "step": 32880, + "vit_learning_rate": 1.0799747471794274e-07 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6448, + "epoch": 1.7108822305451519, + "grad_norm": 1.6942738747360866, + "learning_rate": 5.380850368240054e-07, + "loss": 0.34, + "step": 32890, + "vit_learning_rate": 1.0761700736480107e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6617, + "epoch": 1.7114024136496047, + "grad_norm": 1.5872332618708498, + "learning_rate": 5.361858662927894e-07, + "loss": 0.3135, + "step": 32900, + "vit_learning_rate": 1.0723717325855785e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7016, + "epoch": 1.7119225967540574, + "grad_norm": 1.2541320303571226, + "learning_rate": 5.342898633437422e-07, + "loss": 0.3287, + "step": 32910, + "vit_learning_rate": 1.0685797266874841e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7026, + "epoch": 1.7124427798585102, + "grad_norm": 1.3799797120133392, + "learning_rate": 5.323970293222947e-07, + "loss": 0.3175, + "step": 32920, + "vit_learning_rate": 1.0647940586445891e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6531, + "epoch": 1.7129629629629628, + "grad_norm": 1.6107558175285128, + "learning_rate": 5.305073655716269e-07, + "loss": 0.3159, + "step": 32930, + "vit_learning_rate": 1.0610147311432537e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7105, + "epoch": 1.7134831460674156, + "grad_norm": 1.8471089925497055, + "learning_rate": 5.286208734326687e-07, + "loss": 0.3279, + "step": 32940, + "vit_learning_rate": 1.0572417468653371e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7491, + "epoch": 1.7140033291718684, + "grad_norm": 1.9387373561453753, + "learning_rate": 5.267375542441005e-07, + "loss": 0.3296, + "step": 32950, + "vit_learning_rate": 1.053475108488201e-07 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6466, + "epoch": 1.7145235122763212, + "grad_norm": 1.567149022453418, + "learning_rate": 5.248574093423508e-07, + "loss": 0.3456, + "step": 32960, + "vit_learning_rate": 1.0497148186847015e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6579, + "epoch": 1.715043695380774, + "grad_norm": 2.280799220239686, + "learning_rate": 5.229804400615957e-07, + "loss": 0.3303, + "step": 32970, + "vit_learning_rate": 1.0459608801231912e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6918, + "epoch": 1.7155638784852267, + "grad_norm": 1.441950974169488, + "learning_rate": 5.211066477337573e-07, + "loss": 0.3198, + "step": 32980, + "vit_learning_rate": 1.0422132954675145e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.726, + "epoch": 1.7160840615896795, + "grad_norm": 1.9511230905837795, + "learning_rate": 5.192360336885033e-07, + "loss": 0.3186, + "step": 32990, + "vit_learning_rate": 1.0384720673770064e-07 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7099, + "epoch": 1.7166042446941323, + "grad_norm": 1.5662969252743626, + "learning_rate": 5.173685992532478e-07, + "loss": 0.3282, + "step": 33000, + "vit_learning_rate": 1.0347371985064957e-07 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6619, + "epoch": 1.717124427798585, + "grad_norm": 2.0125804665018716, + "learning_rate": 5.15504345753145e-07, + "loss": 0.3176, + "step": 33010, + "vit_learning_rate": 1.0310086915062899e-07 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7113, + "epoch": 1.7176446109030379, + "grad_norm": 1.6629813956472574, + "learning_rate": 5.136432745110964e-07, + "loss": 0.2908, + "step": 33020, + "vit_learning_rate": 1.0272865490221927e-07 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6914, + "epoch": 1.7181647940074907, + "grad_norm": 1.697561995532449, + "learning_rate": 5.117853868477434e-07, + "loss": 0.3369, + "step": 33030, + "vit_learning_rate": 1.0235707736954868e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6635, + "epoch": 1.7186849771119435, + "grad_norm": 1.010615398323049, + "learning_rate": 5.099306840814655e-07, + "loss": 0.3127, + "step": 33040, + "vit_learning_rate": 1.0198613681629309e-07 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.8075, + "epoch": 1.7192051602163962, + "grad_norm": 1.4012518979873976, + "learning_rate": 5.080791675283853e-07, + "loss": 0.326, + "step": 33050, + "vit_learning_rate": 1.0161583350567705e-07 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7514, + "epoch": 1.719725343320849, + "grad_norm": 2.2844201430936892, + "learning_rate": 5.062308385023645e-07, + "loss": 0.3211, + "step": 33060, + "vit_learning_rate": 1.0124616770047289e-07 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.8076, + "epoch": 1.7202455264253018, + "grad_norm": 1.4306949101343098, + "learning_rate": 5.043856983150019e-07, + "loss": 0.335, + "step": 33070, + "vit_learning_rate": 1.0087713966300037e-07 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.761, + "epoch": 1.7207657095297546, + "grad_norm": 1.8088962849699637, + "learning_rate": 5.025437482756335e-07, + "loss": 0.3183, + "step": 33080, + "vit_learning_rate": 1.0050874965512668e-07 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.8297, + "epoch": 1.7212858926342074, + "grad_norm": 1.5078796559919663, + "learning_rate": 5.007049896913313e-07, + "loss": 0.3369, + "step": 33090, + "vit_learning_rate": 1.0014099793826624e-07 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6594, + "epoch": 1.72180607573866, + "grad_norm": 1.5326074992983625, + "learning_rate": 4.988694238669034e-07, + "loss": 0.3255, + "step": 33100, + "vit_learning_rate": 9.977388477338067e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6782, + "epoch": 1.7223262588431127, + "grad_norm": 1.5308035610950697, + "learning_rate": 4.970370521048917e-07, + "loss": 0.3308, + "step": 33110, + "vit_learning_rate": 9.940741042097833e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6806, + "epoch": 1.7228464419475655, + "grad_norm": 1.4821291627938358, + "learning_rate": 4.952078757055723e-07, + "loss": 0.3363, + "step": 33120, + "vit_learning_rate": 9.904157514111445e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6224, + "epoch": 1.7233666250520183, + "grad_norm": 1.5807053132176496, + "learning_rate": 4.933818959669517e-07, + "loss": 0.3235, + "step": 33130, + "vit_learning_rate": 9.867637919339034e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6488, + "epoch": 1.723886808156471, + "grad_norm": 1.4514504412052667, + "learning_rate": 4.915591141847708e-07, + "loss": 0.3341, + "step": 33140, + "vit_learning_rate": 9.831182283695416e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7541, + "epoch": 1.724406991260924, + "grad_norm": 1.3250065820145729, + "learning_rate": 4.897395316524989e-07, + "loss": 0.322, + "step": 33150, + "vit_learning_rate": 9.794790633049976e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6655, + "epoch": 1.7249271743653765, + "grad_norm": 1.5759410381494305, + "learning_rate": 4.879231496613379e-07, + "loss": 0.3112, + "step": 33160, + "vit_learning_rate": 9.758462993226757e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.666, + "epoch": 1.7254473574698292, + "grad_norm": 1.5947532865414311, + "learning_rate": 4.861099695002158e-07, + "loss": 0.3344, + "step": 33170, + "vit_learning_rate": 9.722199390004315e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6564, + "epoch": 1.725967540574282, + "grad_norm": 1.9197321436163717, + "learning_rate": 4.84299992455789e-07, + "loss": 0.3231, + "step": 33180, + "vit_learning_rate": 9.685999849115777e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6559, + "epoch": 1.7264877236787348, + "grad_norm": 1.5250271429441906, + "learning_rate": 4.824932198124404e-07, + "loss": 0.3309, + "step": 33190, + "vit_learning_rate": 9.649864396248808e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6444, + "epoch": 1.7270079067831876, + "grad_norm": 1.7391178372399174, + "learning_rate": 4.80689652852282e-07, + "loss": 0.3102, + "step": 33200, + "vit_learning_rate": 9.613793057045638e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6383, + "epoch": 1.7275280898876404, + "grad_norm": 1.624351600833631, + "learning_rate": 4.788892928551486e-07, + "loss": 0.3294, + "step": 33210, + "vit_learning_rate": 9.577785857102971e-08 + }, + { + "avg_batch_load_time": 0.0217, + "avg_batch_processing_time": 0.669, + "epoch": 1.7280482729920932, + "grad_norm": 1.5931235330064868, + "learning_rate": 4.770921410985995e-07, + "loss": 0.3387, + "step": 33220, + "vit_learning_rate": 9.54184282197199e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6759, + "epoch": 1.728568456096546, + "grad_norm": 1.5962625349046065, + "learning_rate": 4.752981988579175e-07, + "loss": 0.3482, + "step": 33230, + "vit_learning_rate": 9.50596397715835e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6468, + "epoch": 1.7290886392009988, + "grad_norm": 1.421295423779806, + "learning_rate": 4.7350746740610797e-07, + "loss": 0.2992, + "step": 33240, + "vit_learning_rate": 9.470149348122158e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7228, + "epoch": 1.7296088223054515, + "grad_norm": 2.038080749076144, + "learning_rate": 4.7171994801389787e-07, + "loss": 0.3021, + "step": 33250, + "vit_learning_rate": 9.434398960277956e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7413, + "epoch": 1.7301290054099043, + "grad_norm": 1.4773093722909683, + "learning_rate": 4.6993564194973474e-07, + "loss": 0.3183, + "step": 33260, + "vit_learning_rate": 9.398712838994694e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6295, + "epoch": 1.7306491885143571, + "grad_norm": 1.3809284123352197, + "learning_rate": 4.681545504797863e-07, + "loss": 0.3141, + "step": 33270, + "vit_learning_rate": 9.363091009595724e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6527, + "epoch": 1.73116937161881, + "grad_norm": 1.6872636502343348, + "learning_rate": 4.663766748679388e-07, + "loss": 0.3262, + "step": 33280, + "vit_learning_rate": 9.327533497358774e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6642, + "epoch": 1.7316895547232627, + "grad_norm": 1.6600069046251826, + "learning_rate": 4.6460201637579583e-07, + "loss": 0.3331, + "step": 33290, + "vit_learning_rate": 9.292040327515915e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7226, + "epoch": 1.7322097378277155, + "grad_norm": 1.8271188459116696, + "learning_rate": 4.6283057626268e-07, + "loss": 0.3132, + "step": 33300, + "vit_learning_rate": 9.2566115252536e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6705, + "epoch": 1.7327299209321683, + "grad_norm": 1.2397861232686114, + "learning_rate": 4.610623557856297e-07, + "loss": 0.3091, + "step": 33310, + "vit_learning_rate": 9.221247115712594e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6334, + "epoch": 1.733250104036621, + "grad_norm": 1.8243337281715575, + "learning_rate": 4.592973561993952e-07, + "loss": 0.3199, + "step": 33320, + "vit_learning_rate": 9.185947123987903e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6364, + "epoch": 1.7337702871410736, + "grad_norm": 2.043357144356197, + "learning_rate": 4.5753557875644394e-07, + "loss": 0.3085, + "step": 33330, + "vit_learning_rate": 9.150711575128878e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.669, + "epoch": 1.7342904702455264, + "grad_norm": 1.59565860305152, + "learning_rate": 4.557770247069587e-07, + "loss": 0.3358, + "step": 33340, + "vit_learning_rate": 9.115540494139174e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.701, + "epoch": 1.7348106533499792, + "grad_norm": 1.6349969686348007, + "learning_rate": 4.540216952988324e-07, + "loss": 0.323, + "step": 33350, + "vit_learning_rate": 9.080433905976648e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7599, + "epoch": 1.735330836454432, + "grad_norm": 1.5758130785507376, + "learning_rate": 4.522695917776698e-07, + "loss": 0.3259, + "step": 33360, + "vit_learning_rate": 9.045391835553396e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6734, + "epoch": 1.7358510195588848, + "grad_norm": 1.560969443757031, + "learning_rate": 4.505207153867869e-07, + "loss": 0.3468, + "step": 33370, + "vit_learning_rate": 9.010414307735736e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6284, + "epoch": 1.7363712026633373, + "grad_norm": 1.7054050529737608, + "learning_rate": 4.487750673672103e-07, + "loss": 0.3238, + "step": 33380, + "vit_learning_rate": 8.975501347344206e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7142, + "epoch": 1.7368913857677901, + "grad_norm": 1.8675102709069016, + "learning_rate": 4.4703264895767485e-07, + "loss": 0.3164, + "step": 33390, + "vit_learning_rate": 8.940652979153495e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6531, + "epoch": 1.737411568872243, + "grad_norm": 1.493451410888178, + "learning_rate": 4.452934613946236e-07, + "loss": 0.3473, + "step": 33400, + "vit_learning_rate": 8.90586922789247e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6914, + "epoch": 1.7379317519766957, + "grad_norm": 1.4504063445490754, + "learning_rate": 4.435575059122077e-07, + "loss": 0.3356, + "step": 33410, + "vit_learning_rate": 8.871150118244152e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7753, + "epoch": 1.7384519350811485, + "grad_norm": 1.5054268829682325, + "learning_rate": 4.41824783742284e-07, + "loss": 0.3333, + "step": 33420, + "vit_learning_rate": 8.836495674845678e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6277, + "epoch": 1.7389721181856013, + "grad_norm": 1.5370367600209618, + "learning_rate": 4.4009529611441505e-07, + "loss": 0.324, + "step": 33430, + "vit_learning_rate": 8.8019059222883e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6173, + "epoch": 1.739492301290054, + "grad_norm": 1.8826506853561595, + "learning_rate": 4.383690442558697e-07, + "loss": 0.3013, + "step": 33440, + "vit_learning_rate": 8.767380885117392e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6253, + "epoch": 1.7400124843945068, + "grad_norm": 1.595279763746043, + "learning_rate": 4.3664602939161973e-07, + "loss": 0.3146, + "step": 33450, + "vit_learning_rate": 8.732920587832394e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6415, + "epoch": 1.7405326674989596, + "grad_norm": 1.799332007411029, + "learning_rate": 4.349262527443371e-07, + "loss": 0.3504, + "step": 33460, + "vit_learning_rate": 8.698525054886741e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6697, + "epoch": 1.7410528506034124, + "grad_norm": 1.6153367795877163, + "learning_rate": 4.332097155343984e-07, + "loss": 0.314, + "step": 33470, + "vit_learning_rate": 8.664194310687967e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6421, + "epoch": 1.7415730337078652, + "grad_norm": 1.5168237981687402, + "learning_rate": 4.314964189798837e-07, + "loss": 0.3509, + "step": 33480, + "vit_learning_rate": 8.629928379597673e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6173, + "epoch": 1.742093216812318, + "grad_norm": 1.4167883310304903, + "learning_rate": 4.2978636429656993e-07, + "loss": 0.3263, + "step": 33490, + "vit_learning_rate": 8.595727285931397e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6266, + "epoch": 1.7426133999167708, + "grad_norm": 1.6008903738007998, + "learning_rate": 4.280795526979353e-07, + "loss": 0.3134, + "step": 33500, + "vit_learning_rate": 8.561591053958706e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6723, + "epoch": 1.7431335830212236, + "grad_norm": 1.8625515208873018, + "learning_rate": 4.26375985395156e-07, + "loss": 0.3225, + "step": 33510, + "vit_learning_rate": 8.527519707903119e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6294, + "epoch": 1.7436537661256764, + "grad_norm": 1.8648177325371411, + "learning_rate": 4.2467566359710386e-07, + "loss": 0.321, + "step": 33520, + "vit_learning_rate": 8.493513271942076e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6733, + "epoch": 1.7441739492301291, + "grad_norm": 1.5622578254421458, + "learning_rate": 4.2297858851035266e-07, + "loss": 0.3296, + "step": 33530, + "vit_learning_rate": 8.459571770207053e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6669, + "epoch": 1.744694132334582, + "grad_norm": 1.5517832611489726, + "learning_rate": 4.212847613391691e-07, + "loss": 0.3207, + "step": 33540, + "vit_learning_rate": 8.42569522678338e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6441, + "epoch": 1.7452143154390345, + "grad_norm": 1.186971543056153, + "learning_rate": 4.1959418328551495e-07, + "loss": 0.3695, + "step": 33550, + "vit_learning_rate": 8.391883665710297e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.68, + "epoch": 1.7457344985434873, + "grad_norm": 1.0929676070686225, + "learning_rate": 4.1790685554904786e-07, + "loss": 0.3264, + "step": 33560, + "vit_learning_rate": 8.358137110980956e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.753, + "epoch": 1.74625468164794, + "grad_norm": 1.400194072060196, + "learning_rate": 4.162227793271162e-07, + "loss": 0.3222, + "step": 33570, + "vit_learning_rate": 8.324455586542323e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6439, + "epoch": 1.7467748647523929, + "grad_norm": 1.8063339459766978, + "learning_rate": 4.145419558147662e-07, + "loss": 0.3185, + "step": 33580, + "vit_learning_rate": 8.290839116295322e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6956, + "epoch": 1.7472950478568456, + "grad_norm": 1.4253768599003298, + "learning_rate": 4.128643862047299e-07, + "loss": 0.3555, + "step": 33590, + "vit_learning_rate": 8.257287724094597e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7291, + "epoch": 1.7478152309612984, + "grad_norm": 3.3443589862140244, + "learning_rate": 4.1119007168743395e-07, + "loss": 0.3353, + "step": 33600, + "vit_learning_rate": 8.223801433748679e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6663, + "epoch": 1.748335414065751, + "grad_norm": 2.1877953783088078, + "learning_rate": 4.095190134509941e-07, + "loss": 0.3211, + "step": 33610, + "vit_learning_rate": 8.190380269019881e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7024, + "epoch": 1.7488555971702038, + "grad_norm": 1.582685542167856, + "learning_rate": 4.0785121268121565e-07, + "loss": 0.3233, + "step": 33620, + "vit_learning_rate": 8.157024253624311e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6619, + "epoch": 1.7493757802746566, + "grad_norm": 1.47662789315526, + "learning_rate": 4.06186670561593e-07, + "loss": 0.3157, + "step": 33630, + "vit_learning_rate": 8.123733411231859e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.643, + "epoch": 1.7498959633791094, + "grad_norm": 1.3239726304399626, + "learning_rate": 4.0452538827330744e-07, + "loss": 0.3279, + "step": 33640, + "vit_learning_rate": 8.090507765466148e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6498, + "epoch": 1.7504161464835621, + "grad_norm": 1.2169540793787172, + "learning_rate": 4.028673669952271e-07, + "loss": 0.3178, + "step": 33650, + "vit_learning_rate": 8.057347339904541e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6248, + "epoch": 1.750936329588015, + "grad_norm": 1.8358174875784217, + "learning_rate": 4.0121260790390413e-07, + "loss": 0.3001, + "step": 33660, + "vit_learning_rate": 8.024252158078081e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6771, + "epoch": 1.7514565126924677, + "grad_norm": 1.595099137140911, + "learning_rate": 3.995611121735804e-07, + "loss": 0.3287, + "step": 33670, + "vit_learning_rate": 7.991222243471607e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6707, + "epoch": 1.7519766957969205, + "grad_norm": 1.2871237054619131, + "learning_rate": 3.9791288097617785e-07, + "loss": 0.318, + "step": 33680, + "vit_learning_rate": 7.958257619523556e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6963, + "epoch": 1.7524968789013733, + "grad_norm": 2.0718170925585913, + "learning_rate": 3.962679154813037e-07, + "loss": 0.3257, + "step": 33690, + "vit_learning_rate": 7.925358309626074e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6615, + "epoch": 1.753017062005826, + "grad_norm": 1.681072478873972, + "learning_rate": 3.9462621685624704e-07, + "loss": 0.3346, + "step": 33700, + "vit_learning_rate": 7.89252433712494e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7304, + "epoch": 1.7535372451102789, + "grad_norm": 1.8220294175658325, + "learning_rate": 3.9298778626597924e-07, + "loss": 0.3367, + "step": 33710, + "vit_learning_rate": 7.859755725319584e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6261, + "epoch": 1.7540574282147317, + "grad_norm": 1.363470604499415, + "learning_rate": 3.913526248731542e-07, + "loss": 0.3362, + "step": 33720, + "vit_learning_rate": 7.827052497463083e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6442, + "epoch": 1.7545776113191844, + "grad_norm": 1.381802843320163, + "learning_rate": 3.897207338381026e-07, + "loss": 0.3233, + "step": 33730, + "vit_learning_rate": 7.794414676762051e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.63, + "epoch": 1.7550977944236372, + "grad_norm": 1.5148605404769748, + "learning_rate": 3.88092114318836e-07, + "loss": 0.3274, + "step": 33740, + "vit_learning_rate": 7.761842286376719e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7019, + "epoch": 1.75561797752809, + "grad_norm": 1.3254462604542003, + "learning_rate": 3.864667674710454e-07, + "loss": 0.3323, + "step": 33750, + "vit_learning_rate": 7.729335349420906e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6513, + "epoch": 1.7561381606325428, + "grad_norm": 1.4139564478240312, + "learning_rate": 3.8484469444809714e-07, + "loss": 0.3048, + "step": 33760, + "vit_learning_rate": 7.696893888961941e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6585, + "epoch": 1.7566583437369956, + "grad_norm": 2.145618722509756, + "learning_rate": 3.8322589640103824e-07, + "loss": 0.3243, + "step": 33770, + "vit_learning_rate": 7.664517928020764e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6456, + "epoch": 1.7571785268414482, + "grad_norm": 1.78721624579883, + "learning_rate": 3.816103744785876e-07, + "loss": 0.3062, + "step": 33780, + "vit_learning_rate": 7.632207489571752e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6608, + "epoch": 1.757698709945901, + "grad_norm": 2.4839250863865026, + "learning_rate": 3.7999812982714324e-07, + "loss": 0.3316, + "step": 33790, + "vit_learning_rate": 7.599962596542864e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6779, + "epoch": 1.7582188930503537, + "grad_norm": 1.707503292085879, + "learning_rate": 3.7838916359077273e-07, + "loss": 0.3272, + "step": 33800, + "vit_learning_rate": 7.567783271815454e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6386, + "epoch": 1.7587390761548065, + "grad_norm": 2.2464531210358003, + "learning_rate": 3.7678347691122166e-07, + "loss": 0.3076, + "step": 33810, + "vit_learning_rate": 7.535669538224431e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.68, + "epoch": 1.7592592592592593, + "grad_norm": 2.0033437656799293, + "learning_rate": 3.7518107092790635e-07, + "loss": 0.3243, + "step": 33820, + "vit_learning_rate": 7.503621418558126e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6206, + "epoch": 1.7597794423637119, + "grad_norm": 3.603662749827146, + "learning_rate": 3.735819467779161e-07, + "loss": 0.3049, + "step": 33830, + "vit_learning_rate": 7.471638935558321e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7124, + "epoch": 1.7602996254681647, + "grad_norm": 1.2742462258756386, + "learning_rate": 3.7198610559601054e-07, + "loss": 0.3214, + "step": 33840, + "vit_learning_rate": 7.439722111920211e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7477, + "epoch": 1.7608198085726174, + "grad_norm": 1.522458848545019, + "learning_rate": 3.703935485146182e-07, + "loss": 0.3264, + "step": 33850, + "vit_learning_rate": 7.407870970292362e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7095, + "epoch": 1.7613399916770702, + "grad_norm": 1.7745446086162076, + "learning_rate": 3.6880427666384224e-07, + "loss": 0.3236, + "step": 33860, + "vit_learning_rate": 7.376085533276843e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6747, + "epoch": 1.761860174781523, + "grad_norm": 1.7254886957864406, + "learning_rate": 3.672182911714489e-07, + "loss": 0.3269, + "step": 33870, + "vit_learning_rate": 7.344365823428977e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6831, + "epoch": 1.7623803578859758, + "grad_norm": 1.5946226351851376, + "learning_rate": 3.656355931628747e-07, + "loss": 0.3202, + "step": 33880, + "vit_learning_rate": 7.312711863257492e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6662, + "epoch": 1.7629005409904286, + "grad_norm": 1.4690903942278837, + "learning_rate": 3.6405618376122387e-07, + "loss": 0.3292, + "step": 33890, + "vit_learning_rate": 7.281123675224477e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6613, + "epoch": 1.7634207240948814, + "grad_norm": 1.5032901622817272, + "learning_rate": 3.6248006408726554e-07, + "loss": 0.3237, + "step": 33900, + "vit_learning_rate": 7.24960128174531e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6969, + "epoch": 1.7639409071993342, + "grad_norm": 2.6048286937533365, + "learning_rate": 3.609072352594362e-07, + "loss": 0.3214, + "step": 33910, + "vit_learning_rate": 7.218144705188722e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8336, + "epoch": 1.764461090303787, + "grad_norm": 1.2815548354070883, + "learning_rate": 3.593376983938368e-07, + "loss": 0.3308, + "step": 33920, + "vit_learning_rate": 7.186753967876735e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6298, + "epoch": 1.7649812734082397, + "grad_norm": 1.8626856031683658, + "learning_rate": 3.577714546042316e-07, + "loss": 0.3246, + "step": 33930, + "vit_learning_rate": 7.155429092084631e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6364, + "epoch": 1.7655014565126925, + "grad_norm": 1.620497499420755, + "learning_rate": 3.562085050020453e-07, + "loss": 0.334, + "step": 33940, + "vit_learning_rate": 7.124170100040905e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6207, + "epoch": 1.7660216396171453, + "grad_norm": 1.6804811972249838, + "learning_rate": 3.5464885069637133e-07, + "loss": 0.3145, + "step": 33950, + "vit_learning_rate": 7.092977013927425e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6917, + "epoch": 1.766541822721598, + "grad_norm": 1.5279874840655647, + "learning_rate": 3.5309249279395943e-07, + "loss": 0.3016, + "step": 33960, + "vit_learning_rate": 7.061849855879187e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7218, + "epoch": 1.7670620058260509, + "grad_norm": 1.1901302508689195, + "learning_rate": 3.515394323992227e-07, + "loss": 0.3177, + "step": 33970, + "vit_learning_rate": 7.030788647984454e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6988, + "epoch": 1.7675821889305037, + "grad_norm": 2.303058932532209, + "learning_rate": 3.499896706142331e-07, + "loss": 0.3395, + "step": 33980, + "vit_learning_rate": 6.99979341228466e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7236, + "epoch": 1.7681023720349565, + "grad_norm": 2.1178284125319333, + "learning_rate": 3.484432085387218e-07, + "loss": 0.3326, + "step": 33990, + "vit_learning_rate": 6.968864170774435e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6707, + "epoch": 1.768622555139409, + "grad_norm": 1.4373468285737223, + "learning_rate": 3.469000472700812e-07, + "loss": 0.3086, + "step": 34000, + "vit_learning_rate": 6.938000945401623e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6732, + "epoch": 1.7691427382438618, + "grad_norm": 1.225476630815371, + "learning_rate": 3.453601879033569e-07, + "loss": 0.3309, + "step": 34010, + "vit_learning_rate": 6.907203758067137e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6516, + "epoch": 1.7696629213483146, + "grad_norm": 1.8766082567387052, + "learning_rate": 3.438236315312543e-07, + "loss": 0.2965, + "step": 34020, + "vit_learning_rate": 6.876472630625086e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6973, + "epoch": 1.7701831044527674, + "grad_norm": 1.8235655872848913, + "learning_rate": 3.42290379244134e-07, + "loss": 0.3245, + "step": 34030, + "vit_learning_rate": 6.845807584882679e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7822, + "epoch": 1.7707032875572202, + "grad_norm": 1.663683369713236, + "learning_rate": 3.407604321300123e-07, + "loss": 0.3072, + "step": 34040, + "vit_learning_rate": 6.815208642600246e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7223, + "epoch": 1.771223470661673, + "grad_norm": 1.670121150019891, + "learning_rate": 3.392337912745614e-07, + "loss": 0.3255, + "step": 34050, + "vit_learning_rate": 6.784675825491226e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6406, + "epoch": 1.7717436537661255, + "grad_norm": 2.1743012578099448, + "learning_rate": 3.377104577611051e-07, + "loss": 0.3182, + "step": 34060, + "vit_learning_rate": 6.754209155222101e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6803, + "epoch": 1.7722638368705783, + "grad_norm": 1.5370001944089, + "learning_rate": 3.361904326706222e-07, + "loss": 0.3236, + "step": 34070, + "vit_learning_rate": 6.723808653412444e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6921, + "epoch": 1.772784019975031, + "grad_norm": 1.9677352748346006, + "learning_rate": 3.346737170817399e-07, + "loss": 0.3199, + "step": 34080, + "vit_learning_rate": 6.693474341634797e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7551, + "epoch": 1.7733042030794839, + "grad_norm": 1.7862921978875894, + "learning_rate": 3.331603120707433e-07, + "loss": 0.3245, + "step": 34090, + "vit_learning_rate": 6.663206241414864e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6341, + "epoch": 1.7738243861839367, + "grad_norm": 1.5273777884556494, + "learning_rate": 3.3165021871156335e-07, + "loss": 0.3274, + "step": 34100, + "vit_learning_rate": 6.633004374231266e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6344, + "epoch": 1.7743445692883895, + "grad_norm": 2.131534992437662, + "learning_rate": 3.301434380757823e-07, + "loss": 0.3421, + "step": 34110, + "vit_learning_rate": 6.602868761515645e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.655, + "epoch": 1.7748647523928422, + "grad_norm": 1.4632281944096117, + "learning_rate": 3.2863997123263216e-07, + "loss": 0.3093, + "step": 34120, + "vit_learning_rate": 6.572799424652642e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6649, + "epoch": 1.775384935497295, + "grad_norm": 1.558546039075423, + "learning_rate": 3.271398192489916e-07, + "loss": 0.3583, + "step": 34130, + "vit_learning_rate": 6.542796384979832e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7564, + "epoch": 1.7759051186017478, + "grad_norm": 1.7387988671164993, + "learning_rate": 3.2564298318939256e-07, + "loss": 0.3333, + "step": 34140, + "vit_learning_rate": 6.512859663787851e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6958, + "epoch": 1.7764253017062006, + "grad_norm": 1.2488597978517213, + "learning_rate": 3.241494641160059e-07, + "loss": 0.3304, + "step": 34150, + "vit_learning_rate": 6.482989282320117e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7222, + "epoch": 1.7769454848106534, + "grad_norm": 1.7690434944487403, + "learning_rate": 3.2265926308865436e-07, + "loss": 0.3289, + "step": 34160, + "vit_learning_rate": 6.453185261773086e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7396, + "epoch": 1.7774656679151062, + "grad_norm": 1.7800301340049123, + "learning_rate": 3.211723811648038e-07, + "loss": 0.336, + "step": 34170, + "vit_learning_rate": 6.423447623296074e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6831, + "epoch": 1.777985851019559, + "grad_norm": 1.9867631576266904, + "learning_rate": 3.1968881939956564e-07, + "loss": 0.3306, + "step": 34180, + "vit_learning_rate": 6.393776387991313e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6596, + "epoch": 1.7785060341240118, + "grad_norm": 1.5924919874757262, + "learning_rate": 3.1820857884569624e-07, + "loss": 0.3276, + "step": 34190, + "vit_learning_rate": 6.364171576913924e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6764, + "epoch": 1.7790262172284645, + "grad_norm": 1.9834917371031864, + "learning_rate": 3.1673166055359304e-07, + "loss": 0.3087, + "step": 34200, + "vit_learning_rate": 6.33463321107186e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6569, + "epoch": 1.7795464003329173, + "grad_norm": 1.941065691058785, + "learning_rate": 3.152580655712978e-07, + "loss": 0.3349, + "step": 34210, + "vit_learning_rate": 6.305161311425954e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7013, + "epoch": 1.7800665834373701, + "grad_norm": 1.290890999469077, + "learning_rate": 3.137877949444912e-07, + "loss": 0.3174, + "step": 34220, + "vit_learning_rate": 6.275755898889823e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6164, + "epoch": 1.7805867665418227, + "grad_norm": 4.889947799940825, + "learning_rate": 3.1232084971649976e-07, + "loss": 0.317, + "step": 34230, + "vit_learning_rate": 6.246416994329995e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6832, + "epoch": 1.7811069496462755, + "grad_norm": 1.9925780983447114, + "learning_rate": 3.1085723092828535e-07, + "loss": 0.3254, + "step": 34240, + "vit_learning_rate": 6.217144618565706e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7155, + "epoch": 1.7816271327507283, + "grad_norm": 1.641974996373707, + "learning_rate": 3.0939693961845265e-07, + "loss": 0.3304, + "step": 34250, + "vit_learning_rate": 6.187938792369052e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7136, + "epoch": 1.782147315855181, + "grad_norm": 2.2629917530853003, + "learning_rate": 3.079399768232433e-07, + "loss": 0.3336, + "step": 34260, + "vit_learning_rate": 6.158799536464865e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6563, + "epoch": 1.7826674989596338, + "grad_norm": 1.7696068710206534, + "learning_rate": 3.064863435765375e-07, + "loss": 0.3209, + "step": 34270, + "vit_learning_rate": 6.12972687153075e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7773, + "epoch": 1.7831876820640864, + "grad_norm": 1.7524877039282356, + "learning_rate": 3.0503604090985295e-07, + "loss": 0.3164, + "step": 34280, + "vit_learning_rate": 6.100720818197059e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6451, + "epoch": 1.7837078651685392, + "grad_norm": 2.0538272006149723, + "learning_rate": 3.035890698523436e-07, + "loss": 0.3324, + "step": 34290, + "vit_learning_rate": 6.071781397046871e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6765, + "epoch": 1.784228048272992, + "grad_norm": 1.482981616807547, + "learning_rate": 3.021454314307998e-07, + "loss": 0.3318, + "step": 34300, + "vit_learning_rate": 6.042908628615994e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6568, + "epoch": 1.7847482313774448, + "grad_norm": 1.9889598533398074, + "learning_rate": 3.0070512666964646e-07, + "loss": 0.345, + "step": 34310, + "vit_learning_rate": 6.014102533392929e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7348, + "epoch": 1.7852684144818975, + "grad_norm": 1.6559219513435084, + "learning_rate": 2.992681565909428e-07, + "loss": 0.3201, + "step": 34320, + "vit_learning_rate": 5.985363131818855e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6855, + "epoch": 1.7857885975863503, + "grad_norm": 2.2481922752850503, + "learning_rate": 2.9783452221438304e-07, + "loss": 0.3376, + "step": 34330, + "vit_learning_rate": 5.95669044428766e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6848, + "epoch": 1.7863087806908031, + "grad_norm": 1.4017099031152993, + "learning_rate": 2.964042245572929e-07, + "loss": 0.2995, + "step": 34340, + "vit_learning_rate": 5.9280844911458574e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6543, + "epoch": 1.786828963795256, + "grad_norm": 1.7217492005715118, + "learning_rate": 2.9497726463462995e-07, + "loss": 0.3188, + "step": 34350, + "vit_learning_rate": 5.899545292692598e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6568, + "epoch": 1.7873491468997087, + "grad_norm": 1.8649736069301837, + "learning_rate": 2.935536434589842e-07, + "loss": 0.3191, + "step": 34360, + "vit_learning_rate": 5.871072869179683e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6831, + "epoch": 1.7878693300041615, + "grad_norm": 1.4073032660343556, + "learning_rate": 2.92133362040577e-07, + "loss": 0.327, + "step": 34370, + "vit_learning_rate": 5.842667240811538e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6817, + "epoch": 1.7883895131086143, + "grad_norm": 2.01482100368585, + "learning_rate": 2.907164213872582e-07, + "loss": 0.3087, + "step": 34380, + "vit_learning_rate": 5.8143284277451635e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6731, + "epoch": 1.788909696213067, + "grad_norm": 1.648868272761009, + "learning_rate": 2.893028225045086e-07, + "loss": 0.3291, + "step": 34390, + "vit_learning_rate": 5.786056450090171e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6498, + "epoch": 1.7894298793175198, + "grad_norm": 1.6854318748878263, + "learning_rate": 2.878925663954368e-07, + "loss": 0.3277, + "step": 34400, + "vit_learning_rate": 5.757851327908736e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7976, + "epoch": 1.7899500624219726, + "grad_norm": 1.5230884558677962, + "learning_rate": 2.8648565406077856e-07, + "loss": 0.2969, + "step": 34410, + "vit_learning_rate": 5.7297130812155706e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6937, + "epoch": 1.7904702455264254, + "grad_norm": 1.2981517782427727, + "learning_rate": 2.8508208649889844e-07, + "loss": 0.334, + "step": 34420, + "vit_learning_rate": 5.701641729977969e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6243, + "epoch": 1.7909904286308782, + "grad_norm": 1.3743601638596936, + "learning_rate": 2.836818647057865e-07, + "loss": 0.3323, + "step": 34430, + "vit_learning_rate": 5.67363729411573e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6421, + "epoch": 1.791510611735331, + "grad_norm": 1.3169417593158363, + "learning_rate": 2.822849896750585e-07, + "loss": 0.3227, + "step": 34440, + "vit_learning_rate": 5.645699793501169e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7834, + "epoch": 1.7920307948397836, + "grad_norm": 1.275744917838719, + "learning_rate": 2.808914623979564e-07, + "loss": 0.3278, + "step": 34450, + "vit_learning_rate": 5.6178292479591275e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6747, + "epoch": 1.7925509779442363, + "grad_norm": 1.5049860114256428, + "learning_rate": 2.795012838633443e-07, + "loss": 0.3403, + "step": 34460, + "vit_learning_rate": 5.5900256772668854e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.692, + "epoch": 1.7930711610486891, + "grad_norm": 1.7192890065428557, + "learning_rate": 2.781144550577131e-07, + "loss": 0.3103, + "step": 34470, + "vit_learning_rate": 5.562289101154261e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7365, + "epoch": 1.793591344153142, + "grad_norm": 1.6273888589983674, + "learning_rate": 2.7673097696517547e-07, + "loss": 0.337, + "step": 34480, + "vit_learning_rate": 5.534619539303509e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6772, + "epoch": 1.7941115272575947, + "grad_norm": 1.68284529673392, + "learning_rate": 2.75350850567464e-07, + "loss": 0.3214, + "step": 34490, + "vit_learning_rate": 5.50701701134928e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6808, + "epoch": 1.7946317103620475, + "grad_norm": 1.6846575078649955, + "learning_rate": 2.7397407684393527e-07, + "loss": 0.334, + "step": 34500, + "vit_learning_rate": 5.4794815368787054e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6912, + "epoch": 1.7951518934665, + "grad_norm": 2.0136689075854877, + "learning_rate": 2.7260065677156775e-07, + "loss": 0.339, + "step": 34510, + "vit_learning_rate": 5.452013135431355e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.694, + "epoch": 1.7956720765709528, + "grad_norm": 1.7644355444426059, + "learning_rate": 2.712305913249574e-07, + "loss": 0.3106, + "step": 34520, + "vit_learning_rate": 5.424611826499148e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7381, + "epoch": 1.7961922596754056, + "grad_norm": 1.4949910929462882, + "learning_rate": 2.6986388147632104e-07, + "loss": 0.3261, + "step": 34530, + "vit_learning_rate": 5.39727762952642e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6418, + "epoch": 1.7967124427798584, + "grad_norm": 1.5683897090728645, + "learning_rate": 2.68500528195495e-07, + "loss": 0.3319, + "step": 34540, + "vit_learning_rate": 5.370010563909899e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6755, + "epoch": 1.7972326258843112, + "grad_norm": 1.839896296312165, + "learning_rate": 2.6714053244993264e-07, + "loss": 0.3331, + "step": 34550, + "vit_learning_rate": 5.3428106489986525e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.8105, + "epoch": 1.797752808988764, + "grad_norm": 1.8032343254879015, + "learning_rate": 2.657838952047048e-07, + "loss": 0.3099, + "step": 34560, + "vit_learning_rate": 5.315677904094096e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6225, + "epoch": 1.7982729920932168, + "grad_norm": 1.3189019499710215, + "learning_rate": 2.644306174224992e-07, + "loss": 0.3163, + "step": 34570, + "vit_learning_rate": 5.288612348449983e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6447, + "epoch": 1.7987931751976696, + "grad_norm": 1.325080409172273, + "learning_rate": 2.6308070006362096e-07, + "loss": 0.3094, + "step": 34580, + "vit_learning_rate": 5.2616140012724186e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6155, + "epoch": 1.7993133583021224, + "grad_norm": 1.6770226907143833, + "learning_rate": 2.617341440859883e-07, + "loss": 0.3188, + "step": 34590, + "vit_learning_rate": 5.234682881719765e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6609, + "epoch": 1.7998335414065751, + "grad_norm": 1.8412125696175978, + "learning_rate": 2.6039095044513516e-07, + "loss": 0.314, + "step": 34600, + "vit_learning_rate": 5.2078190089027027e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6593, + "epoch": 1.800353724511028, + "grad_norm": 1.2702714859878192, + "learning_rate": 2.5905112009421084e-07, + "loss": 0.3233, + "step": 34610, + "vit_learning_rate": 5.1810224018842166e-08 + }, + { + "avg_batch_load_time": 2.5467, + "avg_batch_processing_time": 0.6801, + "epoch": 1.8008739076154807, + "grad_norm": 1.6305159570370733, + "learning_rate": 2.5771465398397757e-07, + "loss": 0.3139, + "step": 34620, + "vit_learning_rate": 5.15429307967955e-08 + }, + { + "avg_batch_load_time": 0.0035, + "avg_batch_processing_time": 0.6624, + "epoch": 1.8013940907199335, + "grad_norm": 1.5711348727772907, + "learning_rate": 2.563815530628072e-07, + "loss": 0.3162, + "step": 34630, + "vit_learning_rate": 5.127631061256144e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.692, + "epoch": 1.8019142738243863, + "grad_norm": 2.1424843285182567, + "learning_rate": 2.5505181827668646e-07, + "loss": 0.3252, + "step": 34640, + "vit_learning_rate": 5.1010363655337286e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6505, + "epoch": 1.802434456928839, + "grad_norm": 1.3954524455480746, + "learning_rate": 2.537254505692144e-07, + "loss": 0.333, + "step": 34650, + "vit_learning_rate": 5.074509011384287e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7162, + "epoch": 1.8029546400332919, + "grad_norm": 1.820109415974429, + "learning_rate": 2.5240245088159867e-07, + "loss": 0.3221, + "step": 34660, + "vit_learning_rate": 5.0480490176319725e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7006, + "epoch": 1.8034748231377444, + "grad_norm": 1.885965451553994, + "learning_rate": 2.5108282015265716e-07, + "loss": 0.3029, + "step": 34670, + "vit_learning_rate": 5.021656403053143e-08 + }, + { + "avg_batch_load_time": 0.0037, + "avg_batch_processing_time": 0.7041, + "epoch": 1.8039950062421972, + "grad_norm": 1.5887160422108044, + "learning_rate": 2.4976655931881754e-07, + "loss": 0.3238, + "step": 34680, + "vit_learning_rate": 4.99533118637635e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6419, + "epoch": 1.80451518934665, + "grad_norm": 2.1151897614160413, + "learning_rate": 2.48453669314116e-07, + "loss": 0.3067, + "step": 34690, + "vit_learning_rate": 4.9690733862823186e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6806, + "epoch": 1.8050353724511028, + "grad_norm": 1.4858972763966631, + "learning_rate": 2.4714415107019717e-07, + "loss": 0.3426, + "step": 34700, + "vit_learning_rate": 4.942883021403943e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6349, + "epoch": 1.8055555555555556, + "grad_norm": 1.411063505538551, + "learning_rate": 2.4583800551631285e-07, + "loss": 0.3319, + "step": 34710, + "vit_learning_rate": 4.916760110326257e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.732, + "epoch": 1.8060757386600084, + "grad_norm": 1.3938513297659025, + "learning_rate": 2.445352335793205e-07, + "loss": 0.3505, + "step": 34720, + "vit_learning_rate": 4.8907046715864096e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4187, + "epoch": 1.806595921764461, + "grad_norm": 1.192217070095165, + "learning_rate": 2.4323583618368564e-07, + "loss": 0.325, + "step": 34730, + "vit_learning_rate": 4.864716723673712e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6262, + "epoch": 1.8071161048689137, + "grad_norm": 1.45463753820866, + "learning_rate": 2.4193981425147674e-07, + "loss": 0.3224, + "step": 34740, + "vit_learning_rate": 4.838796285029534e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6455, + "epoch": 1.8076362879733665, + "grad_norm": 1.2309868877552956, + "learning_rate": 2.406471687023693e-07, + "loss": 0.3119, + "step": 34750, + "vit_learning_rate": 4.812943374047384e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6722, + "epoch": 1.8081564710778193, + "grad_norm": 1.5358163999928611, + "learning_rate": 2.3935790045364226e-07, + "loss": 0.3133, + "step": 34760, + "vit_learning_rate": 4.787158009072845e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7047, + "epoch": 1.808676654182272, + "grad_norm": 1.6049349540196465, + "learning_rate": 2.380720104201767e-07, + "loss": 0.3252, + "step": 34770, + "vit_learning_rate": 4.761440208403533e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6236, + "epoch": 1.8091968372867249, + "grad_norm": 1.3135861361942263, + "learning_rate": 2.367894995144565e-07, + "loss": 0.3186, + "step": 34780, + "vit_learning_rate": 4.735789990289129e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6712, + "epoch": 1.8097170203911777, + "grad_norm": 1.536394535835775, + "learning_rate": 2.355103686465704e-07, + "loss": 0.3225, + "step": 34790, + "vit_learning_rate": 4.7102073729314076e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6151, + "epoch": 1.8102372034956304, + "grad_norm": 1.9927461269845483, + "learning_rate": 2.3423461872420615e-07, + "loss": 0.3021, + "step": 34800, + "vit_learning_rate": 4.684692374484123e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6934, + "epoch": 1.8107573866000832, + "grad_norm": 1.7604961248617985, + "learning_rate": 2.3296225065265244e-07, + "loss": 0.3377, + "step": 34810, + "vit_learning_rate": 4.659245013053048e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7542, + "epoch": 1.811277569704536, + "grad_norm": 1.3022734485696696, + "learning_rate": 2.3169326533479919e-07, + "loss": 0.3201, + "step": 34820, + "vit_learning_rate": 4.6338653066959835e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6498, + "epoch": 1.8117977528089888, + "grad_norm": 1.580467582673685, + "learning_rate": 2.3042766367113557e-07, + "loss": 0.3285, + "step": 34830, + "vit_learning_rate": 4.608553273422711e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6509, + "epoch": 1.8123179359134416, + "grad_norm": 1.1800964094601356, + "learning_rate": 2.291654465597487e-07, + "loss": 0.339, + "step": 34840, + "vit_learning_rate": 4.583308931194974e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6493, + "epoch": 1.8128381190178944, + "grad_norm": 1.7073287286442753, + "learning_rate": 2.2790661489632492e-07, + "loss": 0.3304, + "step": 34850, + "vit_learning_rate": 4.558132297926498e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4011, + "epoch": 1.8133583021223472, + "grad_norm": 1.9964030278900888, + "learning_rate": 2.2665116957414856e-07, + "loss": 0.3152, + "step": 34860, + "vit_learning_rate": 4.533023391482971e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6746, + "epoch": 1.8138784852268, + "grad_norm": 1.8104955460251217, + "learning_rate": 2.2539911148409976e-07, + "loss": 0.3212, + "step": 34870, + "vit_learning_rate": 4.5079822296819946e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6598, + "epoch": 1.8143986683312527, + "grad_norm": 1.374059404513807, + "learning_rate": 2.2415044151465504e-07, + "loss": 0.3357, + "step": 34880, + "vit_learning_rate": 4.4830088302931004e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.666, + "epoch": 1.8149188514357055, + "grad_norm": 1.549108270376268, + "learning_rate": 2.2290516055188894e-07, + "loss": 0.344, + "step": 34890, + "vit_learning_rate": 4.458103211037778e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6487, + "epoch": 1.815439034540158, + "grad_norm": 1.492181796550149, + "learning_rate": 2.2166326947946902e-07, + "loss": 0.3452, + "step": 34900, + "vit_learning_rate": 4.43326538958938e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6699, + "epoch": 1.8159592176446109, + "grad_norm": 2.1213336734978685, + "learning_rate": 2.2042476917865706e-07, + "loss": 0.34, + "step": 34910, + "vit_learning_rate": 4.4084953835731406e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.8415, + "epoch": 1.8164794007490637, + "grad_norm": 1.3158452706612147, + "learning_rate": 2.191896605283095e-07, + "loss": 0.3305, + "step": 34920, + "vit_learning_rate": 4.383793210566189e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 2.4104, + "epoch": 1.8169995838535165, + "grad_norm": 1.212150714465536, + "learning_rate": 2.1795794440487628e-07, + "loss": 0.3186, + "step": 34930, + "vit_learning_rate": 4.359158888097525e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7041, + "epoch": 1.8175197669579692, + "grad_norm": 1.3066002107385988, + "learning_rate": 2.1672962168239998e-07, + "loss": 0.3215, + "step": 34940, + "vit_learning_rate": 4.3345924336479986e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6641, + "epoch": 1.818039950062422, + "grad_norm": 1.9590265920678116, + "learning_rate": 2.1550469323251443e-07, + "loss": 0.3141, + "step": 34950, + "vit_learning_rate": 4.310093864650288e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 2.4072, + "epoch": 1.8185601331668746, + "grad_norm": 1.7376905633410262, + "learning_rate": 2.1428315992444494e-07, + "loss": 0.324, + "step": 34960, + "vit_learning_rate": 4.285663198488898e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6585, + "epoch": 1.8190803162713274, + "grad_norm": 2.5847789011848583, + "learning_rate": 2.130650226250086e-07, + "loss": 0.313, + "step": 34970, + "vit_learning_rate": 4.2613004525001715e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.8312, + "epoch": 1.8196004993757802, + "grad_norm": 1.9026192986252755, + "learning_rate": 2.1185028219861125e-07, + "loss": 0.3319, + "step": 34980, + "vit_learning_rate": 4.2370056439722245e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6966, + "epoch": 1.820120682480233, + "grad_norm": 1.8221093360851408, + "learning_rate": 2.1063893950724888e-07, + "loss": 0.321, + "step": 34990, + "vit_learning_rate": 4.212778790144977e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6577, + "epoch": 1.8206408655846857, + "grad_norm": 1.6499661387138695, + "learning_rate": 2.0943099541050726e-07, + "loss": 0.3233, + "step": 35000, + "vit_learning_rate": 4.188619908210145e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6934, + "epoch": 1.8211610486891385, + "grad_norm": 1.6541935816143132, + "learning_rate": 2.0822645076555904e-07, + "loss": 0.3647, + "step": 35010, + "vit_learning_rate": 4.16452901531118e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7043, + "epoch": 1.8216812317935913, + "grad_norm": 1.7578551810765246, + "learning_rate": 2.0702530642716434e-07, + "loss": 0.3104, + "step": 35020, + "vit_learning_rate": 4.140506128543286e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 2.4099, + "epoch": 1.822201414898044, + "grad_norm": 1.3242483988359872, + "learning_rate": 2.058275632476736e-07, + "loss": 0.3396, + "step": 35030, + "vit_learning_rate": 4.116551264953472e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6734, + "epoch": 1.822721598002497, + "grad_norm": 1.5150433343047733, + "learning_rate": 2.0463322207701919e-07, + "loss": 0.3153, + "step": 35040, + "vit_learning_rate": 4.0926644415403836e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6626, + "epoch": 1.8232417811069497, + "grad_norm": 1.816001806060547, + "learning_rate": 2.0344228376272312e-07, + "loss": 0.3166, + "step": 35050, + "vit_learning_rate": 4.068845675254462e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6628, + "epoch": 1.8237619642114025, + "grad_norm": 1.479094845683727, + "learning_rate": 2.0225474914988886e-07, + "loss": 0.3158, + "step": 35060, + "vit_learning_rate": 4.045094982997776e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.693, + "epoch": 1.8242821473158553, + "grad_norm": 1.593520769434075, + "learning_rate": 2.010706190812095e-07, + "loss": 0.3051, + "step": 35070, + "vit_learning_rate": 4.02141238162419e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 4.3252, + "epoch": 1.824802330420308, + "grad_norm": 1.9767444028190233, + "learning_rate": 1.998898943969585e-07, + "loss": 0.3391, + "step": 35080, + "vit_learning_rate": 3.997797887939169e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6867, + "epoch": 1.8253225135247608, + "grad_norm": 1.2067954572280455, + "learning_rate": 1.9871257593499338e-07, + "loss": 0.3147, + "step": 35090, + "vit_learning_rate": 3.974251518699867e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6752, + "epoch": 1.8258426966292136, + "grad_norm": 1.1476049551981728, + "learning_rate": 1.9753866453075643e-07, + "loss": 0.3607, + "step": 35100, + "vit_learning_rate": 3.950773290615128e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6115, + "epoch": 1.8263628797336664, + "grad_norm": 1.3752552853098774, + "learning_rate": 1.9636816101726852e-07, + "loss": 0.3268, + "step": 35110, + "vit_learning_rate": 3.92736322034537e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 2.3696, + "epoch": 1.826883062838119, + "grad_norm": 1.5273591178980057, + "learning_rate": 1.9520106622513691e-07, + "loss": 0.323, + "step": 35120, + "vit_learning_rate": 3.904021324502738e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 4.1579, + "epoch": 1.8274032459425718, + "grad_norm": 1.6545408586054082, + "learning_rate": 1.9403738098254632e-07, + "loss": 0.3145, + "step": 35130, + "vit_learning_rate": 3.880747619650926e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.662, + "epoch": 1.8279234290470245, + "grad_norm": 1.3627707054751856, + "learning_rate": 1.92877106115264e-07, + "loss": 0.3352, + "step": 35140, + "vit_learning_rate": 3.85754212230528e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6526, + "epoch": 1.8284436121514773, + "grad_norm": 1.4397733221295743, + "learning_rate": 1.9172024244663635e-07, + "loss": 0.3278, + "step": 35150, + "vit_learning_rate": 3.8344048489327265e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 4.2386, + "epoch": 1.8289637952559301, + "grad_norm": 1.701012435710671, + "learning_rate": 1.9056679079758888e-07, + "loss": 0.3118, + "step": 35160, + "vit_learning_rate": 3.811335815951777e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 4.4156, + "epoch": 1.829483978360383, + "grad_norm": 1.7405236283152665, + "learning_rate": 1.8941675198662745e-07, + "loss": 0.3397, + "step": 35170, + "vit_learning_rate": 3.788335039732549e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.67, + "epoch": 1.8300041614648355, + "grad_norm": 1.325183522949699, + "learning_rate": 1.882701268298337e-07, + "loss": 0.3, + "step": 35180, + "vit_learning_rate": 3.765402536596674e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6711, + "epoch": 1.8305243445692883, + "grad_norm": 5.150212303268509, + "learning_rate": 1.8712691614086843e-07, + "loss": 0.3195, + "step": 35190, + "vit_learning_rate": 3.742538322817368e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6802, + "epoch": 1.831044527673741, + "grad_norm": 1.7587382132112628, + "learning_rate": 1.859871207309688e-07, + "loss": 0.3308, + "step": 35200, + "vit_learning_rate": 3.719742414619376e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6793, + "epoch": 1.8315647107781938, + "grad_norm": 1.8805385515096182, + "learning_rate": 1.8485074140895066e-07, + "loss": 0.3315, + "step": 35210, + "vit_learning_rate": 3.697014828179013e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6581, + "epoch": 1.8320848938826466, + "grad_norm": 1.233892432149786, + "learning_rate": 1.8371777898120224e-07, + "loss": 0.3316, + "step": 35220, + "vit_learning_rate": 3.6743555796240446e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6811, + "epoch": 1.8326050769870994, + "grad_norm": 1.961817333511012, + "learning_rate": 1.8258823425168938e-07, + "loss": 0.3154, + "step": 35230, + "vit_learning_rate": 3.651764685033787e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4954, + "epoch": 1.8331252600915522, + "grad_norm": 1.5599613028024744, + "learning_rate": 1.8146210802195306e-07, + "loss": 0.3175, + "step": 35240, + "vit_learning_rate": 3.629242160439061e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6639, + "epoch": 1.833645443196005, + "grad_norm": 1.749777188543003, + "learning_rate": 1.803394010911047e-07, + "loss": 0.3222, + "step": 35250, + "vit_learning_rate": 3.6067880218220937e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.1164, + "epoch": 1.8341656263004578, + "grad_norm": 1.4428552082683166, + "learning_rate": 1.792201142558353e-07, + "loss": 0.3383, + "step": 35260, + "vit_learning_rate": 3.584402285116706e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3604, + "epoch": 1.8346858094049106, + "grad_norm": 1.7223421142648823, + "learning_rate": 1.7810424831040395e-07, + "loss": 0.2959, + "step": 35270, + "vit_learning_rate": 3.562084966208079e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 2.3917, + "epoch": 1.8352059925093633, + "grad_norm": 1.6913628418958455, + "learning_rate": 1.7699180404664508e-07, + "loss": 0.3232, + "step": 35280, + "vit_learning_rate": 3.539836080932901e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4004, + "epoch": 1.8357261756138161, + "grad_norm": 2.535384723135559, + "learning_rate": 1.7588278225396326e-07, + "loss": 0.3429, + "step": 35290, + "vit_learning_rate": 3.517655645079265e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 2.3927, + "epoch": 1.836246358718269, + "grad_norm": 1.38658005049009, + "learning_rate": 1.747771837193346e-07, + "loss": 0.3263, + "step": 35300, + "vit_learning_rate": 3.495543674386692e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6466, + "epoch": 1.8367665418227217, + "grad_norm": 1.6857255151234345, + "learning_rate": 1.7367500922730873e-07, + "loss": 0.3171, + "step": 35310, + "vit_learning_rate": 3.4735001845461744e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6418, + "epoch": 1.8372867249271745, + "grad_norm": 1.748345134756053, + "learning_rate": 1.7257625956000113e-07, + "loss": 0.3021, + "step": 35320, + "vit_learning_rate": 3.451525191200022e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6815, + "epoch": 1.8378069080316273, + "grad_norm": 1.442657036054284, + "learning_rate": 1.7148093549710087e-07, + "loss": 0.3345, + "step": 35330, + "vit_learning_rate": 3.429618709942017e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6993, + "epoch": 1.83832709113608, + "grad_norm": 1.711261374832003, + "learning_rate": 1.7038903781586346e-07, + "loss": 0.3208, + "step": 35340, + "vit_learning_rate": 3.407780756317269e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6822, + "epoch": 1.8388472742405326, + "grad_norm": 1.7543165145501782, + "learning_rate": 1.6930056729111356e-07, + "loss": 0.314, + "step": 35350, + "vit_learning_rate": 3.386011345822271e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 2.3852, + "epoch": 1.8393674573449854, + "grad_norm": 1.5363894285074087, + "learning_rate": 1.682155246952466e-07, + "loss": 0.3279, + "step": 35360, + "vit_learning_rate": 3.364310493904932e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6505, + "epoch": 1.8398876404494382, + "grad_norm": 1.9109808064330127, + "learning_rate": 1.6713391079822118e-07, + "loss": 0.3188, + "step": 35370, + "vit_learning_rate": 3.3426782159644226e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.646, + "epoch": 1.840407823553891, + "grad_norm": 2.0471346790597496, + "learning_rate": 1.6605572636756717e-07, + "loss": 0.3272, + "step": 35380, + "vit_learning_rate": 3.321114527351343e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6904, + "epoch": 1.8409280066583438, + "grad_norm": 1.6934212422966095, + "learning_rate": 1.6498097216837595e-07, + "loss": 0.3031, + "step": 35390, + "vit_learning_rate": 3.2996194433675187e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6764, + "epoch": 1.8414481897627963, + "grad_norm": 1.7289707710270537, + "learning_rate": 1.639096489633091e-07, + "loss": 0.3209, + "step": 35400, + "vit_learning_rate": 3.278192979266181e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6469, + "epoch": 1.8419683728672491, + "grad_norm": 1.8425042701623577, + "learning_rate": 1.628417575125918e-07, + "loss": 0.3106, + "step": 35410, + "vit_learning_rate": 3.256835150251836e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 2.7305, + "epoch": 1.842488555971702, + "grad_norm": 1.7220475601163394, + "learning_rate": 1.6177729857401403e-07, + "loss": 0.3316, + "step": 35420, + "vit_learning_rate": 3.2355459714802804e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7789, + "epoch": 1.8430087390761547, + "grad_norm": 1.8535178040854927, + "learning_rate": 1.607162729029299e-07, + "loss": 0.3053, + "step": 35430, + "vit_learning_rate": 3.2143254580585976e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6464, + "epoch": 1.8435289221806075, + "grad_norm": 1.0464441710995103, + "learning_rate": 1.596586812522566e-07, + "loss": 0.3144, + "step": 35440, + "vit_learning_rate": 3.193173625045131e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 2.3665, + "epoch": 1.8440491052850603, + "grad_norm": 1.6606243863439483, + "learning_rate": 1.5860452437247763e-07, + "loss": 0.3256, + "step": 35450, + "vit_learning_rate": 3.172090487449552e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7058, + "epoch": 1.844569288389513, + "grad_norm": 1.6394580668921857, + "learning_rate": 1.575538030116347e-07, + "loss": 0.3375, + "step": 35460, + "vit_learning_rate": 3.1510760602326935e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 2.387, + "epoch": 1.8450894714939658, + "grad_norm": 1.825878132356033, + "learning_rate": 1.5650651791533467e-07, + "loss": 0.3258, + "step": 35470, + "vit_learning_rate": 3.130130358306693e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 4.1937, + "epoch": 1.8456096545984186, + "grad_norm": 1.7692795383187552, + "learning_rate": 1.554626698267442e-07, + "loss": 0.3282, + "step": 35480, + "vit_learning_rate": 3.109253396534883e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6666, + "epoch": 1.8461298377028714, + "grad_norm": 1.796943523181411, + "learning_rate": 1.5442225948659183e-07, + "loss": 0.3271, + "step": 35490, + "vit_learning_rate": 3.0884451897318364e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6221, + "epoch": 1.8466500208073242, + "grad_norm": 1.2499089116337028, + "learning_rate": 1.5338528763316817e-07, + "loss": 0.302, + "step": 35500, + "vit_learning_rate": 3.0677057526633635e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7065, + "epoch": 1.847170203911777, + "grad_norm": 1.3732021307737596, + "learning_rate": 1.5235175500232124e-07, + "loss": 0.3202, + "step": 35510, + "vit_learning_rate": 3.0470351000464244e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6763, + "epoch": 1.8476903870162298, + "grad_norm": 1.3154483550488993, + "learning_rate": 1.5132166232745992e-07, + "loss": 0.3178, + "step": 35520, + "vit_learning_rate": 3.0264332465491984e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6749, + "epoch": 1.8482105701206826, + "grad_norm": 1.604960350877629, + "learning_rate": 1.5029501033955062e-07, + "loss": 0.3093, + "step": 35530, + "vit_learning_rate": 3.0059002067910124e-08 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6942, + "epoch": 1.8487307532251354, + "grad_norm": 1.987907850440195, + "learning_rate": 1.4927179976712058e-07, + "loss": 0.326, + "step": 35540, + "vit_learning_rate": 2.985435995342411e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7118, + "epoch": 1.8492509363295881, + "grad_norm": 1.4848155215203793, + "learning_rate": 1.4825203133625342e-07, + "loss": 0.3223, + "step": 35550, + "vit_learning_rate": 2.9650406267250682e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6596, + "epoch": 1.849771119434041, + "grad_norm": 1.4141155234090184, + "learning_rate": 1.4723570577059087e-07, + "loss": 0.3198, + "step": 35560, + "vit_learning_rate": 2.9447141154118172e-08 + }, + { + "avg_batch_load_time": 0.1976, + "avg_batch_processing_time": 0.7138, + "epoch": 1.8502913025384935, + "grad_norm": 2.06416365037498, + "learning_rate": 1.462228237913299e-07, + "loss": 0.3299, + "step": 35570, + "vit_learning_rate": 2.9244564758265977e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 2.6315, + "epoch": 1.8508114856429463, + "grad_norm": 1.4421658084278095, + "learning_rate": 1.4521338611722556e-07, + "loss": 0.3243, + "step": 35580, + "vit_learning_rate": 2.904267722344511e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7237, + "epoch": 1.851331668747399, + "grad_norm": 1.5306740917293358, + "learning_rate": 1.442073934645899e-07, + "loss": 0.3062, + "step": 35590, + "vit_learning_rate": 2.8841478692917976e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.699, + "epoch": 1.8518518518518519, + "grad_norm": 1.881987019064308, + "learning_rate": 1.4320484654728685e-07, + "loss": 0.3255, + "step": 35600, + "vit_learning_rate": 2.8640969309457364e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6671, + "epoch": 1.8523720349563046, + "grad_norm": 1.7771208996761099, + "learning_rate": 1.4220574607673732e-07, + "loss": 0.3297, + "step": 35610, + "vit_learning_rate": 2.844114921534746e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6838, + "epoch": 1.8528922180607574, + "grad_norm": 2.297178968009689, + "learning_rate": 1.4121009276191698e-07, + "loss": 0.3188, + "step": 35620, + "vit_learning_rate": 2.8242018552383396e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6525, + "epoch": 1.85341240116521, + "grad_norm": 1.836821366180804, + "learning_rate": 1.4021788730935348e-07, + "loss": 0.3368, + "step": 35630, + "vit_learning_rate": 2.8043577461870695e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6603, + "epoch": 1.8539325842696628, + "grad_norm": 1.4060378966462885, + "learning_rate": 1.3922913042313134e-07, + "loss": 0.3381, + "step": 35640, + "vit_learning_rate": 2.784582608462627e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 2.508, + "epoch": 1.8544527673741156, + "grad_norm": 1.673814024981993, + "learning_rate": 1.382438228048838e-07, + "loss": 0.3336, + "step": 35650, + "vit_learning_rate": 2.7648764560976755e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6497, + "epoch": 1.8549729504785684, + "grad_norm": 1.5194042654257585, + "learning_rate": 1.3726196515379986e-07, + "loss": 0.3389, + "step": 35660, + "vit_learning_rate": 2.7452393030759968e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6496, + "epoch": 1.8554931335830211, + "grad_norm": 1.457050790065265, + "learning_rate": 1.3628355816661664e-07, + "loss": 0.3292, + "step": 35670, + "vit_learning_rate": 2.7256711633323326e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6173, + "epoch": 1.856013316687474, + "grad_norm": 1.618400089544901, + "learning_rate": 1.3530860253762602e-07, + "loss": 0.324, + "step": 35680, + "vit_learning_rate": 2.70617205075252e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6472, + "epoch": 1.8565334997919267, + "grad_norm": 1.4157737315423293, + "learning_rate": 1.343370989586701e-07, + "loss": 0.3234, + "step": 35690, + "vit_learning_rate": 2.6867419791734014e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6733, + "epoch": 1.8570536828963795, + "grad_norm": 1.3943984314618572, + "learning_rate": 1.333690481191402e-07, + "loss": 0.3066, + "step": 35700, + "vit_learning_rate": 2.6673809623828036e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6367, + "epoch": 1.8575738660008323, + "grad_norm": 1.9110502491844932, + "learning_rate": 1.3240445070597853e-07, + "loss": 0.3153, + "step": 35710, + "vit_learning_rate": 2.64808901411957e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6978, + "epoch": 1.858094049105285, + "grad_norm": 1.6095875889998104, + "learning_rate": 1.3144330740367528e-07, + "loss": 0.326, + "step": 35720, + "vit_learning_rate": 2.6288661480735054e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7242, + "epoch": 1.8586142322097379, + "grad_norm": 1.6444114309902496, + "learning_rate": 1.3048561889427215e-07, + "loss": 0.3427, + "step": 35730, + "vit_learning_rate": 2.6097123778854423e-08 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7523, + "epoch": 1.8591344153141907, + "grad_norm": 1.573350882564, + "learning_rate": 1.2953138585735602e-07, + "loss": 0.3234, + "step": 35740, + "vit_learning_rate": 2.5906277171471202e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6642, + "epoch": 1.8596545984186434, + "grad_norm": 1.6373242389485452, + "learning_rate": 1.2858060897006473e-07, + "loss": 0.3097, + "step": 35750, + "vit_learning_rate": 2.5716121794012945e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 4.1006, + "epoch": 1.8601747815230962, + "grad_norm": 2.3258572947013967, + "learning_rate": 1.2763328890708136e-07, + "loss": 0.315, + "step": 35760, + "vit_learning_rate": 2.5526657781416273e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6843, + "epoch": 1.860694964627549, + "grad_norm": 1.234598834586425, + "learning_rate": 1.2668942634063707e-07, + "loss": 0.3031, + "step": 35770, + "vit_learning_rate": 2.5337885268127414e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6735, + "epoch": 1.8612151477320018, + "grad_norm": 1.647473501136057, + "learning_rate": 1.2574902194050996e-07, + "loss": 0.3237, + "step": 35780, + "vit_learning_rate": 2.514980438810199e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6954, + "epoch": 1.8617353308364546, + "grad_norm": 1.553408475127025, + "learning_rate": 1.2481207637402405e-07, + "loss": 0.2975, + "step": 35790, + "vit_learning_rate": 2.4962415274804804e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6643, + "epoch": 1.8622555139409072, + "grad_norm": 1.749103801235261, + "learning_rate": 1.2387859030604686e-07, + "loss": 0.3424, + "step": 35800, + "vit_learning_rate": 2.4775718061209373e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6783, + "epoch": 1.86277569704536, + "grad_norm": 1.5343716203582916, + "learning_rate": 1.229485643989925e-07, + "loss": 0.3185, + "step": 35810, + "vit_learning_rate": 2.4589712879798496e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6597, + "epoch": 1.8632958801498127, + "grad_norm": 1.4600780224617327, + "learning_rate": 1.2202199931282134e-07, + "loss": 0.3067, + "step": 35820, + "vit_learning_rate": 2.4404399862564262e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6586, + "epoch": 1.8638160632542655, + "grad_norm": 1.7192763171583536, + "learning_rate": 1.2109889570503574e-07, + "loss": 0.3516, + "step": 35830, + "vit_learning_rate": 2.4219779141007147e-08 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7401, + "epoch": 1.8643362463587183, + "grad_norm": 2.359043406692692, + "learning_rate": 1.2017925423068178e-07, + "loss": 0.3082, + "step": 35840, + "vit_learning_rate": 2.4035850846136352e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7169, + "epoch": 1.8648564294631709, + "grad_norm": 2.1068559566965264, + "learning_rate": 1.1926307554234962e-07, + "loss": 0.346, + "step": 35850, + "vit_learning_rate": 2.385261510846992e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6717, + "epoch": 1.8653766125676237, + "grad_norm": 1.7470640155444601, + "learning_rate": 1.1835036029017256e-07, + "loss": 0.3483, + "step": 35860, + "vit_learning_rate": 2.3670072058034508e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6622, + "epoch": 1.8658967956720764, + "grad_norm": 1.4991376653039343, + "learning_rate": 1.1744110912182471e-07, + "loss": 0.302, + "step": 35870, + "vit_learning_rate": 2.348822182436494e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.66, + "epoch": 1.8664169787765292, + "grad_norm": 1.3826198489774413, + "learning_rate": 1.1653532268252333e-07, + "loss": 0.3181, + "step": 35880, + "vit_learning_rate": 2.3307064536504662e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 2.4093, + "epoch": 1.866937161880982, + "grad_norm": 1.7426475795408936, + "learning_rate": 1.1563300161502644e-07, + "loss": 0.3404, + "step": 35890, + "vit_learning_rate": 2.3126600323005285e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6948, + "epoch": 1.8674573449854348, + "grad_norm": 1.7716021294759077, + "learning_rate": 1.14734146559633e-07, + "loss": 0.3421, + "step": 35900, + "vit_learning_rate": 2.2946829311926595e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6587, + "epoch": 1.8679775280898876, + "grad_norm": 1.4119084742595518, + "learning_rate": 1.1383875815418222e-07, + "loss": 0.3275, + "step": 35910, + "vit_learning_rate": 2.276775163083644e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6818, + "epoch": 1.8684977111943404, + "grad_norm": 2.245583603135167, + "learning_rate": 1.1294683703405474e-07, + "loss": 0.3354, + "step": 35920, + "vit_learning_rate": 2.2589367406810945e-08 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6913, + "epoch": 1.8690178942987932, + "grad_norm": 1.4985414164919835, + "learning_rate": 1.1205838383216928e-07, + "loss": 0.3211, + "step": 35930, + "vit_learning_rate": 2.241167676643385e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6881, + "epoch": 1.869538077403246, + "grad_norm": 1.8659569010427859, + "learning_rate": 1.1117339917898373e-07, + "loss": 0.3251, + "step": 35940, + "vit_learning_rate": 2.2234679835796745e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6489, + "epoch": 1.8700582605076987, + "grad_norm": 1.7728111525685142, + "learning_rate": 1.1029188370249467e-07, + "loss": 0.3232, + "step": 35950, + "vit_learning_rate": 2.205837674049893e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6342, + "epoch": 1.8705784436121515, + "grad_norm": 1.6575759899065916, + "learning_rate": 1.094138380282378e-07, + "loss": 0.3152, + "step": 35960, + "vit_learning_rate": 2.1882767605647556e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7223, + "epoch": 1.8710986267166043, + "grad_norm": 1.490730918938066, + "learning_rate": 1.0853926277928695e-07, + "loss": 0.3078, + "step": 35970, + "vit_learning_rate": 2.1707852555857385e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6525, + "epoch": 1.871618809821057, + "grad_norm": 1.5018076954537858, + "learning_rate": 1.0766815857625067e-07, + "loss": 0.3203, + "step": 35980, + "vit_learning_rate": 2.1533631715250133e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7054, + "epoch": 1.87213899292551, + "grad_norm": 1.3374501674346273, + "learning_rate": 1.0680052603727786e-07, + "loss": 0.3404, + "step": 35990, + "vit_learning_rate": 2.136010520745557e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6348, + "epoch": 1.8726591760299627, + "grad_norm": 1.3987759986956256, + "learning_rate": 1.05936365778051e-07, + "loss": 0.3193, + "step": 36000, + "vit_learning_rate": 2.1187273155610197e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6756, + "epoch": 1.8731793591344155, + "grad_norm": 1.6918741018195824, + "learning_rate": 1.0507567841178956e-07, + "loss": 0.3349, + "step": 36010, + "vit_learning_rate": 2.101513568235791e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6456, + "epoch": 1.873699542238868, + "grad_norm": 1.3837292546872624, + "learning_rate": 1.0421846454925e-07, + "loss": 0.3055, + "step": 36020, + "vit_learning_rate": 2.0843692909849998e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7016, + "epoch": 1.8742197253433208, + "grad_norm": 1.672886232219275, + "learning_rate": 1.0336472479872184e-07, + "loss": 0.3202, + "step": 36030, + "vit_learning_rate": 2.0672944959744364e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6484, + "epoch": 1.8747399084477736, + "grad_norm": 1.4082316621292577, + "learning_rate": 1.0251445976603047e-07, + "loss": 0.3287, + "step": 36040, + "vit_learning_rate": 2.0502891953206092e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6883, + "epoch": 1.8752600915522264, + "grad_norm": 1.8320310981050547, + "learning_rate": 1.0166767005453493e-07, + "loss": 0.3175, + "step": 36050, + "vit_learning_rate": 2.0333534010906984e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6872, + "epoch": 1.8757802746566792, + "grad_norm": 1.9149059164444668, + "learning_rate": 1.0082435626512899e-07, + "loss": 0.294, + "step": 36060, + "vit_learning_rate": 2.0164871253025795e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6556, + "epoch": 1.876300457761132, + "grad_norm": 1.7094875806022432, + "learning_rate": 9.998451899624007e-08, + "loss": 0.3471, + "step": 36070, + "vit_learning_rate": 1.999690379924801e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6568, + "epoch": 1.8768206408655845, + "grad_norm": 1.5426340674984684, + "learning_rate": 9.914815884382645e-08, + "loss": 0.3203, + "step": 36080, + "vit_learning_rate": 1.9829631768765287e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6851, + "epoch": 1.8773408239700373, + "grad_norm": 1.6720616803859474, + "learning_rate": 9.831527640138062e-08, + "loss": 0.3307, + "step": 36090, + "vit_learning_rate": 1.9663055280276118e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6639, + "epoch": 1.87786100707449, + "grad_norm": 1.5087306691493103, + "learning_rate": 9.748587225992812e-08, + "loss": 0.3331, + "step": 36100, + "vit_learning_rate": 1.949717445198562e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.8073, + "epoch": 1.878381190178943, + "grad_norm": 2.0819045825584293, + "learning_rate": 9.665994700802483e-08, + "loss": 0.3159, + "step": 36110, + "vit_learning_rate": 1.9331989401604966e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6687, + "epoch": 1.8789013732833957, + "grad_norm": 1.6377672635709337, + "learning_rate": 9.583750123175805e-08, + "loss": 0.3406, + "step": 36120, + "vit_learning_rate": 1.9167500246351608e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.64, + "epoch": 1.8794215563878485, + "grad_norm": 1.338267237323514, + "learning_rate": 9.50185355147465e-08, + "loss": 0.3102, + "step": 36130, + "vit_learning_rate": 1.9003707102949296e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6207, + "epoch": 1.8799417394923013, + "grad_norm": 1.3167124376755956, + "learning_rate": 9.420305043813916e-08, + "loss": 0.3192, + "step": 36140, + "vit_learning_rate": 1.884061008762783e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6829, + "epoch": 1.880461922596754, + "grad_norm": 1.5584628801739062, + "learning_rate": 9.339104658061537e-08, + "loss": 0.3043, + "step": 36150, + "vit_learning_rate": 1.8678209316123073e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6678, + "epoch": 1.8809821057012068, + "grad_norm": 1.5835056158344702, + "learning_rate": 9.258252451838367e-08, + "loss": 0.3331, + "step": 36160, + "vit_learning_rate": 1.8516504903676734e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6309, + "epoch": 1.8815022888056596, + "grad_norm": 2.0780219825121478, + "learning_rate": 9.177748482518178e-08, + "loss": 0.3327, + "step": 36170, + "vit_learning_rate": 1.8355496965036354e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6489, + "epoch": 1.8820224719101124, + "grad_norm": 1.7879366619589183, + "learning_rate": 9.097592807227773e-08, + "loss": 0.3268, + "step": 36180, + "vit_learning_rate": 1.8195185614455545e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6546, + "epoch": 1.8825426550145652, + "grad_norm": 1.6346496838726854, + "learning_rate": 9.017785482846486e-08, + "loss": 0.333, + "step": 36190, + "vit_learning_rate": 1.803557096569297e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.77, + "epoch": 1.883062838119018, + "grad_norm": 1.7159161220864572, + "learning_rate": 8.938326566006905e-08, + "loss": 0.3165, + "step": 36200, + "vit_learning_rate": 1.7876653132013808e-08 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6833, + "epoch": 1.8835830212234708, + "grad_norm": 1.3370199751053953, + "learning_rate": 8.859216113094038e-08, + "loss": 0.3031, + "step": 36210, + "vit_learning_rate": 1.7718432226188074e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7024, + "epoch": 1.8841032043279236, + "grad_norm": 1.872236649773146, + "learning_rate": 8.780454180245645e-08, + "loss": 0.2794, + "step": 36220, + "vit_learning_rate": 1.7560908360491288e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6566, + "epoch": 1.8846233874323763, + "grad_norm": 1.4288599750824786, + "learning_rate": 8.702040823352298e-08, + "loss": 0.3179, + "step": 36230, + "vit_learning_rate": 1.7404081646704594e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.66, + "epoch": 1.8851435705368291, + "grad_norm": 1.6430459521998708, + "learning_rate": 8.623976098057207e-08, + "loss": 0.316, + "step": 36240, + "vit_learning_rate": 1.7247952196114413e-08 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6766, + "epoch": 1.8856637536412817, + "grad_norm": 1.7688582865638007, + "learning_rate": 8.546260059756117e-08, + "loss": 0.3441, + "step": 36250, + "vit_learning_rate": 1.709252011951223e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6846, + "epoch": 1.8861839367457345, + "grad_norm": 1.1625781650173026, + "learning_rate": 8.468892763597358e-08, + "loss": 0.3068, + "step": 36260, + "vit_learning_rate": 1.6937785527194716e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6561, + "epoch": 1.8867041198501873, + "grad_norm": 1.4212176547709419, + "learning_rate": 8.391874264481792e-08, + "loss": 0.3148, + "step": 36270, + "vit_learning_rate": 1.678374852896358e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7095, + "epoch": 1.88722430295464, + "grad_norm": 1.2970003356443702, + "learning_rate": 8.315204617062811e-08, + "loss": 0.3246, + "step": 36280, + "vit_learning_rate": 1.663040923412562e-08 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.725, + "epoch": 1.8877444860590928, + "grad_norm": 1.9957721231450374, + "learning_rate": 8.238883875746173e-08, + "loss": 0.3131, + "step": 36290, + "vit_learning_rate": 1.6477767751492345e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6934, + "epoch": 1.8882646691635454, + "grad_norm": 3.452612790693908, + "learning_rate": 8.162912094690167e-08, + "loss": 0.3346, + "step": 36300, + "vit_learning_rate": 1.632582418938033e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6426, + "epoch": 1.8887848522679982, + "grad_norm": 1.4944326994451944, + "learning_rate": 8.087289327805281e-08, + "loss": 0.3247, + "step": 36310, + "vit_learning_rate": 1.617457865561056e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7954, + "epoch": 1.889305035372451, + "grad_norm": 1.5069708190051265, + "learning_rate": 8.012015628754476e-08, + "loss": 0.3125, + "step": 36320, + "vit_learning_rate": 1.6024031257508953e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7063, + "epoch": 1.8898252184769038, + "grad_norm": 1.5942209587499678, + "learning_rate": 7.937091050953027e-08, + "loss": 0.3434, + "step": 36330, + "vit_learning_rate": 1.5874182101906053e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6449, + "epoch": 1.8903454015813566, + "grad_norm": 1.4720353799372656, + "learning_rate": 7.862515647568403e-08, + "loss": 0.3256, + "step": 36340, + "vit_learning_rate": 1.5725031295136805e-08 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7529, + "epoch": 1.8908655846858093, + "grad_norm": 1.183075827539871, + "learning_rate": 7.788289471520272e-08, + "loss": 0.3113, + "step": 36350, + "vit_learning_rate": 1.557657894304054e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6427, + "epoch": 1.8913857677902621, + "grad_norm": 1.9264665322500139, + "learning_rate": 7.714412575480556e-08, + "loss": 0.3249, + "step": 36360, + "vit_learning_rate": 1.542882515096111e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6915, + "epoch": 1.891905950894715, + "grad_norm": 1.9287431763869927, + "learning_rate": 7.640885011873212e-08, + "loss": 0.318, + "step": 36370, + "vit_learning_rate": 1.528177002374642e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7223, + "epoch": 1.8924261339991677, + "grad_norm": 2.1486048208790427, + "learning_rate": 7.567706832874444e-08, + "loss": 0.3172, + "step": 36380, + "vit_learning_rate": 1.5135413665748887e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7178, + "epoch": 1.8929463171036205, + "grad_norm": 1.3381500207601795, + "learning_rate": 7.494878090412494e-08, + "loss": 0.3218, + "step": 36390, + "vit_learning_rate": 1.4989756180824986e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6688, + "epoch": 1.8934665002080733, + "grad_norm": 1.8870807569218706, + "learning_rate": 7.422398836167578e-08, + "loss": 0.3254, + "step": 36400, + "vit_learning_rate": 1.4844797672335153e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7482, + "epoch": 1.893986683312526, + "grad_norm": 1.4290219869730405, + "learning_rate": 7.35026912157194e-08, + "loss": 0.3296, + "step": 36410, + "vit_learning_rate": 1.4700538243143878e-08 + }, + { + "avg_batch_load_time": 0.0321, + "avg_batch_processing_time": 0.6385, + "epoch": 1.8945068664169789, + "grad_norm": 2.566887759089902, + "learning_rate": 7.278488997809807e-08, + "loss": 0.3426, + "step": 36420, + "vit_learning_rate": 1.4556977995619613e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.6607, + "epoch": 1.8950270495214316, + "grad_norm": 2.429377740681186, + "learning_rate": 7.207058515817267e-08, + "loss": 0.3174, + "step": 36430, + "vit_learning_rate": 1.4414117031634532e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6591, + "epoch": 1.8955472326258844, + "grad_norm": 1.4298611745361722, + "learning_rate": 7.135977726282384e-08, + "loss": 0.3142, + "step": 36440, + "vit_learning_rate": 1.4271955452564766e-08 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.675, + "epoch": 1.8960674157303372, + "grad_norm": 1.6209526902250024, + "learning_rate": 7.065246679645032e-08, + "loss": 0.3145, + "step": 36450, + "vit_learning_rate": 1.4130493359290063e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.709, + "epoch": 1.89658759883479, + "grad_norm": 1.3456860957846832, + "learning_rate": 6.994865426096953e-08, + "loss": 0.3089, + "step": 36460, + "vit_learning_rate": 1.3989730852193904e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6665, + "epoch": 1.8971077819392426, + "grad_norm": 3.0258363282183707, + "learning_rate": 6.924834015581472e-08, + "loss": 0.3328, + "step": 36470, + "vit_learning_rate": 1.3849668031162943e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6706, + "epoch": 1.8976279650436954, + "grad_norm": 1.2488741722882573, + "learning_rate": 6.855152497794005e-08, + "loss": 0.317, + "step": 36480, + "vit_learning_rate": 1.3710304995588007e-08 + }, + { + "avg_batch_load_time": 0.0038, + "avg_batch_processing_time": 0.6622, + "epoch": 1.8981481481481481, + "grad_norm": 1.822253054333864, + "learning_rate": 6.785820922181386e-08, + "loss": 0.3439, + "step": 36490, + "vit_learning_rate": 1.3571641844362769e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6763, + "epoch": 1.898668331252601, + "grad_norm": 1.5584817543758889, + "learning_rate": 6.71683933794226e-08, + "loss": 0.3024, + "step": 36500, + "vit_learning_rate": 1.3433678675884519e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7194, + "epoch": 1.8991885143570537, + "grad_norm": 1.2737029994510374, + "learning_rate": 6.648207794026806e-08, + "loss": 0.3351, + "step": 36510, + "vit_learning_rate": 1.3296415588053612e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.679, + "epoch": 1.8997086974615065, + "grad_norm": 1.765303019342506, + "learning_rate": 6.579926339136956e-08, + "loss": 0.3114, + "step": 36520, + "vit_learning_rate": 1.3159852678273909e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6598, + "epoch": 1.900228880565959, + "grad_norm": 1.7708582838956066, + "learning_rate": 6.51199502172617e-08, + "loss": 0.3013, + "step": 36530, + "vit_learning_rate": 1.3023990043452337e-08 + }, + { + "avg_batch_load_time": 2.3548, + "avg_batch_processing_time": 0.6463, + "epoch": 1.9007490636704119, + "grad_norm": 1.3783801401315057, + "learning_rate": 6.444413889999335e-08, + "loss": 0.3304, + "step": 36540, + "vit_learning_rate": 1.2888827779998667e-08 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.7096, + "epoch": 1.9012692467748646, + "grad_norm": 1.459522526279213, + "learning_rate": 6.377182991913034e-08, + "loss": 0.3034, + "step": 36550, + "vit_learning_rate": 1.2754365983826065e-08 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7199, + "epoch": 1.9017894298793174, + "grad_norm": 1.449185193642431, + "learning_rate": 6.310302375175104e-08, + "loss": 0.3206, + "step": 36560, + "vit_learning_rate": 1.2620604750350205e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.8672, + "epoch": 1.9023096129837702, + "grad_norm": 2.165017819106808, + "learning_rate": 6.24377208724497e-08, + "loss": 0.3093, + "step": 36570, + "vit_learning_rate": 1.248754417448994e-08 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.8108, + "epoch": 1.902829796088223, + "grad_norm": 1.3152657797591893, + "learning_rate": 6.177592175333369e-08, + "loss": 0.3091, + "step": 36580, + "vit_learning_rate": 1.2355184350666736e-08 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7752, + "epoch": 1.9033499791926758, + "grad_norm": 1.2192231235537767, + "learning_rate": 6.111762686402567e-08, + "loss": 0.3193, + "step": 36590, + "vit_learning_rate": 1.2223525372805132e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7462, + "epoch": 1.9038701622971286, + "grad_norm": 2.013336744321837, + "learning_rate": 6.04628366716592e-08, + "loss": 0.32, + "step": 36600, + "vit_learning_rate": 1.2092567334331837e-08 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7355, + "epoch": 1.9043903454015814, + "grad_norm": 1.3461013900657086, + "learning_rate": 5.981155164088259e-08, + "loss": 0.3148, + "step": 36610, + "vit_learning_rate": 1.1962310328176517e-08 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7666, + "epoch": 1.9049105285060342, + "grad_norm": 1.7882608231684627, + "learning_rate": 5.916377223385727e-08, + "loss": 0.3196, + "step": 36620, + "vit_learning_rate": 1.1832754446771454e-08 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.742, + "epoch": 1.905430711610487, + "grad_norm": 1.3721177359993915, + "learning_rate": 5.851949891025499e-08, + "loss": 0.3123, + "step": 36630, + "vit_learning_rate": 1.1703899782050996e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7131, + "epoch": 1.9059508947149397, + "grad_norm": 1.6342889347554368, + "learning_rate": 5.787873212726114e-08, + "loss": 0.3223, + "step": 36640, + "vit_learning_rate": 1.1575746425452226e-08 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.8207, + "epoch": 1.9064710778193925, + "grad_norm": 1.9169132012392653, + "learning_rate": 5.7241472339572e-08, + "loss": 0.3222, + "step": 36650, + "vit_learning_rate": 1.1448294467914399e-08 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6539, + "epoch": 1.9069912609238453, + "grad_norm": 1.9363604938488503, + "learning_rate": 5.6607719999396406e-08, + "loss": 0.3399, + "step": 36660, + "vit_learning_rate": 1.132154399987928e-08 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6922, + "epoch": 1.907511444028298, + "grad_norm": 1.2210438372177617, + "learning_rate": 5.597747555645294e-08, + "loss": 0.3171, + "step": 36670, + "vit_learning_rate": 1.1195495111290588e-08 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6772, + "epoch": 1.9080316271327509, + "grad_norm": 1.4490925562010768, + "learning_rate": 5.5350739457971646e-08, + "loss": 0.3285, + "step": 36680, + "vit_learning_rate": 1.1070147891594328e-08 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7372, + "epoch": 1.9085518102372037, + "grad_norm": 2.112445432786526, + "learning_rate": 5.4727512148693426e-08, + "loss": 0.313, + "step": 36690, + "vit_learning_rate": 1.0945502429738684e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6509, + "epoch": 1.9090719933416562, + "grad_norm": 1.6566731564410493, + "learning_rate": 5.41077940708673e-08, + "loss": 0.3418, + "step": 36700, + "vit_learning_rate": 1.082155881417346e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6602, + "epoch": 1.909592176446109, + "grad_norm": 1.497032785433261, + "learning_rate": 5.349158566425427e-08, + "loss": 0.3361, + "step": 36710, + "vit_learning_rate": 1.0698317132850854e-08 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7629, + "epoch": 1.9101123595505618, + "grad_norm": 1.5258541598029953, + "learning_rate": 5.2878887366124565e-08, + "loss": 0.3273, + "step": 36720, + "vit_learning_rate": 1.0575777473224911e-08 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7013, + "epoch": 1.9106325426550146, + "grad_norm": 2.2049369065948996, + "learning_rate": 5.22696996112565e-08, + "loss": 0.3311, + "step": 36730, + "vit_learning_rate": 1.0453939922251299e-08 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7126, + "epoch": 1.9111527257594674, + "grad_norm": 1.4038825738683696, + "learning_rate": 5.1664022831938164e-08, + "loss": 0.3227, + "step": 36740, + "vit_learning_rate": 1.0332804566387631e-08 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.622, + "epoch": 1.91167290886392, + "grad_norm": 1.5691451767999536, + "learning_rate": 5.106185745796521e-08, + "loss": 0.3406, + "step": 36750, + "vit_learning_rate": 1.0212371491593041e-08 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7327, + "epoch": 1.9121930919683727, + "grad_norm": 1.571697584064165, + "learning_rate": 5.0463203916643056e-08, + "loss": 0.3394, + "step": 36760, + "vit_learning_rate": 1.009264078332861e-08 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6399, + "epoch": 1.9127132750728255, + "grad_norm": 1.5588262256583165, + "learning_rate": 4.9868062632784096e-08, + "loss": 0.3288, + "step": 36770, + "vit_learning_rate": 9.973612526556818e-09 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.694, + "epoch": 1.9132334581772783, + "grad_norm": 1.5978068875355673, + "learning_rate": 4.92764340287083e-08, + "loss": 0.2901, + "step": 36780, + "vit_learning_rate": 9.855286805741658e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6689, + "epoch": 1.913753641281731, + "grad_norm": 1.6451998432343036, + "learning_rate": 4.868831852424261e-08, + "loss": 0.3335, + "step": 36790, + "vit_learning_rate": 9.73766370484852e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6934, + "epoch": 1.9142738243861839, + "grad_norm": 2.4928015665977745, + "learning_rate": 4.810371653672263e-08, + "loss": 0.3213, + "step": 36800, + "vit_learning_rate": 9.620743307344525e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6538, + "epoch": 1.9147940074906367, + "grad_norm": 1.5997476609138463, + "learning_rate": 4.752262848098987e-08, + "loss": 0.3244, + "step": 36810, + "vit_learning_rate": 9.504525696197974e-09 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6914, + "epoch": 1.9153141905950894, + "grad_norm": 1.686577769830676, + "learning_rate": 4.694505476939171e-08, + "loss": 0.3089, + "step": 36820, + "vit_learning_rate": 9.38901095387834e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6955, + "epoch": 1.9158343736995422, + "grad_norm": 1.5173862741349589, + "learning_rate": 4.637099581178251e-08, + "loss": 0.3052, + "step": 36830, + "vit_learning_rate": 9.274199162356499e-09 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.6683, + "epoch": 1.916354556803995, + "grad_norm": 1.458580180349675, + "learning_rate": 4.580045201552086e-08, + "loss": 0.338, + "step": 36840, + "vit_learning_rate": 9.16009040310417e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6971, + "epoch": 1.9168747399084478, + "grad_norm": 1.899509319297028, + "learning_rate": 4.523342378547402e-08, + "loss": 0.3722, + "step": 36850, + "vit_learning_rate": 9.046684757094802e-09 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.8719, + "epoch": 1.9173949230129006, + "grad_norm": 2.024076380033123, + "learning_rate": 4.466991152401179e-08, + "loss": 0.3336, + "step": 36860, + "vit_learning_rate": 8.933982304802356e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.764, + "epoch": 1.9179151061173534, + "grad_norm": 1.4427118183225083, + "learning_rate": 4.410991563101097e-08, + "loss": 0.3072, + "step": 36870, + "vit_learning_rate": 8.821983126202192e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7138, + "epoch": 1.9184352892218062, + "grad_norm": 1.7813798871669722, + "learning_rate": 4.355343650385091e-08, + "loss": 0.3165, + "step": 36880, + "vit_learning_rate": 8.71068730077018e-09 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6849, + "epoch": 1.918955472326259, + "grad_norm": 2.3991334600532066, + "learning_rate": 4.300047453741685e-08, + "loss": 0.3279, + "step": 36890, + "vit_learning_rate": 8.600094907483369e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6353, + "epoch": 1.9194756554307117, + "grad_norm": 1.5116065573669946, + "learning_rate": 4.2451030124098794e-08, + "loss": 0.3383, + "step": 36900, + "vit_learning_rate": 8.490206024819758e-09 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6276, + "epoch": 1.9199958385351645, + "grad_norm": 1.8088994017817044, + "learning_rate": 4.19051036537893e-08, + "loss": 0.3449, + "step": 36910, + "vit_learning_rate": 8.38102073075786e-09 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.659, + "epoch": 1.920516021639617, + "grad_norm": 1.4885898997535978, + "learning_rate": 4.1362695513885165e-08, + "loss": 0.3337, + "step": 36920, + "vit_learning_rate": 8.272539102777032e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6911, + "epoch": 1.9210362047440699, + "grad_norm": 1.5238411729536085, + "learning_rate": 4.0823806089286266e-08, + "loss": 0.3237, + "step": 36930, + "vit_learning_rate": 8.164761217857252e-09 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.6988, + "epoch": 1.9215563878485227, + "grad_norm": 1.225529048752744, + "learning_rate": 4.0288435762396164e-08, + "loss": 0.3022, + "step": 36940, + "vit_learning_rate": 8.057687152479231e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7553, + "epoch": 1.9220765709529755, + "grad_norm": 1.7034699825052795, + "learning_rate": 3.97565849131204e-08, + "loss": 0.3342, + "step": 36950, + "vit_learning_rate": 7.951316982624078e-09 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.636, + "epoch": 1.9225967540574282, + "grad_norm": 1.5814029420762976, + "learning_rate": 3.9228253918867644e-08, + "loss": 0.3208, + "step": 36960, + "vit_learning_rate": 7.845650783773528e-09 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7302, + "epoch": 1.923116937161881, + "grad_norm": 1.3118063243436162, + "learning_rate": 3.870344315454855e-08, + "loss": 0.3375, + "step": 36970, + "vit_learning_rate": 7.74068863090971e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6871, + "epoch": 1.9236371202663336, + "grad_norm": 1.2308415927772614, + "learning_rate": 3.818215299257577e-08, + "loss": 0.3082, + "step": 36980, + "vit_learning_rate": 7.636430598515154e-09 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.6466, + "epoch": 1.9241573033707864, + "grad_norm": 1.9804298636273383, + "learning_rate": 3.766438380286397e-08, + "loss": 0.3035, + "step": 36990, + "vit_learning_rate": 7.532876760572793e-09 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6631, + "epoch": 1.9246774864752392, + "grad_norm": 1.408593259145086, + "learning_rate": 3.7150135952829234e-08, + "loss": 0.3047, + "step": 37000, + "vit_learning_rate": 7.430027190565846e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6768, + "epoch": 1.925197669579692, + "grad_norm": 1.391836218917871, + "learning_rate": 3.663940980738856e-08, + "loss": 0.3413, + "step": 37010, + "vit_learning_rate": 7.327881961477711e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7216, + "epoch": 1.9257178526841447, + "grad_norm": 1.3693588255048508, + "learning_rate": 3.613220572895926e-08, + "loss": 0.3206, + "step": 37020, + "vit_learning_rate": 7.226441145791851e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7104, + "epoch": 1.9262380357885975, + "grad_norm": 1.531243099226291, + "learning_rate": 3.5628524077461204e-08, + "loss": 0.3098, + "step": 37030, + "vit_learning_rate": 7.12570481549224e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6991, + "epoch": 1.9267582188930503, + "grad_norm": 2.11246748654633, + "learning_rate": 3.5128365210313486e-08, + "loss": 0.3154, + "step": 37040, + "vit_learning_rate": 7.025673042062696e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7861, + "epoch": 1.927278401997503, + "grad_norm": 1.5382183339739046, + "learning_rate": 3.463172948243554e-08, + "loss": 0.3517, + "step": 37050, + "vit_learning_rate": 6.926345896487107e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.641, + "epoch": 1.927798585101956, + "grad_norm": 1.404369160226898, + "learning_rate": 3.413861724624601e-08, + "loss": 0.3272, + "step": 37060, + "vit_learning_rate": 6.827723449249201e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6512, + "epoch": 1.9283187682064087, + "grad_norm": 1.5958899147322927, + "learning_rate": 3.3649028851663876e-08, + "loss": 0.3256, + "step": 37070, + "vit_learning_rate": 6.729805770332775e-09 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6666, + "epoch": 1.9288389513108615, + "grad_norm": 1.4170208572494936, + "learning_rate": 3.316296464610902e-08, + "loss": 0.3466, + "step": 37080, + "vit_learning_rate": 6.632592929221803e-09 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7357, + "epoch": 1.9293591344153143, + "grad_norm": 1.5195692443832258, + "learning_rate": 3.268042497449775e-08, + "loss": 0.3268, + "step": 37090, + "vit_learning_rate": 6.536084994899549e-09 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.7269, + "epoch": 1.929879317519767, + "grad_norm": 1.6243081632027103, + "learning_rate": 3.220141017924727e-08, + "loss": 0.323, + "step": 37100, + "vit_learning_rate": 6.440282035849453e-09 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.716, + "epoch": 1.9303995006242198, + "grad_norm": 1.6554583127266862, + "learning_rate": 3.172592060027291e-08, + "loss": 0.3309, + "step": 37110, + "vit_learning_rate": 6.345184120054581e-09 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6786, + "epoch": 1.9309196837286726, + "grad_norm": 2.3416415396467674, + "learning_rate": 3.125395657498809e-08, + "loss": 0.3365, + "step": 37120, + "vit_learning_rate": 6.250791314997616e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6883, + "epoch": 1.9314398668331254, + "grad_norm": 1.5911149757696756, + "learning_rate": 3.078551843830491e-08, + "loss": 0.3335, + "step": 37130, + "vit_learning_rate": 6.157103687660981e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6335, + "epoch": 1.931960049937578, + "grad_norm": 2.274874067432711, + "learning_rate": 3.0320606522633024e-08, + "loss": 0.3183, + "step": 37140, + "vit_learning_rate": 6.064121304526604e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.685, + "epoch": 1.9324802330420308, + "grad_norm": 1.6806406871863266, + "learning_rate": 2.985922115788076e-08, + "loss": 0.3105, + "step": 37150, + "vit_learning_rate": 5.9718442315761504e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6968, + "epoch": 1.9330004161464835, + "grad_norm": 1.7542160250945003, + "learning_rate": 2.9401362671453436e-08, + "loss": 0.3085, + "step": 37160, + "vit_learning_rate": 5.880272534290687e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6429, + "epoch": 1.9335205992509363, + "grad_norm": 1.3520576587130466, + "learning_rate": 2.894703138825228e-08, + "loss": 0.3099, + "step": 37170, + "vit_learning_rate": 5.789406277650455e-09 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.6725, + "epoch": 1.9340407823553891, + "grad_norm": 1.4455775918933753, + "learning_rate": 2.849622763067883e-08, + "loss": 0.3418, + "step": 37180, + "vit_learning_rate": 5.699245526135765e-09 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.713, + "epoch": 1.934560965459842, + "grad_norm": 1.861849951029204, + "learning_rate": 2.8048951718627758e-08, + "loss": 0.3255, + "step": 37190, + "vit_learning_rate": 5.609790343725551e-09 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.8737, + "epoch": 1.9350811485642945, + "grad_norm": 1.8045627291511508, + "learning_rate": 2.76052039694924e-08, + "loss": 0.2961, + "step": 37200, + "vit_learning_rate": 5.521040793898479e-09 + }, + { + "avg_batch_load_time": 0.0059, + "avg_batch_processing_time": 0.7569, + "epoch": 1.9356013316687473, + "grad_norm": 1.6992947977960149, + "learning_rate": 2.716498469816309e-08, + "loss": 0.3143, + "step": 37210, + "vit_learning_rate": 5.432996939632617e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7397, + "epoch": 1.9361215147732, + "grad_norm": 1.43953816734733, + "learning_rate": 2.672829421702383e-08, + "loss": 0.3238, + "step": 37220, + "vit_learning_rate": 5.345658843404766e-09 + }, + { + "avg_batch_load_time": 0.0141, + "avg_batch_processing_time": 0.7739, + "epoch": 1.9366416978776528, + "grad_norm": 1.1682463963256817, + "learning_rate": 2.6295132835956748e-08, + "loss": 0.3293, + "step": 37230, + "vit_learning_rate": 5.25902656719135e-09 + }, + { + "avg_batch_load_time": 0.0041, + "avg_batch_processing_time": 0.7443, + "epoch": 1.9371618809821056, + "grad_norm": 1.678602398076525, + "learning_rate": 2.58655008623393e-08, + "loss": 0.3185, + "step": 37240, + "vit_learning_rate": 5.173100172467859e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7028, + "epoch": 1.9376820640865584, + "grad_norm": 1.6839436432973178, + "learning_rate": 2.5439398601043718e-08, + "loss": 0.317, + "step": 37250, + "vit_learning_rate": 5.087879720208743e-09 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6851, + "epoch": 1.9382022471910112, + "grad_norm": 1.5453239048863256, + "learning_rate": 2.5016826354437584e-08, + "loss": 0.3233, + "step": 37260, + "vit_learning_rate": 5.0033652708875165e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6839, + "epoch": 1.938722430295464, + "grad_norm": 1.4335616566810252, + "learning_rate": 2.4597784422384365e-08, + "loss": 0.3244, + "step": 37270, + "vit_learning_rate": 4.919556884476872e-09 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7162, + "epoch": 1.9392426133999168, + "grad_norm": 1.3013925657026346, + "learning_rate": 2.4182273102241193e-08, + "loss": 0.3058, + "step": 37280, + "vit_learning_rate": 4.8364546204482385e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.67, + "epoch": 1.9397627965043696, + "grad_norm": 1.8199377335910147, + "learning_rate": 2.3770292688861106e-08, + "loss": 0.309, + "step": 37290, + "vit_learning_rate": 4.754058537772221e-09 + }, + { + "avg_batch_load_time": 0.0037, + "avg_batch_processing_time": 0.7424, + "epoch": 1.9402829796088223, + "grad_norm": 1.9453012493065054, + "learning_rate": 2.3361843474590807e-08, + "loss": 0.3241, + "step": 37300, + "vit_learning_rate": 4.672368694918161e-09 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6851, + "epoch": 1.9408031627132751, + "grad_norm": 1.3064635006550045, + "learning_rate": 2.2956925749270665e-08, + "loss": 0.3238, + "step": 37310, + "vit_learning_rate": 4.591385149854132e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7588, + "epoch": 1.941323345817728, + "grad_norm": 3.0156447707360265, + "learning_rate": 2.255553980023639e-08, + "loss": 0.3311, + "step": 37320, + "vit_learning_rate": 4.511107960047278e-09 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.8055, + "epoch": 1.9418435289221807, + "grad_norm": 1.791193202499572, + "learning_rate": 2.2157685912316818e-08, + "loss": 0.3482, + "step": 37330, + "vit_learning_rate": 4.431537182463363e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7905, + "epoch": 1.9423637120266335, + "grad_norm": 1.4024658078094803, + "learning_rate": 2.1763364367833882e-08, + "loss": 0.3153, + "step": 37340, + "vit_learning_rate": 4.352672873566776e-09 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.7138, + "epoch": 1.9428838951310863, + "grad_norm": 1.691990829884812, + "learning_rate": 2.1372575446603762e-08, + "loss": 0.3136, + "step": 37350, + "vit_learning_rate": 4.274515089320751e-09 + }, + { + "avg_batch_load_time": 0.0043, + "avg_batch_processing_time": 0.7347, + "epoch": 1.943404078235539, + "grad_norm": 1.5358656286563743, + "learning_rate": 2.098531942593518e-08, + "loss": 0.3048, + "step": 37360, + "vit_learning_rate": 4.197063885187036e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7454, + "epoch": 1.9439242613399916, + "grad_norm": 1.744370960083529, + "learning_rate": 2.0601596580630545e-08, + "loss": 0.3374, + "step": 37370, + "vit_learning_rate": 4.1203193161261084e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.7321, + "epoch": 1.9444444444444444, + "grad_norm": 1.7929598453653561, + "learning_rate": 2.022140718298482e-08, + "loss": 0.3242, + "step": 37380, + "vit_learning_rate": 4.0442814365969635e-09 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6252, + "epoch": 1.9449646275488972, + "grad_norm": 1.9564167599584583, + "learning_rate": 1.984475150278553e-08, + "loss": 0.3125, + "step": 37390, + "vit_learning_rate": 3.968950300557106e-09 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7062, + "epoch": 1.94548481065335, + "grad_norm": 1.9347724741674759, + "learning_rate": 1.9471629807311653e-08, + "loss": 0.3394, + "step": 37400, + "vit_learning_rate": 3.89432596146233e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6718, + "epoch": 1.9460049937578028, + "grad_norm": 1.4856051961897976, + "learning_rate": 1.9102042361336392e-08, + "loss": 0.3333, + "step": 37410, + "vit_learning_rate": 3.820408472267278e-09 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.6728, + "epoch": 1.9465251768622556, + "grad_norm": 1.78783633116223, + "learning_rate": 1.8735989427123292e-08, + "loss": 0.325, + "step": 37420, + "vit_learning_rate": 3.747197885424658e-09 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6651, + "epoch": 1.9470453599667081, + "grad_norm": 1.383470286243024, + "learning_rate": 1.8373471264429012e-08, + "loss": 0.3091, + "step": 37430, + "vit_learning_rate": 3.6746942528858015e-09 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.704, + "epoch": 1.947565543071161, + "grad_norm": 1.2835459837986396, + "learning_rate": 1.801448813050055e-08, + "loss": 0.3103, + "step": 37440, + "vit_learning_rate": 3.6028976261001098e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7176, + "epoch": 1.9480857261756137, + "grad_norm": 1.622076253342558, + "learning_rate": 1.7659040280077478e-08, + "loss": 0.3615, + "step": 37450, + "vit_learning_rate": 3.531808056015495e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.6839, + "epoch": 1.9486059092800665, + "grad_norm": 1.8227848983261017, + "learning_rate": 1.73071279653908e-08, + "loss": 0.3472, + "step": 37460, + "vit_learning_rate": 3.46142559307816e-09 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.6845, + "epoch": 1.9491260923845193, + "grad_norm": 1.3793988208788448, + "learning_rate": 1.695875143616188e-08, + "loss": 0.3188, + "step": 37470, + "vit_learning_rate": 3.3917502872323755e-09 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7928, + "epoch": 1.949646275488972, + "grad_norm": 1.6686876689745482, + "learning_rate": 1.661391093960296e-08, + "loss": 0.3261, + "step": 37480, + "vit_learning_rate": 3.322782187920592e-09 + }, + { + "avg_batch_load_time": 0.0013, + "avg_batch_processing_time": 0.6407, + "epoch": 1.9501664585934249, + "grad_norm": 1.3023555823992239, + "learning_rate": 1.627260672041775e-08, + "loss": 0.3176, + "step": 37490, + "vit_learning_rate": 3.2545213440835494e-09 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7471, + "epoch": 1.9506866416978776, + "grad_norm": 1.4852343237908352, + "learning_rate": 1.593483902079973e-08, + "loss": 0.3251, + "step": 37500, + "vit_learning_rate": 3.186967804159946e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6882, + "epoch": 1.9512068248023304, + "grad_norm": 1.5257785880109045, + "learning_rate": 1.5600608080434953e-08, + "loss": 0.3305, + "step": 37510, + "vit_learning_rate": 3.1201216160869902e-09 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6729, + "epoch": 1.9517270079067832, + "grad_norm": 1.7222817538828843, + "learning_rate": 1.5269914136497033e-08, + "loss": 0.3134, + "step": 37520, + "vit_learning_rate": 3.053982827299406e-09 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6748, + "epoch": 1.952247191011236, + "grad_norm": 1.5259536594104441, + "learning_rate": 1.494275742365048e-08, + "loss": 0.3233, + "step": 37530, + "vit_learning_rate": 2.9885514847300954e-09 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.8937, + "epoch": 1.9527673741156888, + "grad_norm": 1.308190253965486, + "learning_rate": 1.4619138174050695e-08, + "loss": 0.3252, + "step": 37540, + "vit_learning_rate": 2.9238276348101386e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6911, + "epoch": 1.9532875572201416, + "grad_norm": 1.4546942894669737, + "learning_rate": 1.4299056617341766e-08, + "loss": 0.3297, + "step": 37550, + "vit_learning_rate": 2.859811323468353e-09 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6665, + "epoch": 1.9538077403245944, + "grad_norm": 1.4680988538908373, + "learning_rate": 1.398251298065867e-08, + "loss": 0.3374, + "step": 37560, + "vit_learning_rate": 2.7965025961317333e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6744, + "epoch": 1.9543279234290472, + "grad_norm": 1.4349879082825827, + "learning_rate": 1.3669507488623945e-08, + "loss": 0.3348, + "step": 37570, + "vit_learning_rate": 2.7339014977247888e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.7155, + "epoch": 1.9548481065335, + "grad_norm": 2.2335820226333922, + "learning_rate": 1.3360040363351589e-08, + "loss": 0.3354, + "step": 37580, + "vit_learning_rate": 2.6720080726703177e-09 + }, + { + "avg_batch_load_time": 0.0228, + "avg_batch_processing_time": 0.6771, + "epoch": 1.9553682896379525, + "grad_norm": 1.9217309316426385, + "learning_rate": 1.3054111824442606e-08, + "loss": 0.35, + "step": 37590, + "vit_learning_rate": 2.610822364888521e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7916, + "epoch": 1.9558884727424053, + "grad_norm": 1.6484954311657012, + "learning_rate": 1.2751722088988338e-08, + "loss": 0.3269, + "step": 37600, + "vit_learning_rate": 2.5503444177976675e-09 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.754, + "epoch": 1.956408655846858, + "grad_norm": 1.492264447339383, + "learning_rate": 1.245287137156881e-08, + "loss": 0.3273, + "step": 37610, + "vit_learning_rate": 2.4905742743137616e-09 + }, + { + "avg_batch_load_time": 0.0029, + "avg_batch_processing_time": 0.7903, + "epoch": 1.9569288389513109, + "grad_norm": 1.7248426179210887, + "learning_rate": 1.2157559884252158e-08, + "loss": 0.3269, + "step": 37620, + "vit_learning_rate": 2.431511976850431e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6994, + "epoch": 1.9574490220557637, + "grad_norm": 1.6599654157783823, + "learning_rate": 1.18657878365952e-08, + "loss": 0.2875, + "step": 37630, + "vit_learning_rate": 2.3731575673190396e-09 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7083, + "epoch": 1.9579692051602164, + "grad_norm": 1.6117525890777906, + "learning_rate": 1.1577555435643984e-08, + "loss": 0.3143, + "step": 37640, + "vit_learning_rate": 2.3155110871287962e-09 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6947, + "epoch": 1.958489388264669, + "grad_norm": 1.8061992502882929, + "learning_rate": 1.1292862885931565e-08, + "loss": 0.3168, + "step": 37650, + "vit_learning_rate": 2.2585725771863128e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6629, + "epoch": 1.9590095713691218, + "grad_norm": 1.7002673759154827, + "learning_rate": 1.1011710389480235e-08, + "loss": 0.3217, + "step": 37660, + "vit_learning_rate": 2.2023420778960465e-09 + }, + { + "avg_batch_load_time": 0.0069, + "avg_batch_processing_time": 0.6555, + "epoch": 1.9595297544735746, + "grad_norm": 1.582089932223149, + "learning_rate": 1.0734098145798177e-08, + "loss": 0.3173, + "step": 37670, + "vit_learning_rate": 2.1468196291596354e-09 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7867, + "epoch": 1.9600499375780274, + "grad_norm": 1.5403328370138551, + "learning_rate": 1.0460026351883924e-08, + "loss": 0.3302, + "step": 37680, + "vit_learning_rate": 2.0920052703767844e-09 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.68, + "epoch": 1.9605701206824802, + "grad_norm": 1.5805058169396524, + "learning_rate": 1.0189495202222454e-08, + "loss": 0.3398, + "step": 37690, + "vit_learning_rate": 2.0378990404444905e-09 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.7028, + "epoch": 1.961090303786933, + "grad_norm": 1.5045056613210976, + "learning_rate": 9.922504888785767e-09, + "loss": 0.3091, + "step": 37700, + "vit_learning_rate": 1.9845009777571533e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6644, + "epoch": 1.9616104868913857, + "grad_norm": 1.790192997411805, + "learning_rate": 9.659055601033418e-09, + "loss": 0.3365, + "step": 37710, + "vit_learning_rate": 1.9318111202066833e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 0.7496, + "epoch": 1.9621306699958385, + "grad_norm": 1.4419531383897635, + "learning_rate": 9.399147525913644e-09, + "loss": 0.3265, + "step": 37720, + "vit_learning_rate": 1.8798295051827286e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7474, + "epoch": 1.9626508531002913, + "grad_norm": 1.5830737774539028, + "learning_rate": 9.142780847860023e-09, + "loss": 0.3216, + "step": 37730, + "vit_learning_rate": 1.8285561695720042e-09 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.7196, + "epoch": 1.963171036204744, + "grad_norm": 1.611836640617878, + "learning_rate": 8.889955748794254e-09, + "loss": 0.3123, + "step": 37740, + "vit_learning_rate": 1.7779911497588507e-09 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7102, + "epoch": 1.9636912193091969, + "grad_norm": 1.4519877528860412, + "learning_rate": 8.64067240812394e-09, + "loss": 0.3209, + "step": 37750, + "vit_learning_rate": 1.7281344816247879e-09 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.7805, + "epoch": 1.9642114024136497, + "grad_norm": 1.5508664599586772, + "learning_rate": 8.39493100274369e-09, + "loss": 0.3097, + "step": 37760, + "vit_learning_rate": 1.6789862005487376e-09 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6994, + "epoch": 1.9647315855181025, + "grad_norm": 1.5033359006514875, + "learning_rate": 8.152731707035122e-09, + "loss": 0.3228, + "step": 37770, + "vit_learning_rate": 1.6305463414070242e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.746, + "epoch": 1.9652517686225552, + "grad_norm": 2.247777309020931, + "learning_rate": 7.914074692866313e-09, + "loss": 0.3114, + "step": 37780, + "vit_learning_rate": 1.5828149385732625e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7355, + "epoch": 1.965771951727008, + "grad_norm": 1.4407571886220025, + "learning_rate": 7.67896012959124e-09, + "loss": 0.3073, + "step": 37790, + "vit_learning_rate": 1.5357920259182478e-09 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.7144, + "epoch": 1.9662921348314608, + "grad_norm": 1.6864725410609773, + "learning_rate": 7.44738818405033e-09, + "loss": 0.3303, + "step": 37800, + "vit_learning_rate": 1.4894776368100658e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7232, + "epoch": 1.9668123179359136, + "grad_norm": 1.4332464055877518, + "learning_rate": 7.219359020570471e-09, + "loss": 0.3268, + "step": 37810, + "vit_learning_rate": 1.443871804114094e-09 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7346, + "epoch": 1.9673325010403662, + "grad_norm": 1.6901820754239134, + "learning_rate": 6.994872800963892e-09, + "loss": 0.3294, + "step": 37820, + "vit_learning_rate": 1.3989745601927782e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.6987, + "epoch": 1.967852684144819, + "grad_norm": 1.9288436922125451, + "learning_rate": 6.773929684528724e-09, + "loss": 0.3363, + "step": 37830, + "vit_learning_rate": 1.3547859369057445e-09 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6674, + "epoch": 1.9683728672492717, + "grad_norm": 2.4756515276454496, + "learning_rate": 6.556529828050107e-09, + "loss": 0.3112, + "step": 37840, + "vit_learning_rate": 1.311305965610021e-09 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 0.6847, + "epoch": 1.9688930503537245, + "grad_norm": 1.5476854401976095, + "learning_rate": 6.34267338579686e-09, + "loss": 0.3272, + "step": 37850, + "vit_learning_rate": 1.268534677159372e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6568, + "epoch": 1.9694132334581773, + "grad_norm": 1.313233691534211, + "learning_rate": 6.132360509524815e-09, + "loss": 0.3317, + "step": 37860, + "vit_learning_rate": 1.2264721019049629e-09 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6583, + "epoch": 1.9699334165626299, + "grad_norm": 1.6838436957115859, + "learning_rate": 5.9255913484745905e-09, + "loss": 0.3124, + "step": 37870, + "vit_learning_rate": 1.185118269694918e-09 + }, + { + "avg_batch_load_time": 0.0012, + "avg_batch_processing_time": 1.1683, + "epoch": 1.9704535996670827, + "grad_norm": 1.7837957963006599, + "learning_rate": 5.7223660493727075e-09, + "loss": 0.3198, + "step": 37880, + "vit_learning_rate": 1.1444732098745414e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.8797, + "epoch": 1.9709737827715355, + "grad_norm": 1.9310687146407908, + "learning_rate": 5.5226847564304744e-09, + "loss": 0.3239, + "step": 37890, + "vit_learning_rate": 1.1045369512860947e-09 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 1.1426, + "epoch": 1.9714939658759882, + "grad_norm": 1.2256598529382987, + "learning_rate": 5.3265476113445456e-09, + "loss": 0.2718, + "step": 37900, + "vit_learning_rate": 1.0653095222689091e-09 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.8107, + "epoch": 1.972014148980441, + "grad_norm": 1.9393814827353566, + "learning_rate": 5.133954753295811e-09, + "loss": 0.3338, + "step": 37910, + "vit_learning_rate": 1.026790950659162e-09 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.7515, + "epoch": 1.9725343320848938, + "grad_norm": 1.684781437511521, + "learning_rate": 4.944906318951059e-09, + "loss": 0.3409, + "step": 37920, + "vit_learning_rate": 9.889812637902117e-10 + }, + { + "avg_batch_load_time": 0.0037, + "avg_batch_processing_time": 0.7502, + "epoch": 1.9730545151893466, + "grad_norm": 2.1388143431010507, + "learning_rate": 4.759402442461869e-09, + "loss": 0.3236, + "step": 37930, + "vit_learning_rate": 9.518804884923737e-10 + }, + { + "avg_batch_load_time": 0.0156, + "avg_batch_processing_time": 0.7636, + "epoch": 1.9735746982937994, + "grad_norm": 1.781618773424894, + "learning_rate": 4.577443255464053e-09, + "loss": 0.3252, + "step": 37940, + "vit_learning_rate": 9.154886510928106e-10 + }, + { + "avg_batch_load_time": 0.0037, + "avg_batch_processing_time": 0.9809, + "epoch": 1.9740948813982522, + "grad_norm": 1.5278962807059027, + "learning_rate": 4.399028887078771e-09, + "loss": 0.3287, + "step": 37950, + "vit_learning_rate": 8.798057774157541e-10 + }, + { + "avg_batch_load_time": 0.0035, + "avg_batch_processing_time": 0.7791, + "epoch": 1.974615064502705, + "grad_norm": 1.5557808244497908, + "learning_rate": 4.224159463909749e-09, + "loss": 0.3074, + "step": 37960, + "vit_learning_rate": 8.448318927819498e-10 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.756, + "epoch": 1.9751352476071578, + "grad_norm": 2.499123212216171, + "learning_rate": 4.052835110048281e-09, + "loss": 0.3335, + "step": 37970, + "vit_learning_rate": 8.105670220096561e-10 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7907, + "epoch": 1.9756554307116105, + "grad_norm": 1.2673432219704548, + "learning_rate": 3.885055947068228e-09, + "loss": 0.3139, + "step": 37980, + "vit_learning_rate": 7.770111894136455e-10 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.7001, + "epoch": 1.9761756138160633, + "grad_norm": 1.6399485166402978, + "learning_rate": 3.720822094027132e-09, + "loss": 0.3068, + "step": 37990, + "vit_learning_rate": 7.441644188054264e-10 + }, + { + "avg_batch_load_time": 0.0016, + "avg_batch_processing_time": 0.7606, + "epoch": 1.9766957969205161, + "grad_norm": 1.4078192348458798, + "learning_rate": 3.56013366746788e-09, + "loss": 0.327, + "step": 38000, + "vit_learning_rate": 7.120267334935759e-10 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7164, + "epoch": 1.977215980024969, + "grad_norm": 1.5718644455998485, + "learning_rate": 3.402990781417037e-09, + "loss": 0.3326, + "step": 38010, + "vit_learning_rate": 6.805981562834074e-10 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.8059, + "epoch": 1.9777361631294217, + "grad_norm": 1.5597726732266028, + "learning_rate": 3.2493935473859595e-09, + "loss": 0.2996, + "step": 38020, + "vit_learning_rate": 6.498787094771918e-10 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 0.6925, + "epoch": 1.9782563462338745, + "grad_norm": 1.6750966260765425, + "learning_rate": 3.099342074368017e-09, + "loss": 0.3473, + "step": 38030, + "vit_learning_rate": 6.198684148736033e-10 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7446, + "epoch": 1.978776529338327, + "grad_norm": 2.284672391196351, + "learning_rate": 2.9528364688430346e-09, + "loss": 0.3239, + "step": 38040, + "vit_learning_rate": 5.905672937686068e-10 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.6393, + "epoch": 1.9792967124427798, + "grad_norm": 1.8635584595085788, + "learning_rate": 2.809876834772851e-09, + "loss": 0.3088, + "step": 38050, + "vit_learning_rate": 5.619753669545702e-10 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.7238, + "epoch": 1.9798168955472326, + "grad_norm": 2.1333964285033025, + "learning_rate": 2.6704632736029855e-09, + "loss": 0.313, + "step": 38060, + "vit_learning_rate": 5.340926547205971e-10 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 0.7329, + "epoch": 1.9803370786516854, + "grad_norm": 1.422466198389118, + "learning_rate": 2.5345958842643015e-09, + "loss": 0.2899, + "step": 38070, + "vit_learning_rate": 5.069191768528603e-10 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 1.0021, + "epoch": 1.9808572617561382, + "grad_norm": 1.5933663117322518, + "learning_rate": 2.402274763169121e-09, + "loss": 0.3175, + "step": 38080, + "vit_learning_rate": 4.804549526338242e-10 + }, + { + "avg_batch_load_time": 0.003, + "avg_batch_processing_time": 0.9877, + "epoch": 1.981377444860591, + "grad_norm": 1.6485624297794088, + "learning_rate": 2.2735000042145572e-09, + "loss": 0.3204, + "step": 38090, + "vit_learning_rate": 4.5470000084291136e-10 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7168, + "epoch": 1.9818976279650435, + "grad_norm": 1.6017418756395407, + "learning_rate": 2.148271698781401e-09, + "loss": 0.3367, + "step": 38100, + "vit_learning_rate": 4.296543397562802e-10 + }, + { + "avg_batch_load_time": 0.0023, + "avg_batch_processing_time": 0.7228, + "epoch": 1.9824178110694963, + "grad_norm": 1.4421597312218164, + "learning_rate": 2.0265899357324593e-09, + "loss": 0.294, + "step": 38110, + "vit_learning_rate": 4.053179871464918e-10 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.6594, + "epoch": 1.9829379941739491, + "grad_norm": 1.653170034676979, + "learning_rate": 1.908454801415327e-09, + "loss": 0.3136, + "step": 38120, + "vit_learning_rate": 3.816909602830654e-10 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6868, + "epoch": 1.983458177278402, + "grad_norm": 1.584731397310288, + "learning_rate": 1.7938663796601697e-09, + "loss": 0.3294, + "step": 38130, + "vit_learning_rate": 3.587732759320339e-10 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.7253, + "epoch": 1.9839783603828547, + "grad_norm": 1.5080154549031155, + "learning_rate": 1.6828247517802766e-09, + "loss": 0.3306, + "step": 38140, + "vit_learning_rate": 3.3656495035605524e-10 + }, + { + "avg_batch_load_time": 0.0041, + "avg_batch_processing_time": 0.7741, + "epoch": 1.9844985434873075, + "grad_norm": 1.4142356478489662, + "learning_rate": 1.5753299965726166e-09, + "loss": 0.3409, + "step": 38150, + "vit_learning_rate": 3.150659993145233e-10 + }, + { + "avg_batch_load_time": 0.0036, + "avg_batch_processing_time": 0.7268, + "epoch": 1.9850187265917603, + "grad_norm": 1.380124000372268, + "learning_rate": 1.471382190316728e-09, + "loss": 0.325, + "step": 38160, + "vit_learning_rate": 2.9427643806334556e-10 + }, + { + "avg_batch_load_time": 0.0033, + "avg_batch_processing_time": 0.9342, + "epoch": 1.985538909696213, + "grad_norm": 1.499586366168331, + "learning_rate": 1.3709814067752736e-09, + "loss": 0.3173, + "step": 38170, + "vit_learning_rate": 2.741962813550547e-10 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 1.5101, + "epoch": 1.9860590928006658, + "grad_norm": 1.4622942682913407, + "learning_rate": 1.2741277171945954e-09, + "loss": 0.2976, + "step": 38180, + "vit_learning_rate": 2.5482554343891905e-10 + }, + { + "avg_batch_load_time": 0.0031, + "avg_batch_processing_time": 0.8123, + "epoch": 1.9865792759051186, + "grad_norm": 1.4813669721952858, + "learning_rate": 1.1808211903019395e-09, + "loss": 0.3195, + "step": 38190, + "vit_learning_rate": 2.3616423806038787e-10 + }, + { + "avg_batch_load_time": 0.0027, + "avg_batch_processing_time": 1.0689, + "epoch": 1.9870994590095714, + "grad_norm": 1.6189514919208348, + "learning_rate": 1.091061892310452e-09, + "loss": 0.3247, + "step": 38200, + "vit_learning_rate": 2.182123784620904e-10 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.6965, + "epoch": 1.9876196421140242, + "grad_norm": 1.7232383242486204, + "learning_rate": 1.004849886913628e-09, + "loss": 0.3184, + "step": 38210, + "vit_learning_rate": 2.009699773827256e-10 + }, + { + "avg_batch_load_time": 0.0019, + "avg_batch_processing_time": 1.0664, + "epoch": 1.988139825218477, + "grad_norm": 1.7513529694040306, + "learning_rate": 9.221852352886417e-10, + "loss": 0.3217, + "step": 38220, + "vit_learning_rate": 1.8443704705772833e-10 + }, + { + "avg_batch_load_time": 0.0024, + "avg_batch_processing_time": 0.815, + "epoch": 1.9886600083229298, + "grad_norm": 1.5114540834216201, + "learning_rate": 8.430679960957921e-10, + "loss": 0.3073, + "step": 38230, + "vit_learning_rate": 1.6861359921915842e-10 + }, + { + "avg_batch_load_time": 0.0025, + "avg_batch_processing_time": 0.8202, + "epoch": 1.9891801914273826, + "grad_norm": 1.567779317526907, + "learning_rate": 7.674982254779473e-10, + "loss": 0.3139, + "step": 38240, + "vit_learning_rate": 1.5349964509558944e-10 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.7523, + "epoch": 1.9897003745318353, + "grad_norm": 1.5778280374504232, + "learning_rate": 6.954759770594344e-10, + "loss": 0.3193, + "step": 38250, + "vit_learning_rate": 1.3909519541188685e-10 + }, + { + "avg_batch_load_time": 0.0034, + "avg_batch_processing_time": 0.7383, + "epoch": 1.9902205576362881, + "grad_norm": 1.1769166293771334, + "learning_rate": 6.270013019493703e-10, + "loss": 0.3325, + "step": 38260, + "vit_learning_rate": 1.2540026038987405e-10 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 0.7623, + "epoch": 1.9907407407407407, + "grad_norm": 1.5131791321847783, + "learning_rate": 5.620742487377762e-10, + "loss": 0.3062, + "step": 38270, + "vit_learning_rate": 1.1241484974755522e-10 + }, + { + "avg_batch_load_time": 0.032, + "avg_batch_processing_time": 0.7125, + "epoch": 1.9912609238451935, + "grad_norm": 1.6703066126032151, + "learning_rate": 5.00694863497797e-10, + "loss": 0.3149, + "step": 38280, + "vit_learning_rate": 1.001389726995594e-10 + }, + { + "avg_batch_load_time": 0.0032, + "avg_batch_processing_time": 1.1799, + "epoch": 1.9917811069496463, + "grad_norm": 1.4288629237859916, + "learning_rate": 4.428631897845925e-10, + "loss": 0.3297, + "step": 38290, + "vit_learning_rate": 8.857263795691849e-11 + }, + { + "avg_batch_load_time": 0.0044, + "avg_batch_processing_time": 0.8356, + "epoch": 1.992301290054099, + "grad_norm": 1.765311930759829, + "learning_rate": 3.885792686370016e-10, + "loss": 0.358, + "step": 38300, + "vit_learning_rate": 7.77158537274003e-11 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.8462, + "epoch": 1.9928214731585518, + "grad_norm": 2.0475228281809583, + "learning_rate": 3.3784313857532224e-10, + "loss": 0.3161, + "step": 38310, + "vit_learning_rate": 6.756862771506444e-11 + }, + { + "avg_batch_load_time": 0.0028, + "avg_batch_processing_time": 0.715, + "epoch": 1.9933416562630044, + "grad_norm": 1.5909938208671455, + "learning_rate": 2.906548356024219e-10, + "loss": 0.3064, + "step": 38320, + "vit_learning_rate": 5.8130967120484375e-11 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 0.743, + "epoch": 1.9938618393674572, + "grad_norm": 1.3130613379760883, + "learning_rate": 2.470143932042923e-10, + "loss": 0.3203, + "step": 38330, + "vit_learning_rate": 4.9402878640858457e-11 + }, + { + "avg_batch_load_time": 0.0021, + "avg_batch_processing_time": 0.6887, + "epoch": 1.99438202247191, + "grad_norm": 1.5731382054443217, + "learning_rate": 2.0692184234838432e-10, + "loss": 0.3351, + "step": 38340, + "vit_learning_rate": 4.138436846967685e-11 + }, + { + "avg_batch_load_time": 0.0015, + "avg_batch_processing_time": 0.7395, + "epoch": 1.9949022055763628, + "grad_norm": 1.4530304318756224, + "learning_rate": 1.7037721148471797e-10, + "loss": 0.321, + "step": 38350, + "vit_learning_rate": 3.407544229694359e-11 + }, + { + "avg_batch_load_time": 0.002, + "avg_batch_processing_time": 0.6769, + "epoch": 1.9954223886808156, + "grad_norm": 1.4905914001407905, + "learning_rate": 1.3738052654643787e-10, + "loss": 0.2981, + "step": 38360, + "vit_learning_rate": 2.7476105309287567e-11 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6681, + "epoch": 1.9959425717852683, + "grad_norm": 1.4459691540490978, + "learning_rate": 1.0793181094759242e-10, + "loss": 0.3173, + "step": 38370, + "vit_learning_rate": 2.158636218951848e-11 + }, + { + "avg_batch_load_time": 0.0014, + "avg_batch_processing_time": 0.6587, + "epoch": 1.9964627548897211, + "grad_norm": 1.579203229932932, + "learning_rate": 8.203108558646477e-11, + "loss": 0.3193, + "step": 38380, + "vit_learning_rate": 1.6406217117292953e-11 + }, + { + "avg_batch_load_time": 0.0018, + "avg_batch_processing_time": 0.6537, + "epoch": 1.996982937994174, + "grad_norm": 1.5051456871978357, + "learning_rate": 5.967836884168687e-11, + "loss": 0.349, + "step": 38390, + "vit_learning_rate": 1.1935673768337373e-11 + }, + { + "avg_batch_load_time": 0.0022, + "avg_batch_processing_time": 1.7713, + "epoch": 1.9975031210986267, + "grad_norm": 1.5159766983201022, + "learning_rate": 4.087367657501507e-11, + "loss": 0.2891, + "step": 38400, + "vit_learning_rate": 8.174735315003012e-12 + }, + { + "avg_batch_load_time": 1.042, + "avg_batch_processing_time": 0.7227, + "epoch": 1.9980233042030795, + "grad_norm": 2.5786284807620796, + "learning_rate": 2.5617022131330103e-11, + "loss": 0.3209, + "step": 38410, + "vit_learning_rate": 5.12340442626602e-12 + }, + { + "avg_batch_load_time": 0.0017, + "avg_batch_processing_time": 1.1523, + "epoch": 1.9985434873075323, + "grad_norm": 1.7032447618558937, + "learning_rate": 1.3908416335861597e-11, + "loss": 0.3286, + "step": 38420, + "vit_learning_rate": 2.781683267172319e-12 + }, + { + "avg_batch_load_time": 0.0037, + "avg_batch_processing_time": 0.7874, + "epoch": 1.999063670411985, + "grad_norm": 1.699801781672804, + "learning_rate": 5.747867498073767e-12, + "loss": 0.3269, + "step": 38430, + "vit_learning_rate": 1.1495734996147533e-12 + }, + { + "avg_batch_load_time": 0.0026, + "avg_batch_processing_time": 0.7133, + "epoch": 1.9995838535164379, + "grad_norm": 1.7591404608826253, + "learning_rate": 1.135381408889913e-12, + "loss": 0.3153, + "step": 38440, + "vit_learning_rate": 2.2707628177798254e-13 + }, + { + "epoch": 2.0, + "step": 38448, + "total_flos": 2.8637355249615503e+19, + "train_loss": 0.09728173750343791, + "train_runtime": 9579.4183, + "train_samples_per_second": 256.868, + "train_steps_per_second": 4.014 + } + ], + "logging_steps": 10, + "max_steps": 38448, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1923, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.8637355249615503e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}