{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.78494623655914, "eval_steps": 250, "global_step": 2750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005376344086021506, "grad_norm": 63.91816329956055, "learning_rate": 5e-07, "loss": 4.069, "num_input_tokens_seen": 115868, "step": 1 }, { "epoch": 0.005376344086021506, "loss": 3.810594081878662, "loss_ce": 1.692674160003662, "loss_iou": 0.87890625, "loss_num": 0.072265625, "loss_xval": 2.125, "num_input_tokens_seen": 115868, "step": 1 }, { "epoch": 0.010752688172043012, "grad_norm": 67.84192657470703, "learning_rate": 5e-07, "loss": 3.4124, "num_input_tokens_seen": 233220, "step": 2 }, { "epoch": 0.010752688172043012, "loss": 3.532464027404785, "loss_ce": 1.6935968399047852, "loss_iou": 0.734375, "loss_num": 0.07421875, "loss_xval": 1.8359375, "num_input_tokens_seen": 233220, "step": 2 }, { "epoch": 0.016129032258064516, "grad_norm": 85.17193603515625, "learning_rate": 5e-07, "loss": 3.6838, "num_input_tokens_seen": 347320, "step": 3 }, { "epoch": 0.016129032258064516, "loss": 3.5050368309020996, "loss_ce": 1.1808180809020996, "loss_iou": 0.984375, "loss_num": 0.07177734375, "loss_xval": 2.328125, "num_input_tokens_seen": 347320, "step": 3 }, { "epoch": 0.021505376344086023, "grad_norm": 73.9618148803711, "learning_rate": 5e-07, "loss": 3.5308, "num_input_tokens_seen": 462780, "step": 4 }, { "epoch": 0.021505376344086023, "loss": 3.1332285404205322, "loss_ce": 1.4291268587112427, "loss_iou": 0.71484375, "loss_num": 0.054931640625, "loss_xval": 1.703125, "num_input_tokens_seen": 462780, "step": 4 }, { "epoch": 0.026881720430107527, "grad_norm": 77.57437133789062, "learning_rate": 5e-07, "loss": 3.5889, "num_input_tokens_seen": 578700, "step": 5 }, { "epoch": 0.026881720430107527, "eval_icons_CIoU": 0.34418797492980957, "eval_icons_GIoU": 0.3129650801420212, "eval_icons_IoU": 0.4987167418003082, "eval_icons_MAE_all": 0.022344814613461494, "eval_icons_MAE_h": 0.0230330191552639, "eval_icons_MAE_w": 0.051025050692260265, "eval_icons_MAE_x_boxes": 0.048343575559556484, "eval_icons_MAE_y_boxes": 0.021558688953518867, "eval_icons_NUM_probability": 0.999860942363739, "eval_icons_inside_bbox": 0.7517361044883728, "eval_icons_loss": 1.5062987804412842, "eval_icons_loss_ce": 4.2414796553202905e-05, "eval_icons_loss_iou": 0.6829833984375, "eval_icons_loss_num": 0.025074005126953125, "eval_icons_loss_xval": 1.49169921875, "eval_icons_runtime": 39.5012, "eval_icons_samples_per_second": 1.266, "eval_icons_steps_per_second": 0.051, "num_input_tokens_seen": 578700, "step": 5 }, { "epoch": 0.026881720430107527, "eval_screenspot_CIoU": 0.12853384266297022, "eval_screenspot_GIoU": 0.10456962635119756, "eval_screenspot_IoU": 0.28989797830581665, "eval_screenspot_MAE_all": 0.07958084965745608, "eval_screenspot_MAE_h": 0.07069386790196101, "eval_screenspot_MAE_w": 0.12874508400758108, "eval_screenspot_MAE_x_boxes": 0.10875214884678523, "eval_screenspot_MAE_y_boxes": 0.05736609920859337, "eval_screenspot_NUM_probability": 0.999954879283905, "eval_screenspot_inside_bbox": 0.5987499952316284, "eval_screenspot_loss": 2.2332680225372314, "eval_screenspot_loss_ce": 0.0039237569241474075, "eval_screenspot_loss_iou": 0.9208984375, "eval_screenspot_loss_num": 0.0846099853515625, "eval_screenspot_loss_xval": 2.2638346354166665, "eval_screenspot_runtime": 72.1701, "eval_screenspot_samples_per_second": 1.233, "eval_screenspot_steps_per_second": 0.042, "num_input_tokens_seen": 578700, "step": 5 }, { "epoch": 0.026881720430107527, "loss": 2.219446897506714, "loss_ce": 0.0021616534795612097, "loss_iou": 0.91796875, "loss_num": 0.076171875, "loss_xval": 2.21875, "num_input_tokens_seen": 578700, "step": 5 }, { "epoch": 0.03225806451612903, "grad_norm": 137.27980041503906, "learning_rate": 5e-07, "loss": 3.4931, "num_input_tokens_seen": 692020, "step": 6 }, { "epoch": 0.03225806451612903, "loss": 3.541520118713379, "loss_ce": 1.3965004682540894, "loss_iou": 0.8984375, "loss_num": 0.0693359375, "loss_xval": 2.140625, "num_input_tokens_seen": 692020, "step": 6 }, { "epoch": 0.03763440860215054, "grad_norm": 92.00971984863281, "learning_rate": 5e-07, "loss": 3.2537, "num_input_tokens_seen": 809252, "step": 7 }, { "epoch": 0.03763440860215054, "loss": 2.4464709758758545, "loss_ce": 0.8361192941665649, "loss_iou": 0.70703125, "loss_num": 0.03955078125, "loss_xval": 1.609375, "num_input_tokens_seen": 809252, "step": 7 }, { "epoch": 0.043010752688172046, "grad_norm": 66.32962799072266, "learning_rate": 5e-07, "loss": 3.8727, "num_input_tokens_seen": 921880, "step": 8 }, { "epoch": 0.043010752688172046, "loss": 4.452248573303223, "loss_ce": 1.907814621925354, "loss_iou": 1.09375, "loss_num": 0.07177734375, "loss_xval": 2.546875, "num_input_tokens_seen": 921880, "step": 8 }, { "epoch": 0.04838709677419355, "grad_norm": 60.78109359741211, "learning_rate": 5e-07, "loss": 3.7119, "num_input_tokens_seen": 1036148, "step": 9 }, { "epoch": 0.04838709677419355, "loss": 4.799681663513184, "loss_ce": 2.0438222885131836, "loss_iou": 1.203125, "loss_num": 0.06884765625, "loss_xval": 2.75, "num_input_tokens_seen": 1036148, "step": 9 }, { "epoch": 0.053763440860215055, "grad_norm": 59.72976303100586, "learning_rate": 5e-07, "loss": 3.8002, "num_input_tokens_seen": 1150264, "step": 10 }, { "epoch": 0.053763440860215055, "loss": 3.8487606048583984, "loss_ce": 1.5362606048583984, "loss_iou": 0.9609375, "loss_num": 0.0771484375, "loss_xval": 2.3125, "num_input_tokens_seen": 1150264, "step": 10 }, { "epoch": 0.05913978494623656, "grad_norm": 71.97016143798828, "learning_rate": 5e-07, "loss": 4.0298, "num_input_tokens_seen": 1265124, "step": 11 }, { "epoch": 0.05913978494623656, "loss": 3.8839755058288574, "loss_ce": 1.8214757442474365, "loss_iou": 0.82421875, "loss_num": 0.08251953125, "loss_xval": 2.0625, "num_input_tokens_seen": 1265124, "step": 11 }, { "epoch": 0.06451612903225806, "grad_norm": 62.945491790771484, "learning_rate": 5e-07, "loss": 3.9322, "num_input_tokens_seen": 1376344, "step": 12 }, { "epoch": 0.06451612903225806, "loss": 3.577096939086914, "loss_ce": 1.775827407836914, "loss_iou": 0.76953125, "loss_num": 0.052490234375, "loss_xval": 1.8046875, "num_input_tokens_seen": 1376344, "step": 12 }, { "epoch": 0.06989247311827956, "grad_norm": 65.23322296142578, "learning_rate": 5e-07, "loss": 3.965, "num_input_tokens_seen": 1490644, "step": 13 }, { "epoch": 0.06989247311827956, "loss": 4.18902587890625, "loss_ce": 1.7300411462783813, "loss_iou": 1.0078125, "loss_num": 0.087890625, "loss_xval": 2.453125, "num_input_tokens_seen": 1490644, "step": 13 }, { "epoch": 0.07526881720430108, "grad_norm": 59.846126556396484, "learning_rate": 5e-07, "loss": 3.2915, "num_input_tokens_seen": 1606900, "step": 14 }, { "epoch": 0.07526881720430108, "loss": 3.463229179382324, "loss_ce": 1.5208463668823242, "loss_iou": 0.80859375, "loss_num": 0.06396484375, "loss_xval": 1.9453125, "num_input_tokens_seen": 1606900, "step": 14 }, { "epoch": 0.08064516129032258, "grad_norm": 65.48455810546875, "learning_rate": 5e-07, "loss": 3.4281, "num_input_tokens_seen": 1721588, "step": 15 }, { "epoch": 0.08064516129032258, "loss": 3.8229939937591553, "loss_ce": 1.4069783687591553, "loss_iou": 1.0703125, "loss_num": 0.054443359375, "loss_xval": 2.421875, "num_input_tokens_seen": 1721588, "step": 15 }, { "epoch": 0.08602150537634409, "grad_norm": 63.53733825683594, "learning_rate": 5e-07, "loss": 3.465, "num_input_tokens_seen": 1838160, "step": 16 }, { "epoch": 0.08602150537634409, "loss": 3.117443084716797, "loss_ce": 1.2614859342575073, "loss_iou": 0.75390625, "loss_num": 0.0693359375, "loss_xval": 1.859375, "num_input_tokens_seen": 1838160, "step": 16 }, { "epoch": 0.0913978494623656, "grad_norm": 59.834510803222656, "learning_rate": 5e-07, "loss": 3.6622, "num_input_tokens_seen": 1953460, "step": 17 }, { "epoch": 0.0913978494623656, "loss": 4.1744608879089355, "loss_ce": 1.6085426807403564, "loss_iou": 1.078125, "loss_num": 0.08154296875, "loss_xval": 2.5625, "num_input_tokens_seen": 1953460, "step": 17 }, { "epoch": 0.0967741935483871, "grad_norm": 71.49234008789062, "learning_rate": 5e-07, "loss": 3.7766, "num_input_tokens_seen": 2068572, "step": 18 }, { "epoch": 0.0967741935483871, "loss": 3.8596837520599365, "loss_ce": 1.341128945350647, "loss_iou": 1.0390625, "loss_num": 0.0888671875, "loss_xval": 2.515625, "num_input_tokens_seen": 2068572, "step": 18 }, { "epoch": 0.10215053763440861, "grad_norm": 51.239219665527344, "learning_rate": 5e-07, "loss": 3.4467, "num_input_tokens_seen": 2184932, "step": 19 }, { "epoch": 0.10215053763440861, "loss": 3.3158349990844727, "loss_ce": 1.3285305500030518, "loss_iou": 0.85546875, "loss_num": 0.054931640625, "loss_xval": 1.984375, "num_input_tokens_seen": 2184932, "step": 19 }, { "epoch": 0.10752688172043011, "grad_norm": 59.26980972290039, "learning_rate": 5e-07, "loss": 4.2361, "num_input_tokens_seen": 2300424, "step": 20 }, { "epoch": 0.10752688172043011, "loss": 4.4680023193359375, "loss_ce": 1.272689938545227, "loss_iou": 1.328125, "loss_num": 0.10888671875, "loss_xval": 3.1875, "num_input_tokens_seen": 2300424, "step": 20 }, { "epoch": 0.11290322580645161, "grad_norm": 75.09331512451172, "learning_rate": 5e-07, "loss": 3.6384, "num_input_tokens_seen": 2415872, "step": 21 }, { "epoch": 0.11290322580645161, "loss": 4.212388038635254, "loss_ce": 2.350571632385254, "loss_iou": 0.734375, "loss_num": 0.0791015625, "loss_xval": 1.859375, "num_input_tokens_seen": 2415872, "step": 21 }, { "epoch": 0.11827956989247312, "grad_norm": 72.20677947998047, "learning_rate": 5e-07, "loss": 3.831, "num_input_tokens_seen": 2528880, "step": 22 }, { "epoch": 0.11827956989247312, "loss": 4.211462020874023, "loss_ce": 1.8940787315368652, "loss_iou": 0.98828125, "loss_num": 0.068359375, "loss_xval": 2.3125, "num_input_tokens_seen": 2528880, "step": 22 }, { "epoch": 0.12365591397849462, "grad_norm": 61.82088088989258, "learning_rate": 5e-07, "loss": 3.5488, "num_input_tokens_seen": 2645212, "step": 23 }, { "epoch": 0.12365591397849462, "loss": 3.9213790893554688, "loss_ce": 1.3940355777740479, "loss_iou": 1.0859375, "loss_num": 0.072265625, "loss_xval": 2.53125, "num_input_tokens_seen": 2645212, "step": 23 }, { "epoch": 0.12903225806451613, "grad_norm": 46.60725021362305, "learning_rate": 5e-07, "loss": 3.8027, "num_input_tokens_seen": 2759744, "step": 24 }, { "epoch": 0.12903225806451613, "loss": 3.600879192352295, "loss_ce": 1.2229493856430054, "loss_iou": 0.9921875, "loss_num": 0.07861328125, "loss_xval": 2.375, "num_input_tokens_seen": 2759744, "step": 24 }, { "epoch": 0.13440860215053763, "grad_norm": 73.40596771240234, "learning_rate": 5e-07, "loss": 3.6809, "num_input_tokens_seen": 2876124, "step": 25 }, { "epoch": 0.13440860215053763, "loss": 3.851010322570801, "loss_ce": 1.5971040725708008, "loss_iou": 0.8984375, "loss_num": 0.0908203125, "loss_xval": 2.25, "num_input_tokens_seen": 2876124, "step": 25 }, { "epoch": 0.13978494623655913, "grad_norm": 48.34743118286133, "learning_rate": 5e-07, "loss": 3.4583, "num_input_tokens_seen": 2995092, "step": 26 }, { "epoch": 0.13978494623655913, "loss": 3.288588523864746, "loss_ce": 1.041029930114746, "loss_iou": 0.9140625, "loss_num": 0.083984375, "loss_xval": 2.25, "num_input_tokens_seen": 2995092, "step": 26 }, { "epoch": 0.14516129032258066, "grad_norm": 53.19231414794922, "learning_rate": 5e-07, "loss": 3.6856, "num_input_tokens_seen": 3111272, "step": 27 }, { "epoch": 0.14516129032258066, "loss": 4.104222774505615, "loss_ce": 1.9924062490463257, "loss_iou": 0.828125, "loss_num": 0.0908203125, "loss_xval": 2.109375, "num_input_tokens_seen": 3111272, "step": 27 }, { "epoch": 0.15053763440860216, "grad_norm": 58.61141586303711, "learning_rate": 5e-07, "loss": 4.0287, "num_input_tokens_seen": 3224432, "step": 28 }, { "epoch": 0.15053763440860216, "loss": 3.221557378768921, "loss_ce": 1.2152096033096313, "loss_iou": 0.84375, "loss_num": 0.064453125, "loss_xval": 2.0, "num_input_tokens_seen": 3224432, "step": 28 }, { "epoch": 0.15591397849462366, "grad_norm": 72.3184814453125, "learning_rate": 5e-07, "loss": 3.8405, "num_input_tokens_seen": 3341196, "step": 29 }, { "epoch": 0.15591397849462366, "loss": 3.6549267768859863, "loss_ce": 0.9771925210952759, "loss_iou": 1.125, "loss_num": 0.08447265625, "loss_xval": 2.671875, "num_input_tokens_seen": 3341196, "step": 29 }, { "epoch": 0.16129032258064516, "grad_norm": 50.18266296386719, "learning_rate": 5e-07, "loss": 3.0591, "num_input_tokens_seen": 3459216, "step": 30 }, { "epoch": 0.16129032258064516, "loss": 2.82873272895813, "loss_ce": 1.0533421039581299, "loss_iou": 0.74609375, "loss_num": 0.056396484375, "loss_xval": 1.7734375, "num_input_tokens_seen": 3459216, "step": 30 }, { "epoch": 0.16666666666666666, "grad_norm": 55.66604995727539, "learning_rate": 5e-07, "loss": 3.6997, "num_input_tokens_seen": 3575076, "step": 31 }, { "epoch": 0.16666666666666666, "loss": 3.6777069568634033, "loss_ce": 1.0253632068634033, "loss_iou": 1.1015625, "loss_num": 0.0888671875, "loss_xval": 2.65625, "num_input_tokens_seen": 3575076, "step": 31 }, { "epoch": 0.17204301075268819, "grad_norm": 66.53539276123047, "learning_rate": 5e-07, "loss": 3.3018, "num_input_tokens_seen": 3695096, "step": 32 }, { "epoch": 0.17204301075268819, "loss": 3.9738612174987793, "loss_ce": 1.6545252799987793, "loss_iou": 0.94921875, "loss_num": 0.083984375, "loss_xval": 2.3125, "num_input_tokens_seen": 3695096, "step": 32 }, { "epoch": 0.1774193548387097, "grad_norm": 62.468528747558594, "learning_rate": 5e-07, "loss": 3.9192, "num_input_tokens_seen": 3811580, "step": 33 }, { "epoch": 0.1774193548387097, "loss": 4.518918991088867, "loss_ce": 1.7337629795074463, "loss_iou": 1.1328125, "loss_num": 0.10400390625, "loss_xval": 2.78125, "num_input_tokens_seen": 3811580, "step": 33 }, { "epoch": 0.1827956989247312, "grad_norm": 47.74855422973633, "learning_rate": 5e-07, "loss": 3.4728, "num_input_tokens_seen": 3929660, "step": 34 }, { "epoch": 0.1827956989247312, "loss": 3.3192944526672363, "loss_ce": 1.5829663276672363, "loss_iou": 0.69921875, "loss_num": 0.0673828125, "loss_xval": 1.734375, "num_input_tokens_seen": 3929660, "step": 34 }, { "epoch": 0.1881720430107527, "grad_norm": 77.3158950805664, "learning_rate": 5e-07, "loss": 3.7821, "num_input_tokens_seen": 4038396, "step": 35 }, { "epoch": 0.1881720430107527, "loss": 4.077603816986084, "loss_ce": 1.489713191986084, "loss_iou": 1.0859375, "loss_num": 0.08447265625, "loss_xval": 2.59375, "num_input_tokens_seen": 4038396, "step": 35 }, { "epoch": 0.1935483870967742, "grad_norm": 59.74021911621094, "learning_rate": 5e-07, "loss": 3.2355, "num_input_tokens_seen": 4154552, "step": 36 }, { "epoch": 0.1935483870967742, "loss": 3.48569917678833, "loss_ce": 0.979839563369751, "loss_iou": 1.0703125, "loss_num": 0.07470703125, "loss_xval": 2.5, "num_input_tokens_seen": 4154552, "step": 36 }, { "epoch": 0.1989247311827957, "grad_norm": 67.18848419189453, "learning_rate": 5e-07, "loss": 3.4554, "num_input_tokens_seen": 4272216, "step": 37 }, { "epoch": 0.1989247311827957, "loss": 3.2729299068450928, "loss_ce": 1.2963674068450928, "loss_iou": 0.8359375, "loss_num": 0.060302734375, "loss_xval": 1.9765625, "num_input_tokens_seen": 4272216, "step": 37 }, { "epoch": 0.20430107526881722, "grad_norm": 50.68886184692383, "learning_rate": 5e-07, "loss": 3.7715, "num_input_tokens_seen": 4387204, "step": 38 }, { "epoch": 0.20430107526881722, "loss": 3.754398822784424, "loss_ce": 1.3278852701187134, "loss_iou": 1.0546875, "loss_num": 0.06396484375, "loss_xval": 2.421875, "num_input_tokens_seen": 4387204, "step": 38 }, { "epoch": 0.20967741935483872, "grad_norm": 59.145782470703125, "learning_rate": 5e-07, "loss": 3.4361, "num_input_tokens_seen": 4504592, "step": 39 }, { "epoch": 0.20967741935483872, "loss": 2.6862258911132812, "loss_ce": 1.0368119478225708, "loss_iou": 0.69140625, "loss_num": 0.0537109375, "loss_xval": 1.6484375, "num_input_tokens_seen": 4504592, "step": 39 }, { "epoch": 0.21505376344086022, "grad_norm": 59.1508674621582, "learning_rate": 5e-07, "loss": 3.8631, "num_input_tokens_seen": 4622264, "step": 40 }, { "epoch": 0.21505376344086022, "loss": 3.7908599376678467, "loss_ce": 1.7557036876678467, "loss_iou": 0.86328125, "loss_num": 0.060791015625, "loss_xval": 2.03125, "num_input_tokens_seen": 4622264, "step": 40 }, { "epoch": 0.22043010752688172, "grad_norm": 71.88423156738281, "learning_rate": 5e-07, "loss": 3.4833, "num_input_tokens_seen": 4737540, "step": 41 }, { "epoch": 0.22043010752688172, "loss": 3.5846195220947266, "loss_ce": 1.7923831939697266, "loss_iou": 0.73828125, "loss_num": 0.0634765625, "loss_xval": 1.7890625, "num_input_tokens_seen": 4737540, "step": 41 }, { "epoch": 0.22580645161290322, "grad_norm": 95.6041488647461, "learning_rate": 5e-07, "loss": 3.5842, "num_input_tokens_seen": 4851364, "step": 42 }, { "epoch": 0.22580645161290322, "loss": 3.944122314453125, "loss_ce": 1.433380126953125, "loss_iou": 1.03125, "loss_num": 0.0908203125, "loss_xval": 2.515625, "num_input_tokens_seen": 4851364, "step": 42 }, { "epoch": 0.23118279569892472, "grad_norm": 52.09756851196289, "learning_rate": 5e-07, "loss": 3.3139, "num_input_tokens_seen": 4966520, "step": 43 }, { "epoch": 0.23118279569892472, "loss": 3.402984142303467, "loss_ce": 1.3189998865127563, "loss_iou": 0.875, "loss_num": 0.06640625, "loss_xval": 2.078125, "num_input_tokens_seen": 4966520, "step": 43 }, { "epoch": 0.23655913978494625, "grad_norm": 62.15574645996094, "learning_rate": 5e-07, "loss": 3.2642, "num_input_tokens_seen": 5083608, "step": 44 }, { "epoch": 0.23655913978494625, "loss": 3.711635112762451, "loss_ce": 1.456263780593872, "loss_iou": 0.94140625, "loss_num": 0.07470703125, "loss_xval": 2.25, "num_input_tokens_seen": 5083608, "step": 44 }, { "epoch": 0.24193548387096775, "grad_norm": 43.022281646728516, "learning_rate": 5e-07, "loss": 3.3398, "num_input_tokens_seen": 5200704, "step": 45 }, { "epoch": 0.24193548387096775, "loss": 2.6831061840057373, "loss_ce": 0.9780279397964478, "loss_iou": 0.734375, "loss_num": 0.04736328125, "loss_xval": 1.703125, "num_input_tokens_seen": 5200704, "step": 45 }, { "epoch": 0.24731182795698925, "grad_norm": 48.33220291137695, "learning_rate": 5e-07, "loss": 3.3166, "num_input_tokens_seen": 5315888, "step": 46 }, { "epoch": 0.24731182795698925, "loss": 3.4219326972961426, "loss_ce": 1.5747647285461426, "loss_iou": 0.7109375, "loss_num": 0.083984375, "loss_xval": 1.84375, "num_input_tokens_seen": 5315888, "step": 46 }, { "epoch": 0.25268817204301075, "grad_norm": 53.58827209472656, "learning_rate": 5e-07, "loss": 3.5236, "num_input_tokens_seen": 5428940, "step": 47 }, { "epoch": 0.25268817204301075, "loss": 3.213961601257324, "loss_ce": 1.0508755445480347, "loss_iou": 0.953125, "loss_num": 0.05224609375, "loss_xval": 2.15625, "num_input_tokens_seen": 5428940, "step": 47 }, { "epoch": 0.25806451612903225, "grad_norm": 115.67105865478516, "learning_rate": 5e-07, "loss": 3.9579, "num_input_tokens_seen": 5543304, "step": 48 }, { "epoch": 0.25806451612903225, "loss": 4.0065083503723145, "loss_ce": 1.7086565494537354, "loss_iou": 0.95703125, "loss_num": 0.076171875, "loss_xval": 2.296875, "num_input_tokens_seen": 5543304, "step": 48 }, { "epoch": 0.26344086021505375, "grad_norm": 75.6112289428711, "learning_rate": 5e-07, "loss": 3.9323, "num_input_tokens_seen": 5657272, "step": 49 }, { "epoch": 0.26344086021505375, "loss": 4.61932373046875, "loss_ce": 1.858581781387329, "loss_iou": 1.171875, "loss_num": 0.083984375, "loss_xval": 2.765625, "num_input_tokens_seen": 5657272, "step": 49 }, { "epoch": 0.26881720430107525, "grad_norm": 46.45659637451172, "learning_rate": 5e-07, "loss": 3.4694, "num_input_tokens_seen": 5775300, "step": 50 }, { "epoch": 0.26881720430107525, "loss": 3.494737148284912, "loss_ce": 1.444932460784912, "loss_iou": 0.85546875, "loss_num": 0.06787109375, "loss_xval": 2.046875, "num_input_tokens_seen": 5775300, "step": 50 }, { "epoch": 0.27419354838709675, "grad_norm": 48.63031768798828, "learning_rate": 5e-07, "loss": 3.1535, "num_input_tokens_seen": 5889744, "step": 51 }, { "epoch": 0.27419354838709675, "loss": 3.691460609436035, "loss_ce": 1.7534722089767456, "loss_iou": 0.83203125, "loss_num": 0.055419921875, "loss_xval": 1.9375, "num_input_tokens_seen": 5889744, "step": 51 }, { "epoch": 0.27956989247311825, "grad_norm": 111.9576187133789, "learning_rate": 5e-07, "loss": 3.7786, "num_input_tokens_seen": 6000860, "step": 52 }, { "epoch": 0.27956989247311825, "loss": 4.0410003662109375, "loss_ce": 1.3642425537109375, "loss_iou": 1.1796875, "loss_num": 0.06298828125, "loss_xval": 2.671875, "num_input_tokens_seen": 6000860, "step": 52 }, { "epoch": 0.2849462365591398, "grad_norm": 46.60837936401367, "learning_rate": 5e-07, "loss": 3.8787, "num_input_tokens_seen": 6115848, "step": 53 }, { "epoch": 0.2849462365591398, "loss": 4.156681060791016, "loss_ce": 1.4496499300003052, "loss_iou": 1.1875, "loss_num": 0.06787109375, "loss_xval": 2.703125, "num_input_tokens_seen": 6115848, "step": 53 }, { "epoch": 0.2903225806451613, "grad_norm": 56.812599182128906, "learning_rate": 5e-07, "loss": 3.4178, "num_input_tokens_seen": 6231984, "step": 54 }, { "epoch": 0.2903225806451613, "loss": 3.5737056732177734, "loss_ce": 1.3344477415084839, "loss_iou": 0.93359375, "loss_num": 0.07470703125, "loss_xval": 2.234375, "num_input_tokens_seen": 6231984, "step": 54 }, { "epoch": 0.2956989247311828, "grad_norm": 48.5151481628418, "learning_rate": 5e-07, "loss": 3.438, "num_input_tokens_seen": 6346512, "step": 55 }, { "epoch": 0.2956989247311828, "loss": 3.3281238079071045, "loss_ce": 0.918944239616394, "loss_iou": 1.046875, "loss_num": 0.06396484375, "loss_xval": 2.40625, "num_input_tokens_seen": 6346512, "step": 55 }, { "epoch": 0.3010752688172043, "grad_norm": 71.8069076538086, "learning_rate": 5e-07, "loss": 3.6954, "num_input_tokens_seen": 6459108, "step": 56 }, { "epoch": 0.3010752688172043, "loss": 3.177731990814209, "loss_ce": 1.377927303314209, "loss_iou": 0.71484375, "loss_num": 0.07421875, "loss_xval": 1.796875, "num_input_tokens_seen": 6459108, "step": 56 }, { "epoch": 0.3064516129032258, "grad_norm": 46.1513557434082, "learning_rate": 5e-07, "loss": 3.3584, "num_input_tokens_seen": 6575028, "step": 57 }, { "epoch": 0.3064516129032258, "loss": 3.562460422515869, "loss_ce": 1.6830660104751587, "loss_iou": 0.72265625, "loss_num": 0.087890625, "loss_xval": 1.8828125, "num_input_tokens_seen": 6575028, "step": 57 }, { "epoch": 0.3118279569892473, "grad_norm": 66.45560455322266, "learning_rate": 5e-07, "loss": 3.5555, "num_input_tokens_seen": 6691524, "step": 58 }, { "epoch": 0.3118279569892473, "loss": 3.2384209632873535, "loss_ce": 0.8580499887466431, "loss_iou": 1.0078125, "loss_num": 0.0732421875, "loss_xval": 2.375, "num_input_tokens_seen": 6691524, "step": 58 }, { "epoch": 0.3172043010752688, "grad_norm": 52.618648529052734, "learning_rate": 5e-07, "loss": 3.8229, "num_input_tokens_seen": 6804040, "step": 59 }, { "epoch": 0.3172043010752688, "loss": 3.398911952972412, "loss_ce": 1.1723496913909912, "loss_iou": 0.8828125, "loss_num": 0.0927734375, "loss_xval": 2.21875, "num_input_tokens_seen": 6804040, "step": 59 }, { "epoch": 0.3225806451612903, "grad_norm": 83.9734115600586, "learning_rate": 5e-07, "loss": 3.755, "num_input_tokens_seen": 6922488, "step": 60 }, { "epoch": 0.3225806451612903, "loss": 4.1494951248168945, "loss_ce": 1.728596568107605, "loss_iou": 0.96484375, "loss_num": 0.09765625, "loss_xval": 2.421875, "num_input_tokens_seen": 6922488, "step": 60 }, { "epoch": 0.3279569892473118, "grad_norm": 56.6994514465332, "learning_rate": 5e-07, "loss": 3.5956, "num_input_tokens_seen": 7040316, "step": 61 }, { "epoch": 0.3279569892473118, "loss": 3.8732001781463623, "loss_ce": 1.3253486156463623, "loss_iou": 1.03125, "loss_num": 0.095703125, "loss_xval": 2.546875, "num_input_tokens_seen": 7040316, "step": 61 }, { "epoch": 0.3333333333333333, "grad_norm": 59.78831100463867, "learning_rate": 5e-07, "loss": 3.7333, "num_input_tokens_seen": 7150284, "step": 62 }, { "epoch": 0.3333333333333333, "loss": 4.30922269821167, "loss_ce": 1.94691801071167, "loss_iou": 0.984375, "loss_num": 0.0791015625, "loss_xval": 2.359375, "num_input_tokens_seen": 7150284, "step": 62 }, { "epoch": 0.3387096774193548, "grad_norm": 48.80577850341797, "learning_rate": 5e-07, "loss": 3.4021, "num_input_tokens_seen": 7267364, "step": 63 }, { "epoch": 0.3387096774193548, "loss": 3.132075548171997, "loss_ce": 0.8439894914627075, "loss_iou": 0.953125, "loss_num": 0.0751953125, "loss_xval": 2.28125, "num_input_tokens_seen": 7267364, "step": 63 }, { "epoch": 0.34408602150537637, "grad_norm": 56.81675338745117, "learning_rate": 5e-07, "loss": 3.412, "num_input_tokens_seen": 7385368, "step": 64 }, { "epoch": 0.34408602150537637, "loss": 3.5635831356048584, "loss_ce": 1.1212003231048584, "loss_iou": 1.0546875, "loss_num": 0.06787109375, "loss_xval": 2.4375, "num_input_tokens_seen": 7385368, "step": 64 }, { "epoch": 0.34946236559139787, "grad_norm": 55.505821228027344, "learning_rate": 5e-07, "loss": 3.5091, "num_input_tokens_seen": 7499896, "step": 65 }, { "epoch": 0.34946236559139787, "loss": 3.175748348236084, "loss_ce": 1.544156551361084, "loss_iou": 0.640625, "loss_num": 0.0703125, "loss_xval": 1.6328125, "num_input_tokens_seen": 7499896, "step": 65 }, { "epoch": 0.3548387096774194, "grad_norm": 45.45054244995117, "learning_rate": 5e-07, "loss": 3.4775, "num_input_tokens_seen": 7616932, "step": 66 }, { "epoch": 0.3548387096774194, "loss": 2.6867074966430664, "loss_ce": 0.9747934341430664, "loss_iou": 0.72265625, "loss_num": 0.052734375, "loss_xval": 1.7109375, "num_input_tokens_seen": 7616932, "step": 66 }, { "epoch": 0.3602150537634409, "grad_norm": 38.91335678100586, "learning_rate": 5e-07, "loss": 3.4132, "num_input_tokens_seen": 7731708, "step": 67 }, { "epoch": 0.3602150537634409, "loss": 3.817378282546997, "loss_ce": 1.339839220046997, "loss_iou": 1.078125, "loss_num": 0.0654296875, "loss_xval": 2.484375, "num_input_tokens_seen": 7731708, "step": 67 }, { "epoch": 0.3655913978494624, "grad_norm": 290.7739562988281, "learning_rate": 5e-07, "loss": 3.6095, "num_input_tokens_seen": 7847160, "step": 68 }, { "epoch": 0.3655913978494624, "loss": 3.3654630184173584, "loss_ce": 1.1721036434173584, "loss_iou": 0.921875, "loss_num": 0.07080078125, "loss_xval": 2.1875, "num_input_tokens_seen": 7847160, "step": 68 }, { "epoch": 0.3709677419354839, "grad_norm": 48.885684967041016, "learning_rate": 5e-07, "loss": 3.3172, "num_input_tokens_seen": 7960592, "step": 69 }, { "epoch": 0.3709677419354839, "loss": 3.6439921855926514, "loss_ce": 1.2455549240112305, "loss_iou": 1.03125, "loss_num": 0.06689453125, "loss_xval": 2.40625, "num_input_tokens_seen": 7960592, "step": 69 }, { "epoch": 0.3763440860215054, "grad_norm": 79.67800903320312, "learning_rate": 5e-07, "loss": 3.5957, "num_input_tokens_seen": 8077572, "step": 70 }, { "epoch": 0.3763440860215054, "loss": 3.1781539916992188, "loss_ce": 1.1097948551177979, "loss_iou": 0.890625, "loss_num": 0.056884765625, "loss_xval": 2.0625, "num_input_tokens_seen": 8077572, "step": 70 }, { "epoch": 0.3817204301075269, "grad_norm": 60.376625061035156, "learning_rate": 5e-07, "loss": 3.4306, "num_input_tokens_seen": 8195608, "step": 71 }, { "epoch": 0.3817204301075269, "loss": 3.8781449794769287, "loss_ce": 1.1476764678955078, "loss_iou": 1.1328125, "loss_num": 0.0927734375, "loss_xval": 2.734375, "num_input_tokens_seen": 8195608, "step": 71 }, { "epoch": 0.3870967741935484, "grad_norm": 62.071773529052734, "learning_rate": 5e-07, "loss": 3.2755, "num_input_tokens_seen": 8313504, "step": 72 }, { "epoch": 0.3870967741935484, "loss": 2.8782851696014404, "loss_ce": 0.6473280787467957, "loss_iou": 0.8984375, "loss_num": 0.0859375, "loss_xval": 2.234375, "num_input_tokens_seen": 8313504, "step": 72 }, { "epoch": 0.3924731182795699, "grad_norm": 67.21529388427734, "learning_rate": 5e-07, "loss": 3.4857, "num_input_tokens_seen": 8430408, "step": 73 }, { "epoch": 0.3924731182795699, "loss": 3.6197454929351807, "loss_ce": 0.9683783054351807, "loss_iou": 1.09375, "loss_num": 0.091796875, "loss_xval": 2.65625, "num_input_tokens_seen": 8430408, "step": 73 }, { "epoch": 0.3978494623655914, "grad_norm": 48.5662841796875, "learning_rate": 5e-07, "loss": 3.4713, "num_input_tokens_seen": 8545784, "step": 74 }, { "epoch": 0.3978494623655914, "loss": 3.5417473316192627, "loss_ce": 1.2233879566192627, "loss_iou": 0.96484375, "loss_num": 0.078125, "loss_xval": 2.3125, "num_input_tokens_seen": 8545784, "step": 74 }, { "epoch": 0.4032258064516129, "grad_norm": 83.33881378173828, "learning_rate": 5e-07, "loss": 3.4957, "num_input_tokens_seen": 8663584, "step": 75 }, { "epoch": 0.4032258064516129, "loss": 4.1115593910217285, "loss_ce": 1.326403260231018, "loss_iou": 1.1484375, "loss_num": 0.09765625, "loss_xval": 2.78125, "num_input_tokens_seen": 8663584, "step": 75 }, { "epoch": 0.40860215053763443, "grad_norm": 92.01639556884766, "learning_rate": 5e-07, "loss": 3.5606, "num_input_tokens_seen": 8777664, "step": 76 }, { "epoch": 0.40860215053763443, "loss": 3.506777763366699, "loss_ce": 1.3593168258666992, "loss_iou": 0.9375, "loss_num": 0.05517578125, "loss_xval": 2.140625, "num_input_tokens_seen": 8777664, "step": 76 }, { "epoch": 0.41397849462365593, "grad_norm": 47.4762077331543, "learning_rate": 5e-07, "loss": 3.4642, "num_input_tokens_seen": 8891528, "step": 77 }, { "epoch": 0.41397849462365593, "loss": 3.3423962593078613, "loss_ce": 0.6841931939125061, "loss_iou": 1.0859375, "loss_num": 0.0966796875, "loss_xval": 2.65625, "num_input_tokens_seen": 8891528, "step": 77 }, { "epoch": 0.41935483870967744, "grad_norm": 56.785736083984375, "learning_rate": 5e-07, "loss": 3.3614, "num_input_tokens_seen": 9007948, "step": 78 }, { "epoch": 0.41935483870967744, "loss": 3.3037919998168945, "loss_ce": 1.101643681526184, "loss_iou": 0.953125, "loss_num": 0.058837890625, "loss_xval": 2.203125, "num_input_tokens_seen": 9007948, "step": 78 }, { "epoch": 0.42473118279569894, "grad_norm": 68.128173828125, "learning_rate": 5e-07, "loss": 3.4856, "num_input_tokens_seen": 9117692, "step": 79 }, { "epoch": 0.42473118279569894, "loss": 3.687155246734619, "loss_ce": 1.79628586769104, "loss_iou": 0.76953125, "loss_num": 0.0703125, "loss_xval": 1.890625, "num_input_tokens_seen": 9117692, "step": 79 }, { "epoch": 0.43010752688172044, "grad_norm": 42.718910217285156, "learning_rate": 5e-07, "loss": 3.1134, "num_input_tokens_seen": 9233832, "step": 80 }, { "epoch": 0.43010752688172044, "loss": 3.5462148189544678, "loss_ce": 0.9456290006637573, "loss_iou": 1.0625, "loss_num": 0.09375, "loss_xval": 2.59375, "num_input_tokens_seen": 9233832, "step": 80 }, { "epoch": 0.43548387096774194, "grad_norm": 67.5402603149414, "learning_rate": 5e-07, "loss": 3.1359, "num_input_tokens_seen": 9349092, "step": 81 }, { "epoch": 0.43548387096774194, "loss": 2.6907777786254883, "loss_ce": 1.1180237531661987, "loss_iou": 0.640625, "loss_num": 0.057861328125, "loss_xval": 1.5703125, "num_input_tokens_seen": 9349092, "step": 81 }, { "epoch": 0.44086021505376344, "grad_norm": 53.73824691772461, "learning_rate": 5e-07, "loss": 2.8648, "num_input_tokens_seen": 9464712, "step": 82 }, { "epoch": 0.44086021505376344, "loss": 2.869441270828247, "loss_ce": 1.1375077962875366, "loss_iou": 0.73828125, "loss_num": 0.051513671875, "loss_xval": 1.734375, "num_input_tokens_seen": 9464712, "step": 82 }, { "epoch": 0.44623655913978494, "grad_norm": 41.40106201171875, "learning_rate": 5e-07, "loss": 2.8698, "num_input_tokens_seen": 9581704, "step": 83 }, { "epoch": 0.44623655913978494, "loss": 2.0970919132232666, "loss_ce": 0.614669919013977, "loss_iou": 0.6484375, "loss_num": 0.036865234375, "loss_xval": 1.484375, "num_input_tokens_seen": 9581704, "step": 83 }, { "epoch": 0.45161290322580644, "grad_norm": 38.47473907470703, "learning_rate": 5e-07, "loss": 2.9199, "num_input_tokens_seen": 9701016, "step": 84 }, { "epoch": 0.45161290322580644, "loss": 2.562135934829712, "loss_ce": 0.8995382785797119, "loss_iou": 0.6875, "loss_num": 0.05712890625, "loss_xval": 1.6640625, "num_input_tokens_seen": 9701016, "step": 84 }, { "epoch": 0.45698924731182794, "grad_norm": 40.05167770385742, "learning_rate": 5e-07, "loss": 3.2008, "num_input_tokens_seen": 9816416, "step": 85 }, { "epoch": 0.45698924731182794, "loss": 3.73551344871521, "loss_ce": 1.7941070795059204, "loss_iou": 0.7578125, "loss_num": 0.08544921875, "loss_xval": 1.9375, "num_input_tokens_seen": 9816416, "step": 85 }, { "epoch": 0.46236559139784944, "grad_norm": 53.655277252197266, "learning_rate": 5e-07, "loss": 3.1182, "num_input_tokens_seen": 9933356, "step": 86 }, { "epoch": 0.46236559139784944, "loss": 2.742077350616455, "loss_ce": 0.8914914131164551, "loss_iou": 0.77734375, "loss_num": 0.059814453125, "loss_xval": 1.8515625, "num_input_tokens_seen": 9933356, "step": 86 }, { "epoch": 0.46774193548387094, "grad_norm": 54.13434982299805, "learning_rate": 5e-07, "loss": 2.7394, "num_input_tokens_seen": 10051900, "step": 87 }, { "epoch": 0.46774193548387094, "loss": 2.2613234519958496, "loss_ce": 0.6724560260772705, "loss_iou": 0.66796875, "loss_num": 0.051513671875, "loss_xval": 1.5859375, "num_input_tokens_seen": 10051900, "step": 87 }, { "epoch": 0.4731182795698925, "grad_norm": 62.26861572265625, "learning_rate": 5e-07, "loss": 2.7106, "num_input_tokens_seen": 10169764, "step": 88 }, { "epoch": 0.4731182795698925, "loss": 2.3846375942230225, "loss_ce": 0.6243836879730225, "loss_iou": 0.74609375, "loss_num": 0.054443359375, "loss_xval": 1.7578125, "num_input_tokens_seen": 10169764, "step": 88 }, { "epoch": 0.478494623655914, "grad_norm": 40.17814636230469, "learning_rate": 5e-07, "loss": 2.899, "num_input_tokens_seen": 10283384, "step": 89 }, { "epoch": 0.478494623655914, "loss": 3.0028464794158936, "loss_ce": 1.1405417919158936, "loss_iou": 0.734375, "loss_num": 0.0791015625, "loss_xval": 1.859375, "num_input_tokens_seen": 10283384, "step": 89 }, { "epoch": 0.4838709677419355, "grad_norm": 37.448551177978516, "learning_rate": 5e-07, "loss": 2.8974, "num_input_tokens_seen": 10399664, "step": 90 }, { "epoch": 0.4838709677419355, "loss": 2.2535336017608643, "loss_ce": 0.7325375080108643, "loss_iou": 0.640625, "loss_num": 0.048095703125, "loss_xval": 1.5234375, "num_input_tokens_seen": 10399664, "step": 90 }, { "epoch": 0.489247311827957, "grad_norm": 44.297176361083984, "learning_rate": 5e-07, "loss": 3.1882, "num_input_tokens_seen": 10512512, "step": 91 }, { "epoch": 0.489247311827957, "loss": 2.5039544105529785, "loss_ce": 0.901171088218689, "loss_iou": 0.66796875, "loss_num": 0.0537109375, "loss_xval": 1.6015625, "num_input_tokens_seen": 10512512, "step": 91 }, { "epoch": 0.4946236559139785, "grad_norm": 49.75688934326172, "learning_rate": 5e-07, "loss": 2.9306, "num_input_tokens_seen": 10627204, "step": 92 }, { "epoch": 0.4946236559139785, "loss": 3.1024093627929688, "loss_ce": 0.9778978824615479, "loss_iou": 0.8984375, "loss_num": 0.0654296875, "loss_xval": 2.125, "num_input_tokens_seen": 10627204, "step": 92 }, { "epoch": 0.5, "grad_norm": 42.21867370605469, "learning_rate": 5e-07, "loss": 3.1496, "num_input_tokens_seen": 10741120, "step": 93 }, { "epoch": 0.5, "loss": 3.3632566928863525, "loss_ce": 1.4579832553863525, "loss_iou": 0.7734375, "loss_num": 0.07177734375, "loss_xval": 1.90625, "num_input_tokens_seen": 10741120, "step": 93 }, { "epoch": 0.5053763440860215, "grad_norm": 43.08932113647461, "learning_rate": 5e-07, "loss": 2.9015, "num_input_tokens_seen": 10857616, "step": 94 }, { "epoch": 0.5053763440860215, "loss": 3.5374746322631836, "loss_ce": 1.0277092456817627, "loss_iou": 1.0390625, "loss_num": 0.08642578125, "loss_xval": 2.515625, "num_input_tokens_seen": 10857616, "step": 94 }, { "epoch": 0.510752688172043, "grad_norm": 45.67325210571289, "learning_rate": 5e-07, "loss": 2.996, "num_input_tokens_seen": 10973044, "step": 95 }, { "epoch": 0.510752688172043, "loss": 3.16493558883667, "loss_ce": 1.13270902633667, "loss_iou": 0.8515625, "loss_num": 0.0654296875, "loss_xval": 2.03125, "num_input_tokens_seen": 10973044, "step": 95 }, { "epoch": 0.5161290322580645, "grad_norm": 56.953330993652344, "learning_rate": 5e-07, "loss": 2.9032, "num_input_tokens_seen": 11088800, "step": 96 }, { "epoch": 0.5161290322580645, "loss": 2.9522972106933594, "loss_ce": 1.1007349491119385, "loss_iou": 0.76953125, "loss_num": 0.0625, "loss_xval": 1.8515625, "num_input_tokens_seen": 11088800, "step": 96 }, { "epoch": 0.521505376344086, "grad_norm": 51.03895568847656, "learning_rate": 5e-07, "loss": 3.0946, "num_input_tokens_seen": 11205704, "step": 97 }, { "epoch": 0.521505376344086, "loss": 2.46919584274292, "loss_ce": 0.7006409168243408, "loss_iou": 0.7421875, "loss_num": 0.056884765625, "loss_xval": 1.765625, "num_input_tokens_seen": 11205704, "step": 97 }, { "epoch": 0.5268817204301075, "grad_norm": 60.607154846191406, "learning_rate": 5e-07, "loss": 3.3674, "num_input_tokens_seen": 11319480, "step": 98 }, { "epoch": 0.5268817204301075, "loss": 3.656581163406372, "loss_ce": 1.456385612487793, "loss_iou": 0.8984375, "loss_num": 0.080078125, "loss_xval": 2.203125, "num_input_tokens_seen": 11319480, "step": 98 }, { "epoch": 0.532258064516129, "grad_norm": 39.99761199951172, "learning_rate": 5e-07, "loss": 2.7459, "num_input_tokens_seen": 11432708, "step": 99 }, { "epoch": 0.532258064516129, "loss": 2.9446966648101807, "loss_ce": 1.0364935398101807, "loss_iou": 0.8046875, "loss_num": 0.06005859375, "loss_xval": 1.90625, "num_input_tokens_seen": 11432708, "step": 99 }, { "epoch": 0.5376344086021505, "grad_norm": 46.91141891479492, "learning_rate": 5e-07, "loss": 2.8398, "num_input_tokens_seen": 11550680, "step": 100 }, { "epoch": 0.5376344086021505, "loss": 3.1869680881500244, "loss_ce": 1.3542041778564453, "loss_iou": 0.72265625, "loss_num": 0.078125, "loss_xval": 1.8359375, "num_input_tokens_seen": 11550680, "step": 100 }, { "epoch": 0.543010752688172, "grad_norm": 48.34837341308594, "learning_rate": 5e-07, "loss": 2.903, "num_input_tokens_seen": 11666784, "step": 101 }, { "epoch": 0.543010752688172, "loss": 2.787130832672119, "loss_ce": 0.9404511451721191, "loss_iou": 0.75390625, "loss_num": 0.06787109375, "loss_xval": 1.84375, "num_input_tokens_seen": 11666784, "step": 101 }, { "epoch": 0.5483870967741935, "grad_norm": 48.75275802612305, "learning_rate": 5e-07, "loss": 2.7909, "num_input_tokens_seen": 11781264, "step": 102 }, { "epoch": 0.5483870967741935, "loss": 3.0503787994384766, "loss_ce": 1.057214617729187, "loss_iou": 0.81640625, "loss_num": 0.072265625, "loss_xval": 1.9921875, "num_input_tokens_seen": 11781264, "step": 102 }, { "epoch": 0.553763440860215, "grad_norm": 48.65825271606445, "learning_rate": 5e-07, "loss": 3.0815, "num_input_tokens_seen": 11896728, "step": 103 }, { "epoch": 0.553763440860215, "loss": 3.4087624549865723, "loss_ce": 1.1109110116958618, "loss_iou": 0.9453125, "loss_num": 0.08154296875, "loss_xval": 2.296875, "num_input_tokens_seen": 11896728, "step": 103 }, { "epoch": 0.5591397849462365, "grad_norm": 34.946895599365234, "learning_rate": 5e-07, "loss": 2.2946, "num_input_tokens_seen": 12014620, "step": 104 }, { "epoch": 0.5591397849462365, "loss": 2.5490450859069824, "loss_ce": 0.7326385974884033, "loss_iou": 0.77734375, "loss_num": 0.052001953125, "loss_xval": 1.8125, "num_input_tokens_seen": 12014620, "step": 104 }, { "epoch": 0.5645161290322581, "grad_norm": 53.63935089111328, "learning_rate": 5e-07, "loss": 2.7999, "num_input_tokens_seen": 12130624, "step": 105 }, { "epoch": 0.5645161290322581, "loss": 2.3726959228515625, "loss_ce": 0.6143950819969177, "loss_iou": 0.765625, "loss_num": 0.045166015625, "loss_xval": 1.7578125, "num_input_tokens_seen": 12130624, "step": 105 }, { "epoch": 0.5698924731182796, "grad_norm": 31.33278465270996, "learning_rate": 5e-07, "loss": 2.3879, "num_input_tokens_seen": 12249724, "step": 106 }, { "epoch": 0.5698924731182796, "loss": 2.0739071369171143, "loss_ce": 0.49724704027175903, "loss_iou": 0.6640625, "loss_num": 0.0498046875, "loss_xval": 1.578125, "num_input_tokens_seen": 12249724, "step": 106 }, { "epoch": 0.5752688172043011, "grad_norm": 53.64350509643555, "learning_rate": 5e-07, "loss": 2.9399, "num_input_tokens_seen": 12364208, "step": 107 }, { "epoch": 0.5752688172043011, "loss": 2.5310282707214355, "loss_ce": 0.6008524894714355, "loss_iou": 0.83203125, "loss_num": 0.052978515625, "loss_xval": 1.9296875, "num_input_tokens_seen": 12364208, "step": 107 }, { "epoch": 0.5806451612903226, "grad_norm": 49.99045944213867, "learning_rate": 5e-07, "loss": 2.4238, "num_input_tokens_seen": 12480484, "step": 108 }, { "epoch": 0.5806451612903226, "loss": 2.6745245456695557, "loss_ce": 0.7721809148788452, "loss_iou": 0.81640625, "loss_num": 0.0537109375, "loss_xval": 1.90625, "num_input_tokens_seen": 12480484, "step": 108 }, { "epoch": 0.5860215053763441, "grad_norm": 37.70599365234375, "learning_rate": 5e-07, "loss": 2.5211, "num_input_tokens_seen": 12598412, "step": 109 }, { "epoch": 0.5860215053763441, "loss": 2.4762144088745117, "loss_ce": 0.6598081588745117, "loss_iou": 0.75390625, "loss_num": 0.0615234375, "loss_xval": 1.8125, "num_input_tokens_seen": 12598412, "step": 109 }, { "epoch": 0.5913978494623656, "grad_norm": 34.94200897216797, "learning_rate": 5e-07, "loss": 2.3423, "num_input_tokens_seen": 12714328, "step": 110 }, { "epoch": 0.5913978494623656, "loss": 2.537071704864502, "loss_ce": 0.9482043385505676, "loss_iou": 0.65234375, "loss_num": 0.056640625, "loss_xval": 1.5859375, "num_input_tokens_seen": 12714328, "step": 110 }, { "epoch": 0.5967741935483871, "grad_norm": 45.41818618774414, "learning_rate": 5e-07, "loss": 2.2928, "num_input_tokens_seen": 12832360, "step": 111 }, { "epoch": 0.5967741935483871, "loss": 2.0729188919067383, "loss_ce": 0.5870789885520935, "loss_iou": 0.609375, "loss_num": 0.052734375, "loss_xval": 1.484375, "num_input_tokens_seen": 12832360, "step": 111 }, { "epoch": 0.6021505376344086, "grad_norm": 43.42786407470703, "learning_rate": 5e-07, "loss": 2.4864, "num_input_tokens_seen": 12949940, "step": 112 }, { "epoch": 0.6021505376344086, "loss": 2.642192840576172, "loss_ce": 0.7349663376808167, "loss_iou": 0.78515625, "loss_num": 0.0673828125, "loss_xval": 1.90625, "num_input_tokens_seen": 12949940, "step": 112 }, { "epoch": 0.6075268817204301, "grad_norm": 60.17188262939453, "learning_rate": 5e-07, "loss": 2.5578, "num_input_tokens_seen": 13067140, "step": 113 }, { "epoch": 0.6075268817204301, "loss": 2.588433027267456, "loss_ce": 0.43120643496513367, "loss_iou": 0.8984375, "loss_num": 0.07177734375, "loss_xval": 2.15625, "num_input_tokens_seen": 13067140, "step": 113 }, { "epoch": 0.6129032258064516, "grad_norm": 81.76246643066406, "learning_rate": 5e-07, "loss": 2.3827, "num_input_tokens_seen": 13179680, "step": 114 }, { "epoch": 0.6129032258064516, "loss": 2.3307933807373047, "loss_ce": 0.6555004119873047, "loss_iou": 0.703125, "loss_num": 0.05322265625, "loss_xval": 1.671875, "num_input_tokens_seen": 13179680, "step": 114 }, { "epoch": 0.6182795698924731, "grad_norm": 39.99794006347656, "learning_rate": 5e-07, "loss": 2.3168, "num_input_tokens_seen": 13296936, "step": 115 }, { "epoch": 0.6182795698924731, "loss": 2.6183619499206543, "loss_ce": 0.6759790778160095, "loss_iou": 0.83984375, "loss_num": 0.05224609375, "loss_xval": 1.9453125, "num_input_tokens_seen": 13296936, "step": 115 }, { "epoch": 0.6236559139784946, "grad_norm": 46.15687561035156, "learning_rate": 5e-07, "loss": 2.5706, "num_input_tokens_seen": 13412612, "step": 116 }, { "epoch": 0.6236559139784946, "loss": 3.2330923080444336, "loss_ce": 0.815123438835144, "loss_iou": 0.99609375, "loss_num": 0.08447265625, "loss_xval": 2.421875, "num_input_tokens_seen": 13412612, "step": 116 }, { "epoch": 0.6290322580645161, "grad_norm": 48.28379821777344, "learning_rate": 5e-07, "loss": 2.2415, "num_input_tokens_seen": 13529820, "step": 117 }, { "epoch": 0.6290322580645161, "loss": 1.7463641166687012, "loss_ce": 0.4978289306163788, "loss_iou": 0.51171875, "loss_num": 0.045166015625, "loss_xval": 1.25, "num_input_tokens_seen": 13529820, "step": 117 }, { "epoch": 0.6344086021505376, "grad_norm": 35.34401321411133, "learning_rate": 5e-07, "loss": 2.5335, "num_input_tokens_seen": 13645328, "step": 118 }, { "epoch": 0.6344086021505376, "loss": 2.519780158996582, "loss_ce": 0.5412644743919373, "loss_iou": 0.75390625, "loss_num": 0.09423828125, "loss_xval": 1.9765625, "num_input_tokens_seen": 13645328, "step": 118 }, { "epoch": 0.6397849462365591, "grad_norm": 40.59331512451172, "learning_rate": 5e-07, "loss": 2.5484, "num_input_tokens_seen": 13762108, "step": 119 }, { "epoch": 0.6397849462365591, "loss": 2.397430896759033, "loss_ce": 0.6012884974479675, "loss_iou": 0.75, "loss_num": 0.06005859375, "loss_xval": 1.796875, "num_input_tokens_seen": 13762108, "step": 119 }, { "epoch": 0.6451612903225806, "grad_norm": 130.24134826660156, "learning_rate": 5e-07, "loss": 2.1853, "num_input_tokens_seen": 13879192, "step": 120 }, { "epoch": 0.6451612903225806, "loss": 2.2687573432922363, "loss_ce": 0.24434331059455872, "loss_iou": 0.8359375, "loss_num": 0.0703125, "loss_xval": 2.03125, "num_input_tokens_seen": 13879192, "step": 120 }, { "epoch": 0.6505376344086021, "grad_norm": 80.86445617675781, "learning_rate": 5e-07, "loss": 2.5251, "num_input_tokens_seen": 13994620, "step": 121 }, { "epoch": 0.6505376344086021, "loss": 2.8564462661743164, "loss_ce": 0.6826180219650269, "loss_iou": 0.93359375, "loss_num": 0.0615234375, "loss_xval": 2.171875, "num_input_tokens_seen": 13994620, "step": 121 }, { "epoch": 0.6559139784946236, "grad_norm": 47.47772979736328, "learning_rate": 5e-07, "loss": 2.4612, "num_input_tokens_seen": 14110500, "step": 122 }, { "epoch": 0.6559139784946236, "loss": 2.372309446334839, "loss_ce": 0.3337351679801941, "loss_iou": 0.859375, "loss_num": 0.064453125, "loss_xval": 2.03125, "num_input_tokens_seen": 14110500, "step": 122 }, { "epoch": 0.6612903225806451, "grad_norm": 41.24259948730469, "learning_rate": 5e-07, "loss": 2.131, "num_input_tokens_seen": 14225520, "step": 123 }, { "epoch": 0.6612903225806451, "loss": 2.2154526710510254, "loss_ce": 0.3150619864463806, "loss_iou": 0.80078125, "loss_num": 0.059326171875, "loss_xval": 1.8984375, "num_input_tokens_seen": 14225520, "step": 123 }, { "epoch": 0.6666666666666666, "grad_norm": 35.40959548950195, "learning_rate": 5e-07, "loss": 2.3792, "num_input_tokens_seen": 14340080, "step": 124 }, { "epoch": 0.6666666666666666, "loss": 2.879027843475342, "loss_ce": 0.728637158870697, "loss_iou": 0.890625, "loss_num": 0.07373046875, "loss_xval": 2.15625, "num_input_tokens_seen": 14340080, "step": 124 }, { "epoch": 0.6720430107526881, "grad_norm": 45.46902084350586, "learning_rate": 5e-07, "loss": 2.4445, "num_input_tokens_seen": 14454004, "step": 125 }, { "epoch": 0.6720430107526881, "loss": 2.509049415588379, "loss_ce": 0.4782877564430237, "loss_iou": 0.82421875, "loss_num": 0.076171875, "loss_xval": 2.03125, "num_input_tokens_seen": 14454004, "step": 125 }, { "epoch": 0.6774193548387096, "grad_norm": 120.44620513916016, "learning_rate": 5e-07, "loss": 2.0838, "num_input_tokens_seen": 14569640, "step": 126 }, { "epoch": 0.6774193548387096, "loss": 2.3757736682891846, "loss_ce": 0.47733622789382935, "loss_iou": 0.7890625, "loss_num": 0.06396484375, "loss_xval": 1.8984375, "num_input_tokens_seen": 14569640, "step": 126 }, { "epoch": 0.6827956989247311, "grad_norm": 166.53773498535156, "learning_rate": 5e-07, "loss": 2.4042, "num_input_tokens_seen": 14681336, "step": 127 }, { "epoch": 0.6827956989247311, "loss": 2.195580244064331, "loss_ce": 0.592064619064331, "loss_iou": 0.65234375, "loss_num": 0.060302734375, "loss_xval": 1.6015625, "num_input_tokens_seen": 14681336, "step": 127 }, { "epoch": 0.6881720430107527, "grad_norm": 32.83817672729492, "learning_rate": 5e-07, "loss": 2.107, "num_input_tokens_seen": 14795612, "step": 128 }, { "epoch": 0.6881720430107527, "loss": 2.6388978958129883, "loss_ce": 0.7028627991676331, "loss_iou": 0.8046875, "loss_num": 0.06591796875, "loss_xval": 1.9375, "num_input_tokens_seen": 14795612, "step": 128 }, { "epoch": 0.6935483870967742, "grad_norm": 66.23137664794922, "learning_rate": 5e-07, "loss": 2.0047, "num_input_tokens_seen": 14910672, "step": 129 }, { "epoch": 0.6935483870967742, "loss": 2.3332712650299072, "loss_ce": 0.4660838842391968, "loss_iou": 0.76953125, "loss_num": 0.06494140625, "loss_xval": 1.8671875, "num_input_tokens_seen": 14910672, "step": 129 }, { "epoch": 0.6989247311827957, "grad_norm": 36.8791389465332, "learning_rate": 5e-07, "loss": 1.923, "num_input_tokens_seen": 15026176, "step": 130 }, { "epoch": 0.6989247311827957, "loss": 1.9672675132751465, "loss_ce": 0.38718944787979126, "loss_iou": 0.671875, "loss_num": 0.04638671875, "loss_xval": 1.578125, "num_input_tokens_seen": 15026176, "step": 130 }, { "epoch": 0.7043010752688172, "grad_norm": 39.1432991027832, "learning_rate": 5e-07, "loss": 2.1721, "num_input_tokens_seen": 15143192, "step": 131 }, { "epoch": 0.7043010752688172, "loss": 2.2844886779785156, "loss_ce": 0.3880044221878052, "loss_iou": 0.78125, "loss_num": 0.06689453125, "loss_xval": 1.8984375, "num_input_tokens_seen": 15143192, "step": 131 }, { "epoch": 0.7096774193548387, "grad_norm": 36.759315490722656, "learning_rate": 5e-07, "loss": 2.2538, "num_input_tokens_seen": 15259120, "step": 132 }, { "epoch": 0.7096774193548387, "loss": 2.1002144813537598, "loss_ce": 0.33068329095840454, "loss_iou": 0.7421875, "loss_num": 0.0576171875, "loss_xval": 1.765625, "num_input_tokens_seen": 15259120, "step": 132 }, { "epoch": 0.7150537634408602, "grad_norm": 38.42310333251953, "learning_rate": 5e-07, "loss": 2.3162, "num_input_tokens_seen": 15373548, "step": 133 }, { "epoch": 0.7150537634408602, "loss": 2.3711953163146973, "loss_ce": 0.3741249740123749, "loss_iou": 0.8125, "loss_num": 0.07470703125, "loss_xval": 2.0, "num_input_tokens_seen": 15373548, "step": 133 }, { "epoch": 0.7204301075268817, "grad_norm": 31.50933837890625, "learning_rate": 5e-07, "loss": 2.1203, "num_input_tokens_seen": 15490932, "step": 134 }, { "epoch": 0.7204301075268817, "loss": 2.1362204551696777, "loss_ce": 0.2914940118789673, "loss_iou": 0.76953125, "loss_num": 0.060546875, "loss_xval": 1.84375, "num_input_tokens_seen": 15490932, "step": 134 }, { "epoch": 0.7258064516129032, "grad_norm": 50.20446014404297, "learning_rate": 5e-07, "loss": 2.0705, "num_input_tokens_seen": 15609176, "step": 135 }, { "epoch": 0.7258064516129032, "loss": 2.0689401626586914, "loss_ce": 0.3013620376586914, "loss_iou": 0.74609375, "loss_num": 0.054931640625, "loss_xval": 1.765625, "num_input_tokens_seen": 15609176, "step": 135 }, { "epoch": 0.7311827956989247, "grad_norm": 49.4326171875, "learning_rate": 5e-07, "loss": 2.1821, "num_input_tokens_seen": 15726016, "step": 136 }, { "epoch": 0.7311827956989247, "loss": 2.2778074741363525, "loss_ce": 0.37790507078170776, "loss_iou": 0.83203125, "loss_num": 0.0478515625, "loss_xval": 1.8984375, "num_input_tokens_seen": 15726016, "step": 136 }, { "epoch": 0.7365591397849462, "grad_norm": 42.53056335449219, "learning_rate": 5e-07, "loss": 2.1109, "num_input_tokens_seen": 15844540, "step": 137 }, { "epoch": 0.7365591397849462, "loss": 1.9710431098937988, "loss_ce": 0.31576961278915405, "loss_iou": 0.66796875, "loss_num": 0.0634765625, "loss_xval": 1.65625, "num_input_tokens_seen": 15844540, "step": 137 }, { "epoch": 0.7419354838709677, "grad_norm": 35.93233871459961, "learning_rate": 5e-07, "loss": 2.0841, "num_input_tokens_seen": 15959872, "step": 138 }, { "epoch": 0.7419354838709677, "loss": 2.182249069213867, "loss_ce": 0.4078349471092224, "loss_iou": 0.7421875, "loss_num": 0.05810546875, "loss_xval": 1.7734375, "num_input_tokens_seen": 15959872, "step": 138 }, { "epoch": 0.7473118279569892, "grad_norm": 33.831214904785156, "learning_rate": 5e-07, "loss": 2.2448, "num_input_tokens_seen": 16074936, "step": 139 }, { "epoch": 0.7473118279569892, "loss": 1.9252322912216187, "loss_ce": 0.4760134816169739, "loss_iou": 0.5390625, "loss_num": 0.07373046875, "loss_xval": 1.453125, "num_input_tokens_seen": 16074936, "step": 139 }, { "epoch": 0.7526881720430108, "grad_norm": 40.59697341918945, "learning_rate": 5e-07, "loss": 1.8763, "num_input_tokens_seen": 16188076, "step": 140 }, { "epoch": 0.7526881720430108, "loss": 1.8560432195663452, "loss_ce": 0.37459784746170044, "loss_iou": 0.58984375, "loss_num": 0.060546875, "loss_xval": 1.484375, "num_input_tokens_seen": 16188076, "step": 140 }, { "epoch": 0.7580645161290323, "grad_norm": 68.76010131835938, "learning_rate": 5e-07, "loss": 1.9759, "num_input_tokens_seen": 16301772, "step": 141 }, { "epoch": 0.7580645161290323, "loss": 1.9830987453460693, "loss_ce": 0.34149718284606934, "loss_iou": 0.68359375, "loss_num": 0.0546875, "loss_xval": 1.640625, "num_input_tokens_seen": 16301772, "step": 141 }, { "epoch": 0.7634408602150538, "grad_norm": 49.800819396972656, "learning_rate": 5e-07, "loss": 1.9522, "num_input_tokens_seen": 16419788, "step": 142 }, { "epoch": 0.7634408602150538, "loss": 2.142573356628418, "loss_ce": 0.35888200998306274, "loss_iou": 0.7421875, "loss_num": 0.060546875, "loss_xval": 1.78125, "num_input_tokens_seen": 16419788, "step": 142 }, { "epoch": 0.7688172043010753, "grad_norm": 52.86172866821289, "learning_rate": 5e-07, "loss": 2.1587, "num_input_tokens_seen": 16531856, "step": 143 }, { "epoch": 0.7688172043010753, "loss": 2.4006338119506836, "loss_ce": 0.46166908740997314, "loss_iou": 0.7578125, "loss_num": 0.08544921875, "loss_xval": 1.9375, "num_input_tokens_seen": 16531856, "step": 143 }, { "epoch": 0.7741935483870968, "grad_norm": 100.81636810302734, "learning_rate": 5e-07, "loss": 2.121, "num_input_tokens_seen": 16644928, "step": 144 }, { "epoch": 0.7741935483870968, "loss": 1.6544090509414673, "loss_ce": 0.2535302937030792, "loss_iou": 0.56640625, "loss_num": 0.05322265625, "loss_xval": 1.3984375, "num_input_tokens_seen": 16644928, "step": 144 }, { "epoch": 0.7795698924731183, "grad_norm": 28.732891082763672, "learning_rate": 5e-07, "loss": 2.099, "num_input_tokens_seen": 16760916, "step": 145 }, { "epoch": 0.7795698924731183, "loss": 1.9689574241638184, "loss_ce": 0.25386959314346313, "loss_iou": 0.70703125, "loss_num": 0.060546875, "loss_xval": 1.71875, "num_input_tokens_seen": 16760916, "step": 145 }, { "epoch": 0.7849462365591398, "grad_norm": 30.291162490844727, "learning_rate": 5e-07, "loss": 1.8544, "num_input_tokens_seen": 16874232, "step": 146 }, { "epoch": 0.7849462365591398, "loss": 1.67598557472229, "loss_ce": 0.267294317483902, "loss_iou": 0.58984375, "loss_num": 0.0458984375, "loss_xval": 1.40625, "num_input_tokens_seen": 16874232, "step": 146 }, { "epoch": 0.7903225806451613, "grad_norm": 113.18450927734375, "learning_rate": 5e-07, "loss": 2.2493, "num_input_tokens_seen": 16987704, "step": 147 }, { "epoch": 0.7903225806451613, "loss": 2.0547547340393066, "loss_ce": 0.29498904943466187, "loss_iou": 0.734375, "loss_num": 0.05810546875, "loss_xval": 1.7578125, "num_input_tokens_seen": 16987704, "step": 147 }, { "epoch": 0.7956989247311828, "grad_norm": 40.227439880371094, "learning_rate": 5e-07, "loss": 1.9667, "num_input_tokens_seen": 17106072, "step": 148 }, { "epoch": 0.7956989247311828, "loss": 1.996564507484436, "loss_ce": 0.21775591373443604, "loss_iou": 0.75, "loss_num": 0.054931640625, "loss_xval": 1.78125, "num_input_tokens_seen": 17106072, "step": 148 }, { "epoch": 0.8010752688172043, "grad_norm": 66.02403259277344, "learning_rate": 5e-07, "loss": 1.9664, "num_input_tokens_seen": 17217744, "step": 149 }, { "epoch": 0.8010752688172043, "loss": 2.033226490020752, "loss_ce": 0.3574453592300415, "loss_iou": 0.66015625, "loss_num": 0.07080078125, "loss_xval": 1.671875, "num_input_tokens_seen": 17217744, "step": 149 }, { "epoch": 0.8064516129032258, "grad_norm": 35.51504898071289, "learning_rate": 5e-07, "loss": 1.8801, "num_input_tokens_seen": 17333392, "step": 150 }, { "epoch": 0.8064516129032258, "loss": 2.198267936706543, "loss_ce": 0.35451778769493103, "loss_iou": 0.77734375, "loss_num": 0.05810546875, "loss_xval": 1.84375, "num_input_tokens_seen": 17333392, "step": 150 }, { "epoch": 0.8118279569892473, "grad_norm": 31.917646408081055, "learning_rate": 5e-07, "loss": 1.9761, "num_input_tokens_seen": 17449600, "step": 151 }, { "epoch": 0.8118279569892473, "loss": 1.9336137771606445, "loss_ce": 0.26320362091064453, "loss_iou": 0.6796875, "loss_num": 0.06298828125, "loss_xval": 1.671875, "num_input_tokens_seen": 17449600, "step": 151 }, { "epoch": 0.8172043010752689, "grad_norm": 33.78981399536133, "learning_rate": 5e-07, "loss": 1.9601, "num_input_tokens_seen": 17568828, "step": 152 }, { "epoch": 0.8172043010752689, "loss": 1.73143470287323, "loss_ce": 0.18358317017555237, "loss_iou": 0.66015625, "loss_num": 0.045654296875, "loss_xval": 1.546875, "num_input_tokens_seen": 17568828, "step": 152 }, { "epoch": 0.8225806451612904, "grad_norm": 53.87074661254883, "learning_rate": 5e-07, "loss": 1.9187, "num_input_tokens_seen": 17682876, "step": 153 }, { "epoch": 0.8225806451612904, "loss": 2.4083755016326904, "loss_ce": 0.29411765933036804, "loss_iou": 0.84765625, "loss_num": 0.08349609375, "loss_xval": 2.109375, "num_input_tokens_seen": 17682876, "step": 153 }, { "epoch": 0.8279569892473119, "grad_norm": 32.55501174926758, "learning_rate": 5e-07, "loss": 1.9606, "num_input_tokens_seen": 17799272, "step": 154 }, { "epoch": 0.8279569892473119, "loss": 2.175846815109253, "loss_ce": 0.27985072135925293, "loss_iou": 0.81640625, "loss_num": 0.05224609375, "loss_xval": 1.8984375, "num_input_tokens_seen": 17799272, "step": 154 }, { "epoch": 0.8333333333333334, "grad_norm": 37.96480178833008, "learning_rate": 5e-07, "loss": 1.776, "num_input_tokens_seen": 17918928, "step": 155 }, { "epoch": 0.8333333333333334, "loss": 1.6038984060287476, "loss_ce": 0.283585786819458, "loss_iou": 0.56640625, "loss_num": 0.038330078125, "loss_xval": 1.3203125, "num_input_tokens_seen": 17918928, "step": 155 }, { "epoch": 0.8387096774193549, "grad_norm": 33.3166618347168, "learning_rate": 5e-07, "loss": 1.8396, "num_input_tokens_seen": 18035744, "step": 156 }, { "epoch": 0.8387096774193549, "loss": 1.701444387435913, "loss_ce": 0.20681558549404144, "loss_iou": 0.62109375, "loss_num": 0.050537109375, "loss_xval": 1.4921875, "num_input_tokens_seen": 18035744, "step": 156 }, { "epoch": 0.8440860215053764, "grad_norm": 45.82835388183594, "learning_rate": 5e-07, "loss": 1.7584, "num_input_tokens_seen": 18150004, "step": 157 }, { "epoch": 0.8440860215053764, "loss": 1.849740982055664, "loss_ce": 0.18274876475334167, "loss_iou": 0.66796875, "loss_num": 0.06640625, "loss_xval": 1.6640625, "num_input_tokens_seen": 18150004, "step": 157 }, { "epoch": 0.8494623655913979, "grad_norm": 38.835365295410156, "learning_rate": 5e-07, "loss": 1.7346, "num_input_tokens_seen": 18262628, "step": 158 }, { "epoch": 0.8494623655913979, "loss": 2.0613901615142822, "loss_ce": 0.2669565677642822, "loss_iou": 0.7578125, "loss_num": 0.054931640625, "loss_xval": 1.796875, "num_input_tokens_seen": 18262628, "step": 158 }, { "epoch": 0.8548387096774194, "grad_norm": 25.594383239746094, "learning_rate": 5e-07, "loss": 1.7185, "num_input_tokens_seen": 18376516, "step": 159 }, { "epoch": 0.8548387096774194, "loss": 1.579763412475586, "loss_ce": 0.12322040647268295, "loss_iou": 0.63671875, "loss_num": 0.0361328125, "loss_xval": 1.453125, "num_input_tokens_seen": 18376516, "step": 159 }, { "epoch": 0.8602150537634409, "grad_norm": 44.70818328857422, "learning_rate": 5e-07, "loss": 1.729, "num_input_tokens_seen": 18491028, "step": 160 }, { "epoch": 0.8602150537634409, "loss": 1.640080451965332, "loss_ce": 0.23614981770515442, "loss_iou": 0.57421875, "loss_num": 0.051513671875, "loss_xval": 1.40625, "num_input_tokens_seen": 18491028, "step": 160 }, { "epoch": 0.8655913978494624, "grad_norm": 110.21721649169922, "learning_rate": 5e-07, "loss": 1.8879, "num_input_tokens_seen": 18606980, "step": 161 }, { "epoch": 0.8655913978494624, "loss": 1.8018466234207153, "loss_ce": 0.22713962197303772, "loss_iou": 0.62109375, "loss_num": 0.06689453125, "loss_xval": 1.578125, "num_input_tokens_seen": 18606980, "step": 161 }, { "epoch": 0.8709677419354839, "grad_norm": 28.112640380859375, "learning_rate": 5e-07, "loss": 2.1505, "num_input_tokens_seen": 18719788, "step": 162 }, { "epoch": 0.8709677419354839, "loss": 1.8684906959533691, "loss_ce": 0.20198675990104675, "loss_iou": 0.68359375, "loss_num": 0.0595703125, "loss_xval": 1.6640625, "num_input_tokens_seen": 18719788, "step": 162 }, { "epoch": 0.8763440860215054, "grad_norm": 36.876163482666016, "learning_rate": 5e-07, "loss": 1.8824, "num_input_tokens_seen": 18833408, "step": 163 }, { "epoch": 0.8763440860215054, "loss": 2.1256818771362305, "loss_ce": 0.19599446654319763, "loss_iou": 0.80859375, "loss_num": 0.06298828125, "loss_xval": 1.9296875, "num_input_tokens_seen": 18833408, "step": 163 }, { "epoch": 0.8817204301075269, "grad_norm": 40.819610595703125, "learning_rate": 5e-07, "loss": 1.9385, "num_input_tokens_seen": 18946576, "step": 164 }, { "epoch": 0.8817204301075269, "loss": 2.1568174362182617, "loss_ce": 0.1900205910205841, "loss_iou": 0.8046875, "loss_num": 0.0712890625, "loss_xval": 1.96875, "num_input_tokens_seen": 18946576, "step": 164 }, { "epoch": 0.8870967741935484, "grad_norm": 46.3994140625, "learning_rate": 5e-07, "loss": 1.7347, "num_input_tokens_seen": 19060700, "step": 165 }, { "epoch": 0.8870967741935484, "loss": 1.5002691745758057, "loss_ce": 0.16774974763393402, "loss_iou": 0.5625, "loss_num": 0.04150390625, "loss_xval": 1.3359375, "num_input_tokens_seen": 19060700, "step": 165 }, { "epoch": 0.8924731182795699, "grad_norm": 39.95574188232422, "learning_rate": 5e-07, "loss": 1.9221, "num_input_tokens_seen": 19175700, "step": 166 }, { "epoch": 0.8924731182795699, "loss": 1.637725830078125, "loss_ce": 0.13162225484848022, "loss_iou": 0.625, "loss_num": 0.050537109375, "loss_xval": 1.5078125, "num_input_tokens_seen": 19175700, "step": 166 }, { "epoch": 0.8978494623655914, "grad_norm": 28.067472457885742, "learning_rate": 5e-07, "loss": 1.6416, "num_input_tokens_seen": 19291272, "step": 167 }, { "epoch": 0.8978494623655914, "loss": 2.2798938751220703, "loss_ce": 0.13145627081394196, "loss_iou": 0.91015625, "loss_num": 0.06640625, "loss_xval": 2.15625, "num_input_tokens_seen": 19291272, "step": 167 }, { "epoch": 0.9032258064516129, "grad_norm": 33.39390563964844, "learning_rate": 5e-07, "loss": 1.9694, "num_input_tokens_seen": 19405720, "step": 168 }, { "epoch": 0.9032258064516129, "loss": 1.5906121730804443, "loss_ce": 0.1277216076850891, "loss_iou": 0.56640625, "loss_num": 0.06640625, "loss_xval": 1.4609375, "num_input_tokens_seen": 19405720, "step": 168 }, { "epoch": 0.9086021505376344, "grad_norm": 27.440271377563477, "learning_rate": 5e-07, "loss": 1.793, "num_input_tokens_seen": 19521760, "step": 169 }, { "epoch": 0.9086021505376344, "loss": 1.8443773984909058, "loss_ce": 0.14051993191242218, "loss_iou": 0.7109375, "loss_num": 0.05615234375, "loss_xval": 1.703125, "num_input_tokens_seen": 19521760, "step": 169 }, { "epoch": 0.9139784946236559, "grad_norm": 26.10700798034668, "learning_rate": 5e-07, "loss": 1.802, "num_input_tokens_seen": 19637644, "step": 170 }, { "epoch": 0.9139784946236559, "loss": 1.5932401418685913, "loss_ce": 0.07663855701684952, "loss_iou": 0.62890625, "loss_num": 0.052490234375, "loss_xval": 1.515625, "num_input_tokens_seen": 19637644, "step": 170 }, { "epoch": 0.9193548387096774, "grad_norm": 27.8729305267334, "learning_rate": 5e-07, "loss": 1.6406, "num_input_tokens_seen": 19754448, "step": 171 }, { "epoch": 0.9193548387096774, "loss": 1.9075937271118164, "loss_ce": 0.0521249920129776, "loss_iou": 0.80078125, "loss_num": 0.051513671875, "loss_xval": 1.859375, "num_input_tokens_seen": 19754448, "step": 171 }, { "epoch": 0.9247311827956989, "grad_norm": 34.33494567871094, "learning_rate": 5e-07, "loss": 1.7384, "num_input_tokens_seen": 19870072, "step": 172 }, { "epoch": 0.9247311827956989, "loss": 1.5047760009765625, "loss_ce": 0.1024322658777237, "loss_iou": 0.61328125, "loss_num": 0.03466796875, "loss_xval": 1.40625, "num_input_tokens_seen": 19870072, "step": 172 }, { "epoch": 0.9301075268817204, "grad_norm": 56.14564895629883, "learning_rate": 5e-07, "loss": 1.2677, "num_input_tokens_seen": 19988564, "step": 173 }, { "epoch": 0.9301075268817204, "loss": 1.3803539276123047, "loss_ce": 0.07078355550765991, "loss_iou": 0.546875, "loss_num": 0.04296875, "loss_xval": 1.3125, "num_input_tokens_seen": 19988564, "step": 173 }, { "epoch": 0.9354838709677419, "grad_norm": 29.92527198791504, "learning_rate": 5e-07, "loss": 1.5656, "num_input_tokens_seen": 20107852, "step": 174 }, { "epoch": 0.9354838709677419, "loss": 1.5528461933135986, "loss_ce": 0.06602971255779266, "loss_iou": 0.63671875, "loss_num": 0.0419921875, "loss_xval": 1.484375, "num_input_tokens_seen": 20107852, "step": 174 }, { "epoch": 0.9408602150537635, "grad_norm": 63.58463668823242, "learning_rate": 5e-07, "loss": 1.7904, "num_input_tokens_seen": 20222708, "step": 175 }, { "epoch": 0.9408602150537635, "loss": 1.6647460460662842, "loss_ce": 0.04194331169128418, "loss_iou": 0.6484375, "loss_num": 0.06591796875, "loss_xval": 1.625, "num_input_tokens_seen": 20222708, "step": 175 }, { "epoch": 0.946236559139785, "grad_norm": 35.27510452270508, "learning_rate": 5e-07, "loss": 1.58, "num_input_tokens_seen": 20340852, "step": 176 }, { "epoch": 0.946236559139785, "loss": 1.4713125228881836, "loss_ce": 0.04601961374282837, "loss_iou": 0.546875, "loss_num": 0.06591796875, "loss_xval": 1.421875, "num_input_tokens_seen": 20340852, "step": 176 }, { "epoch": 0.9516129032258065, "grad_norm": 37.32596969604492, "learning_rate": 5e-07, "loss": 1.3319, "num_input_tokens_seen": 20458340, "step": 177 }, { "epoch": 0.9516129032258065, "loss": 1.5491204261779785, "loss_ce": 0.0386224165558815, "loss_iou": 0.6171875, "loss_num": 0.05517578125, "loss_xval": 1.5078125, "num_input_tokens_seen": 20458340, "step": 177 }, { "epoch": 0.956989247311828, "grad_norm": 34.88058090209961, "learning_rate": 5e-07, "loss": 1.5426, "num_input_tokens_seen": 20574860, "step": 178 }, { "epoch": 0.956989247311828, "loss": 1.3569762706756592, "loss_ce": 0.06010131165385246, "loss_iou": 0.50390625, "loss_num": 0.057373046875, "loss_xval": 1.296875, "num_input_tokens_seen": 20574860, "step": 178 }, { "epoch": 0.9623655913978495, "grad_norm": 32.96063995361328, "learning_rate": 5e-07, "loss": 1.5279, "num_input_tokens_seen": 20693460, "step": 179 }, { "epoch": 0.9623655913978495, "loss": 1.4921629428863525, "loss_ce": 0.05710437148809433, "loss_iou": 0.61328125, "loss_num": 0.042236328125, "loss_xval": 1.4375, "num_input_tokens_seen": 20693460, "step": 179 }, { "epoch": 0.967741935483871, "grad_norm": 52.93255615234375, "learning_rate": 5e-07, "loss": 1.673, "num_input_tokens_seen": 20806944, "step": 180 }, { "epoch": 0.967741935483871, "loss": 1.680335283279419, "loss_ce": 0.0377572625875473, "loss_iou": 0.6796875, "loss_num": 0.057373046875, "loss_xval": 1.640625, "num_input_tokens_seen": 20806944, "step": 180 }, { "epoch": 0.9731182795698925, "grad_norm": 36.093170166015625, "learning_rate": 5e-07, "loss": 1.5173, "num_input_tokens_seen": 20921484, "step": 181 }, { "epoch": 0.9731182795698925, "loss": 1.9919745922088623, "loss_ce": 0.025177650153636932, "loss_iou": 0.80859375, "loss_num": 0.0693359375, "loss_xval": 1.96875, "num_input_tokens_seen": 20921484, "step": 181 }, { "epoch": 0.978494623655914, "grad_norm": 18.307016372680664, "learning_rate": 5e-07, "loss": 1.2441, "num_input_tokens_seen": 21036056, "step": 182 }, { "epoch": 0.978494623655914, "loss": 1.3939225673675537, "loss_ce": 0.0389421284198761, "loss_iou": 0.56640625, "loss_num": 0.045166015625, "loss_xval": 1.3515625, "num_input_tokens_seen": 21036056, "step": 182 }, { "epoch": 0.9838709677419355, "grad_norm": 27.69432258605957, "learning_rate": 5e-07, "loss": 1.4301, "num_input_tokens_seen": 21150092, "step": 183 }, { "epoch": 0.9838709677419355, "loss": 1.4880752563476562, "loss_ce": 0.0183488130569458, "loss_iou": 0.6171875, "loss_num": 0.046875, "loss_xval": 1.46875, "num_input_tokens_seen": 21150092, "step": 183 }, { "epoch": 0.989247311827957, "grad_norm": 54.88238525390625, "learning_rate": 5e-07, "loss": 1.7092, "num_input_tokens_seen": 21266740, "step": 184 }, { "epoch": 0.989247311827957, "loss": 1.5082449913024902, "loss_ce": 0.030217673629522324, "loss_iou": 0.62109375, "loss_num": 0.046630859375, "loss_xval": 1.4765625, "num_input_tokens_seen": 21266740, "step": 184 }, { "epoch": 0.9946236559139785, "grad_norm": 19.39280891418457, "learning_rate": 5e-07, "loss": 1.5, "num_input_tokens_seen": 21382592, "step": 185 }, { "epoch": 0.9946236559139785, "loss": 1.6721198558807373, "loss_ce": 0.03344807028770447, "loss_iou": 0.68359375, "loss_num": 0.0546875, "loss_xval": 1.640625, "num_input_tokens_seen": 21382592, "step": 185 }, { "epoch": 1.0, "grad_norm": 28.364171981811523, "learning_rate": 5e-07, "loss": 1.5434, "num_input_tokens_seen": 21500000, "step": 186 }, { "epoch": 1.0, "loss": 1.494255781173706, "loss_ce": 0.015984252095222473, "loss_iou": 0.59765625, "loss_num": 0.055908203125, "loss_xval": 1.4765625, "num_input_tokens_seen": 21500000, "step": 186 }, { "epoch": 1.0053763440860215, "grad_norm": 33.421836853027344, "learning_rate": 5e-07, "loss": 1.5766, "num_input_tokens_seen": 21614084, "step": 187 }, { "epoch": 1.0053763440860215, "loss": 1.6684014797210693, "loss_ce": 0.023870214819908142, "loss_iou": 0.69140625, "loss_num": 0.052001953125, "loss_xval": 1.640625, "num_input_tokens_seen": 21614084, "step": 187 }, { "epoch": 1.010752688172043, "grad_norm": 61.526485443115234, "learning_rate": 5e-07, "loss": 1.6178, "num_input_tokens_seen": 21727288, "step": 188 }, { "epoch": 1.010752688172043, "loss": 1.7919200658798218, "loss_ce": 0.015064647421240807, "loss_iou": 0.7421875, "loss_num": 0.05908203125, "loss_xval": 1.7734375, "num_input_tokens_seen": 21727288, "step": 188 }, { "epoch": 1.0161290322580645, "grad_norm": 39.428096771240234, "learning_rate": 5e-07, "loss": 1.4111, "num_input_tokens_seen": 21841416, "step": 189 }, { "epoch": 1.0161290322580645, "loss": 1.9997045993804932, "loss_ce": 0.03534901887178421, "loss_iou": 0.87109375, "loss_num": 0.044677734375, "loss_xval": 1.9609375, "num_input_tokens_seen": 21841416, "step": 189 }, { "epoch": 1.021505376344086, "grad_norm": 25.58272361755371, "learning_rate": 5e-07, "loss": 1.3706, "num_input_tokens_seen": 21957776, "step": 190 }, { "epoch": 1.021505376344086, "loss": 1.4757163524627686, "loss_ce": 0.019173379987478256, "loss_iou": 0.57421875, "loss_num": 0.061767578125, "loss_xval": 1.453125, "num_input_tokens_seen": 21957776, "step": 190 }, { "epoch": 1.0268817204301075, "grad_norm": 34.28939437866211, "learning_rate": 5e-07, "loss": 1.5575, "num_input_tokens_seen": 22075232, "step": 191 }, { "epoch": 1.0268817204301075, "loss": 1.311399221420288, "loss_ce": 0.0179422777146101, "loss_iou": 0.51953125, "loss_num": 0.05126953125, "loss_xval": 1.296875, "num_input_tokens_seen": 22075232, "step": 191 }, { "epoch": 1.032258064516129, "grad_norm": 39.15760803222656, "learning_rate": 5e-07, "loss": 1.3639, "num_input_tokens_seen": 22191396, "step": 192 }, { "epoch": 1.032258064516129, "loss": 1.3183003664016724, "loss_ce": 0.02240191027522087, "loss_iou": 0.546875, "loss_num": 0.04052734375, "loss_xval": 1.296875, "num_input_tokens_seen": 22191396, "step": 192 }, { "epoch": 1.0376344086021505, "grad_norm": 18.651803970336914, "learning_rate": 5e-07, "loss": 1.7244, "num_input_tokens_seen": 22304560, "step": 193 }, { "epoch": 1.0376344086021505, "loss": 2.025524139404297, "loss_ce": 0.009899035096168518, "loss_iou": 0.8203125, "loss_num": 0.07470703125, "loss_xval": 2.015625, "num_input_tokens_seen": 22304560, "step": 193 }, { "epoch": 1.043010752688172, "grad_norm": 27.29646110534668, "learning_rate": 5e-07, "loss": 1.547, "num_input_tokens_seen": 22423200, "step": 194 }, { "epoch": 1.043010752688172, "loss": 1.3405640125274658, "loss_ce": 0.019763214513659477, "loss_iou": 0.546875, "loss_num": 0.045166015625, "loss_xval": 1.3203125, "num_input_tokens_seen": 22423200, "step": 194 }, { "epoch": 1.0483870967741935, "grad_norm": 55.68614959716797, "learning_rate": 5e-07, "loss": 1.5393, "num_input_tokens_seen": 22538240, "step": 195 }, { "epoch": 1.0483870967741935, "loss": 1.657959222793579, "loss_ce": 0.0192872304469347, "loss_iou": 0.66796875, "loss_num": 0.060546875, "loss_xval": 1.640625, "num_input_tokens_seen": 22538240, "step": 195 }, { "epoch": 1.053763440860215, "grad_norm": 98.3216552734375, "learning_rate": 5e-07, "loss": 1.7909, "num_input_tokens_seen": 22653100, "step": 196 }, { "epoch": 1.053763440860215, "loss": 1.1073150634765625, "loss_ce": 0.015518225729465485, "loss_iou": 0.439453125, "loss_num": 0.042724609375, "loss_xval": 1.09375, "num_input_tokens_seen": 22653100, "step": 196 }, { "epoch": 1.0591397849462365, "grad_norm": 22.79617691040039, "learning_rate": 5e-07, "loss": 1.4801, "num_input_tokens_seen": 22770140, "step": 197 }, { "epoch": 1.0591397849462365, "loss": 1.3390501737594604, "loss_ce": 0.023132193833589554, "loss_iou": 0.54296875, "loss_num": 0.0458984375, "loss_xval": 1.3125, "num_input_tokens_seen": 22770140, "step": 197 }, { "epoch": 1.064516129032258, "grad_norm": 25.340099334716797, "learning_rate": 5e-07, "loss": 1.4997, "num_input_tokens_seen": 22879056, "step": 198 }, { "epoch": 1.064516129032258, "loss": 1.9306972026824951, "loss_ce": 0.012728463858366013, "loss_iou": 0.8125, "loss_num": 0.058837890625, "loss_xval": 1.921875, "num_input_tokens_seen": 22879056, "step": 198 }, { "epoch": 1.0698924731182795, "grad_norm": 40.509666442871094, "learning_rate": 5e-07, "loss": 1.381, "num_input_tokens_seen": 22997904, "step": 199 }, { "epoch": 1.0698924731182795, "loss": 1.8154844045639038, "loss_ce": 0.02324814721941948, "loss_iou": 0.703125, "loss_num": 0.07763671875, "loss_xval": 1.7890625, "num_input_tokens_seen": 22997904, "step": 199 }, { "epoch": 1.075268817204301, "grad_norm": 59.791988372802734, "learning_rate": 5e-07, "loss": 1.2145, "num_input_tokens_seen": 23111700, "step": 200 }, { "epoch": 1.075268817204301, "loss": 0.9104034900665283, "loss_ce": 0.007083158940076828, "loss_iou": 0.3359375, "loss_num": 0.0458984375, "loss_xval": 0.90234375, "num_input_tokens_seen": 23111700, "step": 200 }, { "epoch": 1.0806451612903225, "grad_norm": 50.05624771118164, "learning_rate": 5e-07, "loss": 1.5585, "num_input_tokens_seen": 23225860, "step": 201 }, { "epoch": 1.0806451612903225, "loss": 1.6932320594787598, "loss_ce": 0.028192933648824692, "loss_iou": 0.65625, "loss_num": 0.07080078125, "loss_xval": 1.6640625, "num_input_tokens_seen": 23225860, "step": 201 }, { "epoch": 1.086021505376344, "grad_norm": 28.056642532348633, "learning_rate": 5e-07, "loss": 1.6546, "num_input_tokens_seen": 23340152, "step": 202 }, { "epoch": 1.086021505376344, "loss": 1.5204676389694214, "loss_ce": 0.0226648710668087, "loss_iou": 0.625, "loss_num": 0.049072265625, "loss_xval": 1.5, "num_input_tokens_seen": 23340152, "step": 202 }, { "epoch": 1.0913978494623655, "grad_norm": 28.02910804748535, "learning_rate": 5e-07, "loss": 1.8239, "num_input_tokens_seen": 23457476, "step": 203 }, { "epoch": 1.0913978494623655, "loss": 1.6955671310424805, "loss_ce": 0.02222730591893196, "loss_iou": 0.671875, "loss_num": 0.06591796875, "loss_xval": 1.671875, "num_input_tokens_seen": 23457476, "step": 203 }, { "epoch": 1.096774193548387, "grad_norm": 92.92642211914062, "learning_rate": 5e-07, "loss": 1.5213, "num_input_tokens_seen": 23572680, "step": 204 }, { "epoch": 1.096774193548387, "loss": 1.3738243579864502, "loss_ce": 0.012496189214289188, "loss_iou": 0.59375, "loss_num": 0.035400390625, "loss_xval": 1.359375, "num_input_tokens_seen": 23572680, "step": 204 }, { "epoch": 1.1021505376344085, "grad_norm": 25.67889976501465, "learning_rate": 5e-07, "loss": 1.5142, "num_input_tokens_seen": 23684600, "step": 205 }, { "epoch": 1.1021505376344085, "loss": 1.5587878227233887, "loss_ce": 0.01093622762709856, "loss_iou": 0.65234375, "loss_num": 0.048828125, "loss_xval": 1.546875, "num_input_tokens_seen": 23684600, "step": 205 }, { "epoch": 1.10752688172043, "grad_norm": 34.937164306640625, "learning_rate": 5e-07, "loss": 1.4856, "num_input_tokens_seen": 23797476, "step": 206 }, { "epoch": 1.10752688172043, "loss": 1.390256404876709, "loss_ce": 0.019650958478450775, "loss_iou": 0.56640625, "loss_num": 0.0478515625, "loss_xval": 1.3671875, "num_input_tokens_seen": 23797476, "step": 206 }, { "epoch": 1.1129032258064515, "grad_norm": 22.208677291870117, "learning_rate": 5e-07, "loss": 1.7581, "num_input_tokens_seen": 23909800, "step": 207 }, { "epoch": 1.1129032258064515, "loss": 1.1367950439453125, "loss_ce": 0.004959031939506531, "loss_iou": 0.4375, "loss_num": 0.05126953125, "loss_xval": 1.1328125, "num_input_tokens_seen": 23909800, "step": 207 }, { "epoch": 1.118279569892473, "grad_norm": 40.692352294921875, "learning_rate": 5e-07, "loss": 1.5637, "num_input_tokens_seen": 24025296, "step": 208 }, { "epoch": 1.118279569892473, "loss": 2.098600387573242, "loss_ce": 0.016569266095757484, "loss_iou": 0.8671875, "loss_num": 0.0693359375, "loss_xval": 2.078125, "num_input_tokens_seen": 24025296, "step": 208 }, { "epoch": 1.1236559139784945, "grad_norm": 25.585731506347656, "learning_rate": 5e-07, "loss": 1.567, "num_input_tokens_seen": 24140824, "step": 209 }, { "epoch": 1.1236559139784945, "loss": 1.3767918348312378, "loss_ce": 0.009604405611753464, "loss_iou": 0.5546875, "loss_num": 0.051025390625, "loss_xval": 1.3671875, "num_input_tokens_seen": 24140824, "step": 209 }, { "epoch": 1.129032258064516, "grad_norm": 35.67340087890625, "learning_rate": 5e-07, "loss": 1.5087, "num_input_tokens_seen": 24254304, "step": 210 }, { "epoch": 1.129032258064516, "loss": 1.363642692565918, "loss_ce": 0.0050001442432403564, "loss_iou": 0.55078125, "loss_num": 0.051025390625, "loss_xval": 1.359375, "num_input_tokens_seen": 24254304, "step": 210 }, { "epoch": 1.1344086021505375, "grad_norm": 20.632625579833984, "learning_rate": 5e-07, "loss": 1.4534, "num_input_tokens_seen": 24365800, "step": 211 }, { "epoch": 1.1344086021505375, "loss": 1.2198079824447632, "loss_ce": 0.008870471268892288, "loss_iou": 0.462890625, "loss_num": 0.057373046875, "loss_xval": 1.2109375, "num_input_tokens_seen": 24365800, "step": 211 }, { "epoch": 1.139784946236559, "grad_norm": 30.201740264892578, "learning_rate": 5e-07, "loss": 1.6017, "num_input_tokens_seen": 24477084, "step": 212 }, { "epoch": 1.139784946236559, "loss": 1.4494805335998535, "loss_ce": 0.013445373624563217, "loss_iou": 0.57421875, "loss_num": 0.05712890625, "loss_xval": 1.4375, "num_input_tokens_seen": 24477084, "step": 212 }, { "epoch": 1.1451612903225807, "grad_norm": 24.84994125366211, "learning_rate": 5e-07, "loss": 1.195, "num_input_tokens_seen": 24592388, "step": 213 }, { "epoch": 1.1451612903225807, "loss": 1.1129597425460815, "loss_ce": 0.02140704356133938, "loss_iou": 0.44921875, "loss_num": 0.0380859375, "loss_xval": 1.09375, "num_input_tokens_seen": 24592388, "step": 213 }, { "epoch": 1.1505376344086022, "grad_norm": 54.7974967956543, "learning_rate": 5e-07, "loss": 1.5157, "num_input_tokens_seen": 24707616, "step": 214 }, { "epoch": 1.1505376344086022, "loss": 1.3838951587677002, "loss_ce": 0.00791865587234497, "loss_iou": 0.5546875, "loss_num": 0.052490234375, "loss_xval": 1.375, "num_input_tokens_seen": 24707616, "step": 214 }, { "epoch": 1.1559139784946237, "grad_norm": 70.17814636230469, "learning_rate": 5e-07, "loss": 1.4912, "num_input_tokens_seen": 24825352, "step": 215 }, { "epoch": 1.1559139784946237, "loss": 1.5063836574554443, "loss_ce": 0.008825058117508888, "loss_iou": 0.59765625, "loss_num": 0.059814453125, "loss_xval": 1.5, "num_input_tokens_seen": 24825352, "step": 215 }, { "epoch": 1.1612903225806452, "grad_norm": 70.89762878417969, "learning_rate": 5e-07, "loss": 1.6013, "num_input_tokens_seen": 24939544, "step": 216 }, { "epoch": 1.1612903225806452, "loss": 1.7236270904541016, "loss_ce": 0.02636145055294037, "loss_iou": 0.7265625, "loss_num": 0.049072265625, "loss_xval": 1.6953125, "num_input_tokens_seen": 24939544, "step": 216 }, { "epoch": 1.1666666666666667, "grad_norm": 65.59315490722656, "learning_rate": 5e-07, "loss": 1.4737, "num_input_tokens_seen": 25054832, "step": 217 }, { "epoch": 1.1666666666666667, "loss": 1.6390049457550049, "loss_ce": 0.010098627768456936, "loss_iou": 0.69140625, "loss_num": 0.048828125, "loss_xval": 1.625, "num_input_tokens_seen": 25054832, "step": 217 }, { "epoch": 1.1720430107526882, "grad_norm": 35.01422119140625, "learning_rate": 5e-07, "loss": 1.5338, "num_input_tokens_seen": 25169596, "step": 218 }, { "epoch": 1.1720430107526882, "loss": 2.1897029876708984, "loss_ce": 0.012945284135639668, "loss_iou": 0.89453125, "loss_num": 0.0771484375, "loss_xval": 2.171875, "num_input_tokens_seen": 25169596, "step": 218 }, { "epoch": 1.1774193548387097, "grad_norm": 27.730222702026367, "learning_rate": 5e-07, "loss": 1.6176, "num_input_tokens_seen": 25284116, "step": 219 }, { "epoch": 1.1774193548387097, "loss": 1.4684910774230957, "loss_ce": 0.024643385782837868, "loss_iou": 0.6171875, "loss_num": 0.042724609375, "loss_xval": 1.4453125, "num_input_tokens_seen": 25284116, "step": 219 }, { "epoch": 1.1827956989247312, "grad_norm": 21.865497589111328, "learning_rate": 5e-07, "loss": 1.4247, "num_input_tokens_seen": 25400676, "step": 220 }, { "epoch": 1.1827956989247312, "loss": 1.7422438859939575, "loss_ce": 0.011775162070989609, "loss_iou": 0.7109375, "loss_num": 0.061279296875, "loss_xval": 1.734375, "num_input_tokens_seen": 25400676, "step": 220 }, { "epoch": 1.1881720430107527, "grad_norm": 47.47127151489258, "learning_rate": 5e-07, "loss": 1.3477, "num_input_tokens_seen": 25513956, "step": 221 }, { "epoch": 1.1881720430107527, "loss": 1.5659741163253784, "loss_ce": 0.0029858716297894716, "loss_iou": 0.640625, "loss_num": 0.05615234375, "loss_xval": 1.5625, "num_input_tokens_seen": 25513956, "step": 221 }, { "epoch": 1.1935483870967742, "grad_norm": 38.19325256347656, "learning_rate": 5e-07, "loss": 1.3555, "num_input_tokens_seen": 25629252, "step": 222 }, { "epoch": 1.1935483870967742, "loss": 1.2463247776031494, "loss_ce": 0.012926386669278145, "loss_iou": 0.5234375, "loss_num": 0.037109375, "loss_xval": 1.234375, "num_input_tokens_seen": 25629252, "step": 222 }, { "epoch": 1.1989247311827957, "grad_norm": 41.60444641113281, "learning_rate": 5e-07, "loss": 1.5001, "num_input_tokens_seen": 25746520, "step": 223 }, { "epoch": 1.1989247311827957, "loss": 1.3149046897888184, "loss_ce": 0.008752378635108471, "loss_iou": 0.546875, "loss_num": 0.043212890625, "loss_xval": 1.3046875, "num_input_tokens_seen": 25746520, "step": 223 }, { "epoch": 1.2043010752688172, "grad_norm": 23.260597229003906, "learning_rate": 5e-07, "loss": 1.216, "num_input_tokens_seen": 25860812, "step": 224 }, { "epoch": 1.2043010752688172, "loss": 1.3655295372009277, "loss_ce": 0.013478795066475868, "loss_iou": 0.56640625, "loss_num": 0.044189453125, "loss_xval": 1.3515625, "num_input_tokens_seen": 25860812, "step": 224 }, { "epoch": 1.2096774193548387, "grad_norm": 34.71173095703125, "learning_rate": 5e-07, "loss": 1.2426, "num_input_tokens_seen": 25977704, "step": 225 }, { "epoch": 1.2096774193548387, "loss": 1.3021342754364014, "loss_ce": 0.006724011618643999, "loss_iou": 0.53125, "loss_num": 0.045654296875, "loss_xval": 1.296875, "num_input_tokens_seen": 25977704, "step": 225 }, { "epoch": 1.2150537634408602, "grad_norm": 28.806682586669922, "learning_rate": 5e-07, "loss": 1.7987, "num_input_tokens_seen": 26093632, "step": 226 }, { "epoch": 1.2150537634408602, "loss": 1.7133296728134155, "loss_ce": 0.004833557642996311, "loss_iou": 0.66796875, "loss_num": 0.07421875, "loss_xval": 1.7109375, "num_input_tokens_seen": 26093632, "step": 226 }, { "epoch": 1.2204301075268817, "grad_norm": 20.089323043823242, "learning_rate": 5e-07, "loss": 1.4608, "num_input_tokens_seen": 26209128, "step": 227 }, { "epoch": 1.2204301075268817, "loss": 1.5584263801574707, "loss_ce": 0.004227249883115292, "loss_iou": 0.62109375, "loss_num": 0.0634765625, "loss_xval": 1.5546875, "num_input_tokens_seen": 26209128, "step": 227 }, { "epoch": 1.2258064516129032, "grad_norm": 29.202558517456055, "learning_rate": 5e-07, "loss": 1.5968, "num_input_tokens_seen": 26326320, "step": 228 }, { "epoch": 1.2258064516129032, "loss": 1.3659098148345947, "loss_ce": 0.0021402642596513033, "loss_iou": 0.546875, "loss_num": 0.0537109375, "loss_xval": 1.3671875, "num_input_tokens_seen": 26326320, "step": 228 }, { "epoch": 1.2311827956989247, "grad_norm": 24.158615112304688, "learning_rate": 5e-07, "loss": 1.5028, "num_input_tokens_seen": 26444876, "step": 229 }, { "epoch": 1.2311827956989247, "loss": 1.9646565914154053, "loss_ce": 0.009090177714824677, "loss_iou": 0.82421875, "loss_num": 0.0615234375, "loss_xval": 1.953125, "num_input_tokens_seen": 26444876, "step": 229 }, { "epoch": 1.2365591397849462, "grad_norm": 20.82314682006836, "learning_rate": 5e-07, "loss": 1.6671, "num_input_tokens_seen": 26561148, "step": 230 }, { "epoch": 1.2365591397849462, "loss": 1.7786476612091064, "loss_ce": 0.003257096977904439, "loss_iou": 0.71875, "loss_num": 0.068359375, "loss_xval": 1.7734375, "num_input_tokens_seen": 26561148, "step": 230 }, { "epoch": 1.2419354838709677, "grad_norm": 32.511505126953125, "learning_rate": 5e-07, "loss": 1.4049, "num_input_tokens_seen": 26673188, "step": 231 }, { "epoch": 1.2419354838709677, "loss": 1.7070343494415283, "loss_ce": 0.00390938576310873, "loss_iou": 0.671875, "loss_num": 0.072265625, "loss_xval": 1.703125, "num_input_tokens_seen": 26673188, "step": 231 }, { "epoch": 1.2473118279569892, "grad_norm": 34.374332427978516, "learning_rate": 5e-07, "loss": 1.5369, "num_input_tokens_seen": 26786556, "step": 232 }, { "epoch": 1.2473118279569892, "loss": 1.7234493494033813, "loss_ce": 0.00421100202947855, "loss_iou": 0.6875, "loss_num": 0.06884765625, "loss_xval": 1.71875, "num_input_tokens_seen": 26786556, "step": 232 }, { "epoch": 1.2526881720430108, "grad_norm": 37.149070739746094, "learning_rate": 5e-07, "loss": 1.5824, "num_input_tokens_seen": 26902000, "step": 233 }, { "epoch": 1.2526881720430108, "loss": 1.6707124710083008, "loss_ce": 0.003232024610042572, "loss_iou": 0.66796875, "loss_num": 0.06640625, "loss_xval": 1.6640625, "num_input_tokens_seen": 26902000, "step": 233 }, { "epoch": 1.2580645161290323, "grad_norm": 98.07947540283203, "learning_rate": 5e-07, "loss": 1.2768, "num_input_tokens_seen": 27018456, "step": 234 }, { "epoch": 1.2580645161290323, "loss": 1.4071602821350098, "loss_ce": 0.002619244623929262, "loss_iou": 0.58203125, "loss_num": 0.048095703125, "loss_xval": 1.40625, "num_input_tokens_seen": 27018456, "step": 234 }, { "epoch": 1.2634408602150538, "grad_norm": 21.93700408935547, "learning_rate": 5e-07, "loss": 1.4328, "num_input_tokens_seen": 27131608, "step": 235 }, { "epoch": 1.2634408602150538, "loss": 1.1788636445999146, "loss_ce": 0.0011293105781078339, "loss_iou": 0.51171875, "loss_num": 0.0302734375, "loss_xval": 1.1796875, "num_input_tokens_seen": 27131608, "step": 235 }, { "epoch": 1.2688172043010753, "grad_norm": 26.892953872680664, "learning_rate": 5e-07, "loss": 1.6554, "num_input_tokens_seen": 27247056, "step": 236 }, { "epoch": 1.2688172043010753, "loss": 2.0783963203430176, "loss_ce": 0.008083954453468323, "loss_iou": 0.8671875, "loss_num": 0.06689453125, "loss_xval": 2.0625, "num_input_tokens_seen": 27247056, "step": 236 }, { "epoch": 1.2741935483870968, "grad_norm": 24.042295455932617, "learning_rate": 5e-07, "loss": 1.3516, "num_input_tokens_seen": 27362996, "step": 237 }, { "epoch": 1.2741935483870968, "loss": 1.3171396255493164, "loss_ce": 0.0017099895048886538, "loss_iou": 0.54296875, "loss_num": 0.0458984375, "loss_xval": 1.3125, "num_input_tokens_seen": 27362996, "step": 237 }, { "epoch": 1.2795698924731183, "grad_norm": 45.369911193847656, "learning_rate": 5e-07, "loss": 1.2333, "num_input_tokens_seen": 27477400, "step": 238 }, { "epoch": 1.2795698924731183, "loss": 1.1028507947921753, "loss_ce": 0.0022648517042398453, "loss_iou": 0.44921875, "loss_num": 0.04052734375, "loss_xval": 1.1015625, "num_input_tokens_seen": 27477400, "step": 238 }, { "epoch": 1.2849462365591398, "grad_norm": 29.612136840820312, "learning_rate": 5e-07, "loss": 1.3936, "num_input_tokens_seen": 27592736, "step": 239 }, { "epoch": 1.2849462365591398, "loss": 1.4711177349090576, "loss_ce": 0.0009029931388795376, "loss_iou": 0.62890625, "loss_num": 0.042236328125, "loss_xval": 1.46875, "num_input_tokens_seen": 27592736, "step": 239 }, { "epoch": 1.2903225806451613, "grad_norm": 47.100093841552734, "learning_rate": 5e-07, "loss": 1.3522, "num_input_tokens_seen": 27710808, "step": 240 }, { "epoch": 1.2903225806451613, "loss": 1.555254340171814, "loss_ce": 0.0005667555378749967, "loss_iou": 0.6171875, "loss_num": 0.06494140625, "loss_xval": 1.5546875, "num_input_tokens_seen": 27710808, "step": 240 }, { "epoch": 1.2956989247311828, "grad_norm": 31.210046768188477, "learning_rate": 5e-07, "loss": 1.4456, "num_input_tokens_seen": 27824700, "step": 241 }, { "epoch": 1.2956989247311828, "loss": 1.5355629920959473, "loss_ce": 0.0013832369586452842, "loss_iou": 0.625, "loss_num": 0.05615234375, "loss_xval": 1.53125, "num_input_tokens_seen": 27824700, "step": 241 }, { "epoch": 1.3010752688172043, "grad_norm": 24.380056381225586, "learning_rate": 5e-07, "loss": 1.1648, "num_input_tokens_seen": 27938156, "step": 242 }, { "epoch": 1.3010752688172043, "loss": 1.113837718963623, "loss_ce": 0.003242054721340537, "loss_iou": 0.43359375, "loss_num": 0.049072265625, "loss_xval": 1.109375, "num_input_tokens_seen": 27938156, "step": 242 }, { "epoch": 1.3064516129032258, "grad_norm": 119.98265838623047, "learning_rate": 5e-07, "loss": 1.3012, "num_input_tokens_seen": 28054328, "step": 243 }, { "epoch": 1.3064516129032258, "loss": 1.428575038909912, "loss_ce": 0.0008406037813983858, "loss_iou": 0.59375, "loss_num": 0.04833984375, "loss_xval": 1.4296875, "num_input_tokens_seen": 28054328, "step": 243 }, { "epoch": 1.3118279569892473, "grad_norm": 24.216394424438477, "learning_rate": 5e-07, "loss": 1.5827, "num_input_tokens_seen": 28172504, "step": 244 }, { "epoch": 1.3118279569892473, "loss": 1.457690954208374, "loss_ce": 0.0021246494725346565, "loss_iou": 0.5859375, "loss_num": 0.056396484375, "loss_xval": 1.453125, "num_input_tokens_seen": 28172504, "step": 244 }, { "epoch": 1.3172043010752688, "grad_norm": 33.99232482910156, "learning_rate": 5e-07, "loss": 1.5382, "num_input_tokens_seen": 28290908, "step": 245 }, { "epoch": 1.3172043010752688, "loss": 1.884040355682373, "loss_ce": 0.0007396676810458302, "loss_iou": 0.8046875, "loss_num": 0.054443359375, "loss_xval": 1.8828125, "num_input_tokens_seen": 28290908, "step": 245 }, { "epoch": 1.3225806451612903, "grad_norm": 28.014034271240234, "learning_rate": 5e-07, "loss": 1.4509, "num_input_tokens_seen": 28406556, "step": 246 }, { "epoch": 1.3225806451612903, "loss": 1.1849000453948975, "loss_ce": 0.0013063286896795034, "loss_iou": 0.484375, "loss_num": 0.043212890625, "loss_xval": 1.1875, "num_input_tokens_seen": 28406556, "step": 246 }, { "epoch": 1.3279569892473118, "grad_norm": 23.500469207763672, "learning_rate": 5e-07, "loss": 1.2259, "num_input_tokens_seen": 28523048, "step": 247 }, { "epoch": 1.3279569892473118, "loss": 1.2141563892364502, "loss_ce": 0.0022422303445637226, "loss_iou": 0.490234375, "loss_num": 0.0458984375, "loss_xval": 1.2109375, "num_input_tokens_seen": 28523048, "step": 247 }, { "epoch": 1.3333333333333333, "grad_norm": 95.76544189453125, "learning_rate": 5e-07, "loss": 1.4295, "num_input_tokens_seen": 28642136, "step": 248 }, { "epoch": 1.3333333333333333, "loss": 1.4604480266571045, "loss_ce": 0.00048704291111789644, "loss_iou": 0.59765625, "loss_num": 0.052734375, "loss_xval": 1.4609375, "num_input_tokens_seen": 28642136, "step": 248 }, { "epoch": 1.3387096774193548, "grad_norm": 32.01243591308594, "learning_rate": 5e-07, "loss": 1.4465, "num_input_tokens_seen": 28759148, "step": 249 }, { "epoch": 1.3387096774193548, "loss": 1.4616165161132812, "loss_ce": 0.0006790047627873719, "loss_iou": 0.62109375, "loss_num": 0.044189453125, "loss_xval": 1.4609375, "num_input_tokens_seen": 28759148, "step": 249 }, { "epoch": 1.3440860215053765, "grad_norm": 37.893306732177734, "learning_rate": 5e-07, "loss": 1.2691, "num_input_tokens_seen": 28873784, "step": 250 }, { "epoch": 1.3440860215053765, "eval_icons_CIoU": 0.11686551570892334, "eval_icons_GIoU": 0.08299633115530014, "eval_icons_IoU": 0.28376345336437225, "eval_icons_MAE_all": 0.030684583820402622, "eval_icons_MAE_h": 0.034129880368709564, "eval_icons_MAE_w": 0.05606722831726074, "eval_icons_MAE_x_boxes": 0.05219218507409096, "eval_icons_MAE_y_boxes": 0.03177757188677788, "eval_icons_NUM_probability": 0.9997840225696564, "eval_icons_inside_bbox": 0.5503472238779068, "eval_icons_loss": 1.9891079664230347, "eval_icons_loss_ce": 0.013990547508001328, "eval_icons_loss_iou": 0.89697265625, "eval_icons_loss_num": 0.032253265380859375, "eval_icons_loss_xval": 1.955078125, "eval_icons_runtime": 40.4729, "eval_icons_samples_per_second": 1.235, "eval_icons_steps_per_second": 0.049, "num_input_tokens_seen": 28873784, "step": 250 }, { "epoch": 1.3440860215053765, "eval_screenspot_CIoU": 0.16999953985214233, "eval_screenspot_GIoU": 0.15343699604272842, "eval_screenspot_IoU": 0.30905379851659137, "eval_screenspot_MAE_all": 0.0824604481458664, "eval_screenspot_MAE_h": 0.06886620198686917, "eval_screenspot_MAE_w": 0.11407167961200078, "eval_screenspot_MAE_x_boxes": 0.11565089722474416, "eval_screenspot_MAE_y_boxes": 0.05074728652834892, "eval_screenspot_NUM_probability": 0.9995138049125671, "eval_screenspot_inside_bbox": 0.6120833357175192, "eval_screenspot_loss": 2.140793561935425, "eval_screenspot_loss_ce": 0.0003674099959122638, "eval_screenspot_loss_iou": 0.8717447916666666, "eval_screenspot_loss_num": 0.08971913655598958, "eval_screenspot_loss_xval": 2.1923828125, "eval_screenspot_runtime": 72.4371, "eval_screenspot_samples_per_second": 1.229, "eval_screenspot_steps_per_second": 0.041, "num_input_tokens_seen": 28873784, "step": 250 }, { "epoch": 1.3440860215053765, "loss": 2.0969948768615723, "loss_ce": 0.0003153600846417248, "loss_iou": 0.85546875, "loss_num": 0.0771484375, "loss_xval": 2.09375, "num_input_tokens_seen": 28873784, "step": 250 }, { "epoch": 1.349462365591398, "grad_norm": 22.456090927124023, "learning_rate": 5e-07, "loss": 1.3711, "num_input_tokens_seen": 28991404, "step": 251 }, { "epoch": 1.349462365591398, "loss": 1.3372344970703125, "loss_ce": 0.00032041023951023817, "loss_iou": 0.515625, "loss_num": 0.06103515625, "loss_xval": 1.3359375, "num_input_tokens_seen": 28991404, "step": 251 }, { "epoch": 1.3548387096774195, "grad_norm": 24.922805786132812, "learning_rate": 5e-07, "loss": 1.1992, "num_input_tokens_seen": 29109008, "step": 252 }, { "epoch": 1.3548387096774195, "loss": 1.1730928421020508, "loss_ce": 0.00024133155238814652, "loss_iou": 0.51171875, "loss_num": 0.02978515625, "loss_xval": 1.171875, "num_input_tokens_seen": 29109008, "step": 252 }, { "epoch": 1.360215053763441, "grad_norm": 25.932035446166992, "learning_rate": 5e-07, "loss": 1.4659, "num_input_tokens_seen": 29224476, "step": 253 }, { "epoch": 1.360215053763441, "loss": 1.35430109500885, "loss_ce": 0.006644846871495247, "loss_iou": 0.54296875, "loss_num": 0.051513671875, "loss_xval": 1.34375, "num_input_tokens_seen": 29224476, "step": 253 }, { "epoch": 1.3655913978494625, "grad_norm": 34.375, "learning_rate": 5e-07, "loss": 1.4861, "num_input_tokens_seen": 29341280, "step": 254 }, { "epoch": 1.3655913978494625, "loss": 1.1968388557434082, "loss_ce": 0.00054976309183985, "loss_iou": 0.48046875, "loss_num": 0.047119140625, "loss_xval": 1.1953125, "num_input_tokens_seen": 29341280, "step": 254 }, { "epoch": 1.370967741935484, "grad_norm": 36.97913360595703, "learning_rate": 5e-07, "loss": 1.1371, "num_input_tokens_seen": 29459668, "step": 255 }, { "epoch": 1.370967741935484, "loss": 1.020681619644165, "loss_ce": 0.0001737616112222895, "loss_iou": 0.423828125, "loss_num": 0.0341796875, "loss_xval": 1.0234375, "num_input_tokens_seen": 29459668, "step": 255 }, { "epoch": 1.3763440860215055, "grad_norm": 30.016645431518555, "learning_rate": 5e-07, "loss": 1.2723, "num_input_tokens_seen": 29573972, "step": 256 }, { "epoch": 1.3763440860215055, "loss": 0.9550455212593079, "loss_ce": 0.00045561842853203416, "loss_iou": 0.35546875, "loss_num": 0.048583984375, "loss_xval": 0.953125, "num_input_tokens_seen": 29573972, "step": 256 }, { "epoch": 1.381720430107527, "grad_norm": 30.630483627319336, "learning_rate": 5e-07, "loss": 1.3626, "num_input_tokens_seen": 29691052, "step": 257 }, { "epoch": 1.381720430107527, "loss": 1.3693835735321045, "loss_ce": 0.00024295558978337795, "loss_iou": 0.578125, "loss_num": 0.042724609375, "loss_xval": 1.3671875, "num_input_tokens_seen": 29691052, "step": 257 }, { "epoch": 1.3870967741935485, "grad_norm": 36.568721771240234, "learning_rate": 5e-07, "loss": 1.2661, "num_input_tokens_seen": 29802536, "step": 258 }, { "epoch": 1.3870967741935485, "loss": 1.4565470218658447, "loss_ce": 0.0007365698693320155, "loss_iou": 0.5859375, "loss_num": 0.056640625, "loss_xval": 1.453125, "num_input_tokens_seen": 29802536, "step": 258 }, { "epoch": 1.39247311827957, "grad_norm": 20.696990966796875, "learning_rate": 5e-07, "loss": 1.0776, "num_input_tokens_seen": 29918736, "step": 259 }, { "epoch": 1.39247311827957, "loss": 1.2158234119415283, "loss_ce": 0.000979668227955699, "loss_iou": 0.52734375, "loss_num": 0.03271484375, "loss_xval": 1.21875, "num_input_tokens_seen": 29918736, "step": 259 }, { "epoch": 1.3978494623655915, "grad_norm": 21.850862503051758, "learning_rate": 5e-07, "loss": 1.4826, "num_input_tokens_seen": 30032260, "step": 260 }, { "epoch": 1.3978494623655915, "loss": 1.5486454963684082, "loss_ce": 0.0034794630482792854, "loss_iou": 0.66015625, "loss_num": 0.044677734375, "loss_xval": 1.546875, "num_input_tokens_seen": 30032260, "step": 260 }, { "epoch": 1.403225806451613, "grad_norm": 31.71571159362793, "learning_rate": 5e-07, "loss": 1.3287, "num_input_tokens_seen": 30151720, "step": 261 }, { "epoch": 1.403225806451613, "loss": 1.0720112323760986, "loss_ce": 0.00023386965040117502, "loss_iou": 0.447265625, "loss_num": 0.03564453125, "loss_xval": 1.0703125, "num_input_tokens_seen": 30151720, "step": 261 }, { "epoch": 1.4086021505376345, "grad_norm": 45.20985412597656, "learning_rate": 5e-07, "loss": 1.3297, "num_input_tokens_seen": 30268788, "step": 262 }, { "epoch": 1.4086021505376345, "loss": 1.4609746932983398, "loss_ce": 0.0010138447396457195, "loss_iou": 0.59375, "loss_num": 0.0537109375, "loss_xval": 1.4609375, "num_input_tokens_seen": 30268788, "step": 262 }, { "epoch": 1.413978494623656, "grad_norm": 26.74817657470703, "learning_rate": 5e-07, "loss": 1.5049, "num_input_tokens_seen": 30384432, "step": 263 }, { "epoch": 1.413978494623656, "loss": 1.630875587463379, "loss_ce": 0.0019694047514349222, "loss_iou": 0.6796875, "loss_num": 0.054443359375, "loss_xval": 1.625, "num_input_tokens_seen": 30384432, "step": 263 }, { "epoch": 1.4193548387096775, "grad_norm": 28.81574821472168, "learning_rate": 5e-07, "loss": 1.1714, "num_input_tokens_seen": 30499792, "step": 264 }, { "epoch": 1.4193548387096775, "loss": 0.7116221189498901, "loss_ce": 0.00044046182301826775, "loss_iou": 0.30078125, "loss_num": 0.0220947265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 30499792, "step": 264 }, { "epoch": 1.424731182795699, "grad_norm": 77.72657775878906, "learning_rate": 5e-07, "loss": 1.2604, "num_input_tokens_seen": 30615364, "step": 265 }, { "epoch": 1.424731182795699, "loss": 1.2476550340652466, "loss_ce": 0.000584689318202436, "loss_iou": 0.5078125, "loss_num": 0.046142578125, "loss_xval": 1.25, "num_input_tokens_seen": 30615364, "step": 265 }, { "epoch": 1.4301075268817205, "grad_norm": 29.920270919799805, "learning_rate": 5e-07, "loss": 1.3806, "num_input_tokens_seen": 30733052, "step": 266 }, { "epoch": 1.4301075268817205, "loss": 1.291933298110962, "loss_ce": 0.00042938062688335776, "loss_iou": 0.55078125, "loss_num": 0.038330078125, "loss_xval": 1.2890625, "num_input_tokens_seen": 30733052, "step": 266 }, { "epoch": 1.435483870967742, "grad_norm": 26.253578186035156, "learning_rate": 5e-07, "loss": 1.3936, "num_input_tokens_seen": 30849268, "step": 267 }, { "epoch": 1.435483870967742, "loss": 1.342031478881836, "loss_ce": 0.001211304683238268, "loss_iou": 0.578125, "loss_num": 0.03759765625, "loss_xval": 1.34375, "num_input_tokens_seen": 30849268, "step": 267 }, { "epoch": 1.4408602150537635, "grad_norm": 48.07600402832031, "learning_rate": 5e-07, "loss": 1.3613, "num_input_tokens_seen": 30966608, "step": 268 }, { "epoch": 1.4408602150537635, "loss": 1.489617943763733, "loss_ce": 0.0013367097126320004, "loss_iou": 0.578125, "loss_num": 0.06640625, "loss_xval": 1.484375, "num_input_tokens_seen": 30966608, "step": 268 }, { "epoch": 1.446236559139785, "grad_norm": 17.908920288085938, "learning_rate": 5e-07, "loss": 1.5422, "num_input_tokens_seen": 31083768, "step": 269 }, { "epoch": 1.446236559139785, "loss": 1.6677517890930176, "loss_ce": 0.00027140381280332804, "loss_iou": 0.71875, "loss_num": 0.045166015625, "loss_xval": 1.6640625, "num_input_tokens_seen": 31083768, "step": 269 }, { "epoch": 1.4516129032258065, "grad_norm": 25.236888885498047, "learning_rate": 5e-07, "loss": 1.4417, "num_input_tokens_seen": 31202012, "step": 270 }, { "epoch": 1.4516129032258065, "loss": 1.4163403511047363, "loss_ce": 0.00032464927062392235, "loss_iou": 0.6015625, "loss_num": 0.04296875, "loss_xval": 1.4140625, "num_input_tokens_seen": 31202012, "step": 270 }, { "epoch": 1.456989247311828, "grad_norm": 40.84685134887695, "learning_rate": 5e-07, "loss": 1.2754, "num_input_tokens_seen": 31317652, "step": 271 }, { "epoch": 1.456989247311828, "loss": 1.2106504440307617, "loss_ce": 0.0002011623582802713, "loss_iou": 0.462890625, "loss_num": 0.056640625, "loss_xval": 1.2109375, "num_input_tokens_seen": 31317652, "step": 271 }, { "epoch": 1.4623655913978495, "grad_norm": 23.67865753173828, "learning_rate": 5e-07, "loss": 1.2894, "num_input_tokens_seen": 31434612, "step": 272 }, { "epoch": 1.4623655913978495, "loss": 1.2186697721481323, "loss_ce": 0.0008963110158219934, "loss_iou": 0.515625, "loss_num": 0.037353515625, "loss_xval": 1.21875, "num_input_tokens_seen": 31434612, "step": 272 }, { "epoch": 1.467741935483871, "grad_norm": 39.03847885131836, "learning_rate": 5e-07, "loss": 1.2605, "num_input_tokens_seen": 31549180, "step": 273 }, { "epoch": 1.467741935483871, "loss": 1.328099250793457, "loss_ce": 0.0007066746475175023, "loss_iou": 0.55078125, "loss_num": 0.0458984375, "loss_xval": 1.328125, "num_input_tokens_seen": 31549180, "step": 273 }, { "epoch": 1.4731182795698925, "grad_norm": 16.71857261657715, "learning_rate": 5e-07, "loss": 1.1308, "num_input_tokens_seen": 31665124, "step": 274 }, { "epoch": 1.4731182795698925, "loss": 1.2096019983291626, "loss_ce": 0.0006176717579364777, "loss_iou": 0.490234375, "loss_num": 0.04541015625, "loss_xval": 1.2109375, "num_input_tokens_seen": 31665124, "step": 274 }, { "epoch": 1.478494623655914, "grad_norm": 28.48612403869629, "learning_rate": 5e-07, "loss": 1.3969, "num_input_tokens_seen": 31778124, "step": 275 }, { "epoch": 1.478494623655914, "loss": 1.39576256275177, "loss_ce": 0.00025470572290942073, "loss_iou": 0.57421875, "loss_num": 0.04931640625, "loss_xval": 1.3984375, "num_input_tokens_seen": 31778124, "step": 275 }, { "epoch": 1.4838709677419355, "grad_norm": 27.119670867919922, "learning_rate": 5e-07, "loss": 1.2153, "num_input_tokens_seen": 31894960, "step": 276 }, { "epoch": 1.4838709677419355, "loss": 1.0971896648406982, "loss_ce": 0.0014865421690046787, "loss_iou": 0.458984375, "loss_num": 0.0361328125, "loss_xval": 1.09375, "num_input_tokens_seen": 31894960, "step": 276 }, { "epoch": 1.489247311827957, "grad_norm": 26.81801414489746, "learning_rate": 5e-07, "loss": 1.387, "num_input_tokens_seen": 32011524, "step": 277 }, { "epoch": 1.489247311827957, "loss": 1.4158366918563843, "loss_ce": 0.00030930255888961256, "loss_iou": 0.59765625, "loss_num": 0.0439453125, "loss_xval": 1.4140625, "num_input_tokens_seen": 32011524, "step": 277 }, { "epoch": 1.4946236559139785, "grad_norm": 18.39853286743164, "learning_rate": 5e-07, "loss": 1.3847, "num_input_tokens_seen": 32129484, "step": 278 }, { "epoch": 1.4946236559139785, "loss": 1.1703050136566162, "loss_ce": 0.00013904371007811278, "loss_iou": 0.49609375, "loss_num": 0.035400390625, "loss_xval": 1.171875, "num_input_tokens_seen": 32129484, "step": 278 }, { "epoch": 1.5, "grad_norm": 48.2813720703125, "learning_rate": 5e-07, "loss": 1.4082, "num_input_tokens_seen": 32249544, "step": 279 }, { "epoch": 1.5, "loss": 1.7375752925872803, "loss_ce": 0.00027055529062636197, "loss_iou": 0.70703125, "loss_num": 0.064453125, "loss_xval": 1.734375, "num_input_tokens_seen": 32249544, "step": 279 }, { "epoch": 1.5053763440860215, "grad_norm": 39.37226867675781, "learning_rate": 5e-07, "loss": 1.3434, "num_input_tokens_seen": 32367320, "step": 280 }, { "epoch": 1.5053763440860215, "loss": 1.3753284215927124, "loss_ce": 0.000328383466694504, "loss_iou": 0.5703125, "loss_num": 0.04736328125, "loss_xval": 1.375, "num_input_tokens_seen": 32367320, "step": 280 }, { "epoch": 1.510752688172043, "grad_norm": 20.39142417907715, "learning_rate": 5e-07, "loss": 1.2529, "num_input_tokens_seen": 32484368, "step": 281 }, { "epoch": 1.510752688172043, "loss": 0.8953875303268433, "loss_ce": 0.00036800160887651145, "loss_iou": 0.388671875, "loss_num": 0.0238037109375, "loss_xval": 0.89453125, "num_input_tokens_seen": 32484368, "step": 281 }, { "epoch": 1.5161290322580645, "grad_norm": 23.934959411621094, "learning_rate": 5e-07, "loss": 1.3789, "num_input_tokens_seen": 32601848, "step": 282 }, { "epoch": 1.5161290322580645, "loss": 1.5058369636535645, "loss_ce": 0.0004659321275539696, "loss_iou": 0.6015625, "loss_num": 0.0595703125, "loss_xval": 1.5078125, "num_input_tokens_seen": 32601848, "step": 282 }, { "epoch": 1.521505376344086, "grad_norm": 22.00821304321289, "learning_rate": 5e-07, "loss": 1.166, "num_input_tokens_seen": 32720204, "step": 283 }, { "epoch": 1.521505376344086, "loss": 1.2277607917785645, "loss_ce": 0.00022175186313688755, "loss_iou": 0.486328125, "loss_num": 0.05078125, "loss_xval": 1.2265625, "num_input_tokens_seen": 32720204, "step": 283 }, { "epoch": 1.5268817204301075, "grad_norm": 23.788698196411133, "learning_rate": 5e-07, "loss": 1.0955, "num_input_tokens_seen": 32836744, "step": 284 }, { "epoch": 1.5268817204301075, "loss": 1.3528149127960205, "loss_ce": 0.0002759112394414842, "loss_iou": 0.52734375, "loss_num": 0.060302734375, "loss_xval": 1.3515625, "num_input_tokens_seen": 32836744, "step": 284 }, { "epoch": 1.532258064516129, "grad_norm": 30.007781982421875, "learning_rate": 5e-07, "loss": 1.3715, "num_input_tokens_seen": 32951132, "step": 285 }, { "epoch": 1.532258064516129, "loss": 1.6007411479949951, "loss_ce": 0.0001551959285279736, "loss_iou": 0.6640625, "loss_num": 0.053466796875, "loss_xval": 1.6015625, "num_input_tokens_seen": 32951132, "step": 285 }, { "epoch": 1.5376344086021505, "grad_norm": 23.775808334350586, "learning_rate": 5e-07, "loss": 1.353, "num_input_tokens_seen": 33066760, "step": 286 }, { "epoch": 1.5376344086021505, "loss": 1.4523662328720093, "loss_ce": 0.00021778480731882155, "loss_iou": 0.6015625, "loss_num": 0.050048828125, "loss_xval": 1.453125, "num_input_tokens_seen": 33066760, "step": 286 }, { "epoch": 1.543010752688172, "grad_norm": 24.135812759399414, "learning_rate": 5e-07, "loss": 1.3506, "num_input_tokens_seen": 33181832, "step": 287 }, { "epoch": 1.543010752688172, "loss": 1.2468979358673096, "loss_ce": 0.0003159286279696971, "loss_iou": 0.53515625, "loss_num": 0.03466796875, "loss_xval": 1.25, "num_input_tokens_seen": 33181832, "step": 287 }, { "epoch": 1.5483870967741935, "grad_norm": 29.05400276184082, "learning_rate": 5e-07, "loss": 1.2277, "num_input_tokens_seen": 33297868, "step": 288 }, { "epoch": 1.5483870967741935, "loss": 1.3181742429733276, "loss_ce": 0.002012102399021387, "loss_iou": 0.53515625, "loss_num": 0.04931640625, "loss_xval": 1.3125, "num_input_tokens_seen": 33297868, "step": 288 }, { "epoch": 1.553763440860215, "grad_norm": 25.72732925415039, "learning_rate": 5e-07, "loss": 1.38, "num_input_tokens_seen": 33409728, "step": 289 }, { "epoch": 1.553763440860215, "loss": 1.3870584964752197, "loss_ce": 0.0003396991523914039, "loss_iou": 0.55078125, "loss_num": 0.056884765625, "loss_xval": 1.390625, "num_input_tokens_seen": 33409728, "step": 289 }, { "epoch": 1.5591397849462365, "grad_norm": 22.07428550720215, "learning_rate": 5e-07, "loss": 1.4192, "num_input_tokens_seen": 33525176, "step": 290 }, { "epoch": 1.5591397849462365, "loss": 1.2337000370025635, "loss_ce": 0.0017663676990196109, "loss_iou": 0.5234375, "loss_num": 0.036865234375, "loss_xval": 1.234375, "num_input_tokens_seen": 33525176, "step": 290 }, { "epoch": 1.564516129032258, "grad_norm": 34.291500091552734, "learning_rate": 5e-07, "loss": 1.2926, "num_input_tokens_seen": 33640136, "step": 291 }, { "epoch": 1.564516129032258, "loss": 1.3434220552444458, "loss_ce": 0.000648634391836822, "loss_iou": 0.50390625, "loss_num": 0.06689453125, "loss_xval": 1.34375, "num_input_tokens_seen": 33640136, "step": 291 }, { "epoch": 1.5698924731182795, "grad_norm": 21.006689071655273, "learning_rate": 5e-07, "loss": 1.3253, "num_input_tokens_seen": 33752920, "step": 292 }, { "epoch": 1.5698924731182795, "loss": 1.0958058834075928, "loss_ce": 0.0010793369729071856, "loss_iou": 0.431640625, "loss_num": 0.0458984375, "loss_xval": 1.09375, "num_input_tokens_seen": 33752920, "step": 292 }, { "epoch": 1.575268817204301, "grad_norm": 28.173355102539062, "learning_rate": 5e-07, "loss": 1.3408, "num_input_tokens_seen": 33869760, "step": 293 }, { "epoch": 1.575268817204301, "loss": 1.166793704032898, "loss_ce": 0.0010221948614344, "loss_iou": 0.46484375, "loss_num": 0.04736328125, "loss_xval": 1.1640625, "num_input_tokens_seen": 33869760, "step": 293 }, { "epoch": 1.5806451612903225, "grad_norm": 21.93071937561035, "learning_rate": 5e-07, "loss": 1.4461, "num_input_tokens_seen": 33986108, "step": 294 }, { "epoch": 1.5806451612903225, "loss": 1.6650246381759644, "loss_ce": 0.00047387293307110667, "loss_iou": 0.72265625, "loss_num": 0.04345703125, "loss_xval": 1.6640625, "num_input_tokens_seen": 33986108, "step": 294 }, { "epoch": 1.586021505376344, "grad_norm": 24.111236572265625, "learning_rate": 5e-07, "loss": 1.2089, "num_input_tokens_seen": 34103756, "step": 295 }, { "epoch": 1.586021505376344, "loss": 1.1295509338378906, "loss_ce": 0.000156310765305534, "loss_iou": 0.462890625, "loss_num": 0.040771484375, "loss_xval": 1.1328125, "num_input_tokens_seen": 34103756, "step": 295 }, { "epoch": 1.5913978494623655, "grad_norm": 27.47979164123535, "learning_rate": 5e-07, "loss": 1.3962, "num_input_tokens_seen": 34215540, "step": 296 }, { "epoch": 1.5913978494623655, "loss": 1.5237879753112793, "loss_ce": 0.00035045170807279646, "loss_iou": 0.640625, "loss_num": 0.048095703125, "loss_xval": 1.5234375, "num_input_tokens_seen": 34215540, "step": 296 }, { "epoch": 1.596774193548387, "grad_norm": 28.613290786743164, "learning_rate": 5e-07, "loss": 1.402, "num_input_tokens_seen": 34330308, "step": 297 }, { "epoch": 1.596774193548387, "loss": 1.5228352546691895, "loss_ce": 0.00037436955608427525, "loss_iou": 0.62890625, "loss_num": 0.052978515625, "loss_xval": 1.5234375, "num_input_tokens_seen": 34330308, "step": 297 }, { "epoch": 1.6021505376344085, "grad_norm": 18.313156127929688, "learning_rate": 5e-07, "loss": 1.331, "num_input_tokens_seen": 34445216, "step": 298 }, { "epoch": 1.6021505376344085, "loss": 0.8458495736122131, "loss_ce": 0.000146449136082083, "loss_iou": 0.36328125, "loss_num": 0.02392578125, "loss_xval": 0.84375, "num_input_tokens_seen": 34445216, "step": 298 }, { "epoch": 1.60752688172043, "grad_norm": 25.41391944885254, "learning_rate": 5e-07, "loss": 1.1058, "num_input_tokens_seen": 34560500, "step": 299 }, { "epoch": 1.60752688172043, "loss": 0.9816834926605225, "loss_ce": 0.0002381867088843137, "loss_iou": 0.380859375, "loss_num": 0.043701171875, "loss_xval": 0.98046875, "num_input_tokens_seen": 34560500, "step": 299 }, { "epoch": 1.6129032258064515, "grad_norm": 33.190399169921875, "learning_rate": 5e-07, "loss": 1.2438, "num_input_tokens_seen": 34674192, "step": 300 }, { "epoch": 1.6129032258064515, "loss": 1.4761874675750732, "loss_ce": 0.0001132046600105241, "loss_iou": 0.59765625, "loss_num": 0.05615234375, "loss_xval": 1.4765625, "num_input_tokens_seen": 34674192, "step": 300 }, { "epoch": 1.618279569892473, "grad_norm": 17.97383689880371, "learning_rate": 5e-07, "loss": 1.2678, "num_input_tokens_seen": 34792356, "step": 301 }, { "epoch": 1.618279569892473, "loss": 1.1845439672470093, "loss_ce": 0.00046196964103728533, "loss_iou": 0.51171875, "loss_num": 0.032470703125, "loss_xval": 1.1875, "num_input_tokens_seen": 34792356, "step": 301 }, { "epoch": 1.6236559139784945, "grad_norm": 34.87323760986328, "learning_rate": 5e-07, "loss": 1.4259, "num_input_tokens_seen": 34906860, "step": 302 }, { "epoch": 1.6236559139784945, "loss": 1.9639605283737183, "loss_ce": 9.33092160266824e-05, "loss_iou": 0.8125, "loss_num": 0.0673828125, "loss_xval": 1.9609375, "num_input_tokens_seen": 34906860, "step": 302 }, { "epoch": 1.629032258064516, "grad_norm": 43.183319091796875, "learning_rate": 5e-07, "loss": 1.3879, "num_input_tokens_seen": 35024132, "step": 303 }, { "epoch": 1.629032258064516, "loss": 1.2176069021224976, "loss_ce": 0.000321756029734388, "loss_iou": 0.482421875, "loss_num": 0.05029296875, "loss_xval": 1.21875, "num_input_tokens_seen": 35024132, "step": 303 }, { "epoch": 1.6344086021505375, "grad_norm": 19.73162078857422, "learning_rate": 5e-07, "loss": 1.4286, "num_input_tokens_seen": 35136480, "step": 304 }, { "epoch": 1.6344086021505375, "loss": 1.241147756576538, "loss_ce": 0.00018088749493472278, "loss_iou": 0.50390625, "loss_num": 0.04736328125, "loss_xval": 1.2421875, "num_input_tokens_seen": 35136480, "step": 304 }, { "epoch": 1.639784946236559, "grad_norm": 50.98681640625, "learning_rate": 5e-07, "loss": 1.2652, "num_input_tokens_seen": 35253940, "step": 305 }, { "epoch": 1.639784946236559, "loss": 1.188593864440918, "loss_ce": 0.00011736956366803497, "loss_iou": 0.474609375, "loss_num": 0.047607421875, "loss_xval": 1.1875, "num_input_tokens_seen": 35253940, "step": 305 }, { "epoch": 1.6451612903225805, "grad_norm": 20.6522159576416, "learning_rate": 5e-07, "loss": 1.3487, "num_input_tokens_seen": 35365936, "step": 306 }, { "epoch": 1.6451612903225805, "loss": 1.3180465698242188, "loss_ce": 0.0009078371804207563, "loss_iou": 0.4921875, "loss_num": 0.06640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 35365936, "step": 306 }, { "epoch": 1.650537634408602, "grad_norm": 119.95570373535156, "learning_rate": 5e-07, "loss": 1.3643, "num_input_tokens_seen": 35479468, "step": 307 }, { "epoch": 1.650537634408602, "loss": 1.0518798828125, "loss_ce": 0.00012209292617626488, "loss_iou": 0.4609375, "loss_num": 0.0262451171875, "loss_xval": 1.0546875, "num_input_tokens_seen": 35479468, "step": 307 }, { "epoch": 1.6559139784946235, "grad_norm": 70.41187286376953, "learning_rate": 5e-07, "loss": 1.3017, "num_input_tokens_seen": 35592972, "step": 308 }, { "epoch": 1.6559139784946235, "loss": 1.3947410583496094, "loss_ce": 0.00020978764223400503, "loss_iou": 0.59375, "loss_num": 0.04150390625, "loss_xval": 1.390625, "num_input_tokens_seen": 35592972, "step": 308 }, { "epoch": 1.661290322580645, "grad_norm": 43.18191909790039, "learning_rate": 5e-07, "loss": 1.4124, "num_input_tokens_seen": 35706456, "step": 309 }, { "epoch": 1.661290322580645, "loss": 1.4879074096679688, "loss_ce": 0.00011445886048022658, "loss_iou": 0.6171875, "loss_num": 0.05029296875, "loss_xval": 1.484375, "num_input_tokens_seen": 35706456, "step": 309 }, { "epoch": 1.6666666666666665, "grad_norm": 27.96845054626465, "learning_rate": 5e-07, "loss": 1.3901, "num_input_tokens_seen": 35821372, "step": 310 }, { "epoch": 1.6666666666666665, "loss": 1.1279469728469849, "loss_ce": 0.00026141153648495674, "loss_iou": 0.46484375, "loss_num": 0.039794921875, "loss_xval": 1.125, "num_input_tokens_seen": 35821372, "step": 310 }, { "epoch": 1.672043010752688, "grad_norm": 17.558996200561523, "learning_rate": 5e-07, "loss": 1.4307, "num_input_tokens_seen": 35936864, "step": 311 }, { "epoch": 1.672043010752688, "loss": 1.627155065536499, "loss_ce": 0.00020183512242510915, "loss_iou": 0.671875, "loss_num": 0.05712890625, "loss_xval": 1.625, "num_input_tokens_seen": 35936864, "step": 311 }, { "epoch": 1.6774193548387095, "grad_norm": 27.77954864501953, "learning_rate": 5e-07, "loss": 1.2991, "num_input_tokens_seen": 36052228, "step": 312 }, { "epoch": 1.6774193548387095, "loss": 1.5525881052017212, "loss_ce": 0.00034195592161267996, "loss_iou": 0.65625, "loss_num": 0.04736328125, "loss_xval": 1.5546875, "num_input_tokens_seen": 36052228, "step": 312 }, { "epoch": 1.682795698924731, "grad_norm": 30.524085998535156, "learning_rate": 5e-07, "loss": 1.2047, "num_input_tokens_seen": 36167720, "step": 313 }, { "epoch": 1.682795698924731, "loss": 1.5037989616394043, "loss_ce": 0.00038096567732281983, "loss_iou": 0.61328125, "loss_num": 0.0556640625, "loss_xval": 1.5, "num_input_tokens_seen": 36167720, "step": 313 }, { "epoch": 1.6881720430107527, "grad_norm": 16.589773178100586, "learning_rate": 5e-07, "loss": 1.2374, "num_input_tokens_seen": 36284916, "step": 314 }, { "epoch": 1.6881720430107527, "loss": 0.9210742115974426, "loss_ce": 0.0001757803256623447, "loss_iou": 0.33984375, "loss_num": 0.048095703125, "loss_xval": 0.921875, "num_input_tokens_seen": 36284916, "step": 314 }, { "epoch": 1.6935483870967742, "grad_norm": 13.818792343139648, "learning_rate": 5e-07, "loss": 1.2967, "num_input_tokens_seen": 36398268, "step": 315 }, { "epoch": 1.6935483870967742, "loss": 1.1665892601013184, "loss_ce": 8.530773629900068e-05, "loss_iou": 0.51953125, "loss_num": 0.0255126953125, "loss_xval": 1.1640625, "num_input_tokens_seen": 36398268, "step": 315 }, { "epoch": 1.6989247311827957, "grad_norm": 16.541500091552734, "learning_rate": 5e-07, "loss": 1.3183, "num_input_tokens_seen": 36511248, "step": 316 }, { "epoch": 1.6989247311827957, "loss": 1.3652228116989136, "loss_ce": 0.0029180627316236496, "loss_iou": 0.486328125, "loss_num": 0.078125, "loss_xval": 1.359375, "num_input_tokens_seen": 36511248, "step": 316 }, { "epoch": 1.7043010752688172, "grad_norm": 25.74933433532715, "learning_rate": 5e-07, "loss": 1.3051, "num_input_tokens_seen": 36628920, "step": 317 }, { "epoch": 1.7043010752688172, "loss": 1.2639411687850952, "loss_ce": 0.0005134050734341145, "loss_iou": 0.5234375, "loss_num": 0.0439453125, "loss_xval": 1.265625, "num_input_tokens_seen": 36628920, "step": 317 }, { "epoch": 1.7096774193548387, "grad_norm": 29.4514217376709, "learning_rate": 5e-07, "loss": 1.298, "num_input_tokens_seen": 36745708, "step": 318 }, { "epoch": 1.7096774193548387, "loss": 0.8770350217819214, "loss_ce": 0.0005702002672478557, "loss_iou": 0.365234375, "loss_num": 0.0289306640625, "loss_xval": 0.875, "num_input_tokens_seen": 36745708, "step": 318 }, { "epoch": 1.7150537634408602, "grad_norm": 28.174436569213867, "learning_rate": 5e-07, "loss": 1.2279, "num_input_tokens_seen": 36860460, "step": 319 }, { "epoch": 1.7150537634408602, "loss": 1.5612757205963135, "loss_ce": 0.0002405668783467263, "loss_iou": 0.6328125, "loss_num": 0.05908203125, "loss_xval": 1.5625, "num_input_tokens_seen": 36860460, "step": 319 }, { "epoch": 1.7204301075268817, "grad_norm": 26.87467384338379, "learning_rate": 5e-07, "loss": 1.3022, "num_input_tokens_seen": 36975324, "step": 320 }, { "epoch": 1.7204301075268817, "loss": 0.9953126907348633, "loss_ce": 0.00019553990568965673, "loss_iou": 0.41015625, "loss_num": 0.034912109375, "loss_xval": 0.99609375, "num_input_tokens_seen": 36975324, "step": 320 }, { "epoch": 1.7258064516129032, "grad_norm": 22.934890747070312, "learning_rate": 5e-07, "loss": 1.1312, "num_input_tokens_seen": 37092484, "step": 321 }, { "epoch": 1.7258064516129032, "loss": 0.9908231496810913, "loss_ce": 0.00010050787386717275, "loss_iou": 0.404296875, "loss_num": 0.036376953125, "loss_xval": 0.9921875, "num_input_tokens_seen": 37092484, "step": 321 }, { "epoch": 1.7311827956989247, "grad_norm": 41.191402435302734, "learning_rate": 5e-07, "loss": 1.1811, "num_input_tokens_seen": 37210928, "step": 322 }, { "epoch": 1.7311827956989247, "loss": 1.0646507740020752, "loss_ce": 0.00019761751173064113, "loss_iou": 0.45703125, "loss_num": 0.0299072265625, "loss_xval": 1.0625, "num_input_tokens_seen": 37210928, "step": 322 }, { "epoch": 1.7365591397849462, "grad_norm": 24.213245391845703, "learning_rate": 5e-07, "loss": 1.375, "num_input_tokens_seen": 37327420, "step": 323 }, { "epoch": 1.7365591397849462, "loss": 1.363851547241211, "loss_ce": 8.200346928788349e-05, "loss_iou": 0.59375, "loss_num": 0.03515625, "loss_xval": 1.3671875, "num_input_tokens_seen": 37327420, "step": 323 }, { "epoch": 1.7419354838709677, "grad_norm": 18.232446670532227, "learning_rate": 5e-07, "loss": 1.106, "num_input_tokens_seen": 37442180, "step": 324 }, { "epoch": 1.7419354838709677, "loss": 1.2695823907852173, "loss_ce": 5.1160201110178605e-05, "loss_iou": 0.54296875, "loss_num": 0.03662109375, "loss_xval": 1.265625, "num_input_tokens_seen": 37442180, "step": 324 }, { "epoch": 1.7473118279569892, "grad_norm": 25.369831085205078, "learning_rate": 5e-07, "loss": 1.2129, "num_input_tokens_seen": 37555580, "step": 325 }, { "epoch": 1.7473118279569892, "loss": 1.0382353067398071, "loss_ce": 0.00014935382932890207, "loss_iou": 0.43359375, "loss_num": 0.03466796875, "loss_xval": 1.0390625, "num_input_tokens_seen": 37555580, "step": 325 }, { "epoch": 1.7526881720430108, "grad_norm": 20.25792694091797, "learning_rate": 5e-07, "loss": 1.327, "num_input_tokens_seen": 37667932, "step": 326 }, { "epoch": 1.7526881720430108, "loss": 1.4074268341064453, "loss_ce": 0.00020022218814119697, "loss_iou": 0.578125, "loss_num": 0.049560546875, "loss_xval": 1.40625, "num_input_tokens_seen": 37667932, "step": 326 }, { "epoch": 1.7580645161290323, "grad_norm": 25.041458129882812, "learning_rate": 5e-07, "loss": 1.1848, "num_input_tokens_seen": 37783540, "step": 327 }, { "epoch": 1.7580645161290323, "loss": 0.971797525882721, "loss_ce": 0.00011782460933318362, "loss_iou": 0.396484375, "loss_num": 0.035888671875, "loss_xval": 0.97265625, "num_input_tokens_seen": 37783540, "step": 327 }, { "epoch": 1.7634408602150538, "grad_norm": 15.323723793029785, "learning_rate": 5e-07, "loss": 1.5391, "num_input_tokens_seen": 37899732, "step": 328 }, { "epoch": 1.7634408602150538, "loss": 1.2816846370697021, "loss_ce": 0.00043460974120534956, "loss_iou": 0.51953125, "loss_num": 0.048095703125, "loss_xval": 1.28125, "num_input_tokens_seen": 37899732, "step": 328 }, { "epoch": 1.7688172043010753, "grad_norm": 27.60696029663086, "learning_rate": 5e-07, "loss": 1.1917, "num_input_tokens_seen": 38014888, "step": 329 }, { "epoch": 1.7688172043010753, "loss": 1.0821279287338257, "loss_ce": 9.670348663348705e-05, "loss_iou": 0.46484375, "loss_num": 0.03076171875, "loss_xval": 1.078125, "num_input_tokens_seen": 38014888, "step": 329 }, { "epoch": 1.7741935483870968, "grad_norm": 188.74517822265625, "learning_rate": 5e-07, "loss": 1.2713, "num_input_tokens_seen": 38132196, "step": 330 }, { "epoch": 1.7741935483870968, "loss": 1.3234026432037354, "loss_ce": 0.00016042862262111157, "loss_iou": 0.5546875, "loss_num": 0.042724609375, "loss_xval": 1.3203125, "num_input_tokens_seen": 38132196, "step": 330 }, { "epoch": 1.7795698924731183, "grad_norm": 25.83640480041504, "learning_rate": 5e-07, "loss": 1.1321, "num_input_tokens_seen": 38250340, "step": 331 }, { "epoch": 1.7795698924731183, "loss": 1.1709858179092407, "loss_ce": 0.0008198174182325602, "loss_iou": 0.45703125, "loss_num": 0.05126953125, "loss_xval": 1.171875, "num_input_tokens_seen": 38250340, "step": 331 }, { "epoch": 1.7849462365591398, "grad_norm": 23.28061294555664, "learning_rate": 5e-07, "loss": 1.1361, "num_input_tokens_seen": 38365668, "step": 332 }, { "epoch": 1.7849462365591398, "loss": 0.995232880115509, "loss_ce": 0.0001157313454314135, "loss_iou": 0.4375, "loss_num": 0.023681640625, "loss_xval": 0.99609375, "num_input_tokens_seen": 38365668, "step": 332 }, { "epoch": 1.7903225806451613, "grad_norm": 24.09176254272461, "learning_rate": 5e-07, "loss": 1.1933, "num_input_tokens_seen": 38484236, "step": 333 }, { "epoch": 1.7903225806451613, "loss": 1.6867761611938477, "loss_ce": 0.0007410419639199972, "loss_iou": 0.70703125, "loss_num": 0.0546875, "loss_xval": 1.6875, "num_input_tokens_seen": 38484236, "step": 333 }, { "epoch": 1.7956989247311828, "grad_norm": 27.607431411743164, "learning_rate": 5e-07, "loss": 1.5461, "num_input_tokens_seen": 38597012, "step": 334 }, { "epoch": 1.7956989247311828, "loss": 1.3316617012023926, "loss_ce": 0.001095283660106361, "loss_iou": 0.58203125, "loss_num": 0.032958984375, "loss_xval": 1.328125, "num_input_tokens_seen": 38597012, "step": 334 }, { "epoch": 1.8010752688172043, "grad_norm": 35.65273666381836, "learning_rate": 5e-07, "loss": 1.206, "num_input_tokens_seen": 38712840, "step": 335 }, { "epoch": 1.8010752688172043, "loss": 1.0990673303604126, "loss_ce": 0.00019039415929000825, "loss_iou": 0.453125, "loss_num": 0.038330078125, "loss_xval": 1.1015625, "num_input_tokens_seen": 38712840, "step": 335 }, { "epoch": 1.8064516129032258, "grad_norm": 20.699142456054688, "learning_rate": 5e-07, "loss": 1.1832, "num_input_tokens_seen": 38830404, "step": 336 }, { "epoch": 1.8064516129032258, "loss": 0.744584321975708, "loss_ce": 0.00019959561177529395, "loss_iou": 0.306640625, "loss_num": 0.026123046875, "loss_xval": 0.74609375, "num_input_tokens_seen": 38830404, "step": 336 }, { "epoch": 1.8118279569892473, "grad_norm": 42.17115783691406, "learning_rate": 5e-07, "loss": 1.2782, "num_input_tokens_seen": 38945332, "step": 337 }, { "epoch": 1.8118279569892473, "loss": 1.2659610509872437, "loss_ce": 9.188587864628062e-05, "loss_iou": 0.51953125, "loss_num": 0.04541015625, "loss_xval": 1.265625, "num_input_tokens_seen": 38945332, "step": 337 }, { "epoch": 1.817204301075269, "grad_norm": 24.7875919342041, "learning_rate": 5e-07, "loss": 0.935, "num_input_tokens_seen": 39060996, "step": 338 }, { "epoch": 1.817204301075269, "loss": 1.0008047819137573, "loss_ce": 7.23384291632101e-05, "loss_iou": 0.38671875, "loss_num": 0.045654296875, "loss_xval": 1.0, "num_input_tokens_seen": 39060996, "step": 338 }, { "epoch": 1.8225806451612905, "grad_norm": 25.732702255249023, "learning_rate": 5e-07, "loss": 1.2865, "num_input_tokens_seen": 39175236, "step": 339 }, { "epoch": 1.8225806451612905, "loss": 1.631498098373413, "loss_ce": 0.00015041400911286473, "loss_iou": 0.671875, "loss_num": 0.057861328125, "loss_xval": 1.6328125, "num_input_tokens_seen": 39175236, "step": 339 }, { "epoch": 1.827956989247312, "grad_norm": 41.03340530395508, "learning_rate": 5e-07, "loss": 1.1313, "num_input_tokens_seen": 39290140, "step": 340 }, { "epoch": 1.827956989247312, "loss": 1.2554937601089478, "loss_ce": 0.0001226377353304997, "loss_iou": 0.51171875, "loss_num": 0.04638671875, "loss_xval": 1.2578125, "num_input_tokens_seen": 39290140, "step": 340 }, { "epoch": 1.8333333333333335, "grad_norm": 26.432586669921875, "learning_rate": 5e-07, "loss": 1.3736, "num_input_tokens_seen": 39403528, "step": 341 }, { "epoch": 1.8333333333333335, "loss": 1.6625630855560303, "loss_ce": 0.00020961585687473416, "loss_iou": 0.6953125, "loss_num": 0.054443359375, "loss_xval": 1.6640625, "num_input_tokens_seen": 39403528, "step": 341 }, { "epoch": 1.838709677419355, "grad_norm": 41.6815185546875, "learning_rate": 5e-07, "loss": 1.3164, "num_input_tokens_seen": 39519868, "step": 342 }, { "epoch": 1.838709677419355, "loss": 1.2112619876861572, "loss_ce": 0.0003243897808715701, "loss_iou": 0.484375, "loss_num": 0.04833984375, "loss_xval": 1.2109375, "num_input_tokens_seen": 39519868, "step": 342 }, { "epoch": 1.8440860215053765, "grad_norm": 31.32219696044922, "learning_rate": 5e-07, "loss": 1.2299, "num_input_tokens_seen": 39636492, "step": 343 }, { "epoch": 1.8440860215053765, "loss": 1.002537488937378, "loss_ce": 9.602365753380582e-05, "loss_iou": 0.408203125, "loss_num": 0.037353515625, "loss_xval": 1.0, "num_input_tokens_seen": 39636492, "step": 343 }, { "epoch": 1.849462365591398, "grad_norm": 32.94488525390625, "learning_rate": 5e-07, "loss": 1.201, "num_input_tokens_seen": 39752620, "step": 344 }, { "epoch": 1.849462365591398, "loss": 1.2476418018341064, "loss_ce": 8.327006798936054e-05, "loss_iou": 0.515625, "loss_num": 0.04296875, "loss_xval": 1.25, "num_input_tokens_seen": 39752620, "step": 344 }, { "epoch": 1.8548387096774195, "grad_norm": 39.875755310058594, "learning_rate": 5e-07, "loss": 1.0772, "num_input_tokens_seen": 39869620, "step": 345 }, { "epoch": 1.8548387096774195, "loss": 0.9825512766838074, "loss_ce": 0.00012941968452651054, "loss_iou": 0.4140625, "loss_num": 0.03076171875, "loss_xval": 0.984375, "num_input_tokens_seen": 39869620, "step": 345 }, { "epoch": 1.860215053763441, "grad_norm": 68.85070037841797, "learning_rate": 5e-07, "loss": 1.2609, "num_input_tokens_seen": 39988172, "step": 346 }, { "epoch": 1.860215053763441, "loss": 1.5494775772094727, "loss_ce": 0.0011378251947462559, "loss_iou": 0.6328125, "loss_num": 0.056884765625, "loss_xval": 1.546875, "num_input_tokens_seen": 39988172, "step": 346 }, { "epoch": 1.8655913978494625, "grad_norm": 37.56696701049805, "learning_rate": 5e-07, "loss": 1.2576, "num_input_tokens_seen": 40105604, "step": 347 }, { "epoch": 1.8655913978494625, "loss": 1.5534281730651855, "loss_ce": 0.00020543779828585684, "loss_iou": 0.65234375, "loss_num": 0.0498046875, "loss_xval": 1.5546875, "num_input_tokens_seen": 40105604, "step": 347 }, { "epoch": 1.870967741935484, "grad_norm": 24.1704158782959, "learning_rate": 5e-07, "loss": 1.0086, "num_input_tokens_seen": 40222408, "step": 348 }, { "epoch": 1.870967741935484, "loss": 1.2032214403152466, "loss_ce": 9.641717042541131e-05, "loss_iou": 0.474609375, "loss_num": 0.05078125, "loss_xval": 1.203125, "num_input_tokens_seen": 40222408, "step": 348 }, { "epoch": 1.8763440860215055, "grad_norm": 42.38536834716797, "learning_rate": 5e-07, "loss": 1.1316, "num_input_tokens_seen": 40335056, "step": 349 }, { "epoch": 1.8763440860215055, "loss": 1.070625901222229, "loss_ce": 6.925688649062067e-05, "loss_iou": 0.443359375, "loss_num": 0.037353515625, "loss_xval": 1.0703125, "num_input_tokens_seen": 40335056, "step": 349 }, { "epoch": 1.881720430107527, "grad_norm": 48.565589904785156, "learning_rate": 5e-07, "loss": 1.1331, "num_input_tokens_seen": 40447692, "step": 350 }, { "epoch": 1.881720430107527, "loss": 1.1359137296676636, "loss_ce": 0.0001715672988211736, "loss_iou": 0.47265625, "loss_num": 0.037841796875, "loss_xval": 1.1328125, "num_input_tokens_seen": 40447692, "step": 350 }, { "epoch": 1.8870967741935485, "grad_norm": 30.365177154541016, "learning_rate": 5e-07, "loss": 1.203, "num_input_tokens_seen": 40561948, "step": 351 }, { "epoch": 1.8870967741935485, "loss": 1.8936853408813477, "loss_ce": 0.00013059121556580067, "loss_iou": 0.796875, "loss_num": 0.0595703125, "loss_xval": 1.890625, "num_input_tokens_seen": 40561948, "step": 351 }, { "epoch": 1.89247311827957, "grad_norm": 27.804824829101562, "learning_rate": 5e-07, "loss": 1.1603, "num_input_tokens_seen": 40679012, "step": 352 }, { "epoch": 1.89247311827957, "loss": 1.7882091999053955, "loss_ce": 0.00012324145063757896, "loss_iou": 0.74609375, "loss_num": 0.0595703125, "loss_xval": 1.7890625, "num_input_tokens_seen": 40679012, "step": 352 }, { "epoch": 1.8978494623655915, "grad_norm": 44.566585540771484, "learning_rate": 5e-07, "loss": 1.1681, "num_input_tokens_seen": 40795828, "step": 353 }, { "epoch": 1.8978494623655915, "loss": 1.2621114253997803, "loss_ce": 0.0008809185237623751, "loss_iou": 0.51171875, "loss_num": 0.047607421875, "loss_xval": 1.2578125, "num_input_tokens_seen": 40795828, "step": 353 }, { "epoch": 1.903225806451613, "grad_norm": 18.61931610107422, "learning_rate": 5e-07, "loss": 1.0977, "num_input_tokens_seen": 40913392, "step": 354 }, { "epoch": 1.903225806451613, "loss": 1.3940973281860352, "loss_ce": 5.4349635320249945e-05, "loss_iou": 0.60546875, "loss_num": 0.0361328125, "loss_xval": 1.390625, "num_input_tokens_seen": 40913392, "step": 354 }, { "epoch": 1.9086021505376345, "grad_norm": 22.107934951782227, "learning_rate": 5e-07, "loss": 1.2379, "num_input_tokens_seen": 41028476, "step": 355 }, { "epoch": 1.9086021505376345, "loss": 1.36515212059021, "loss_ce": 0.00040596723556518555, "loss_iou": 0.58203125, "loss_num": 0.04052734375, "loss_xval": 1.3671875, "num_input_tokens_seen": 41028476, "step": 355 }, { "epoch": 1.913978494623656, "grad_norm": 27.9364013671875, "learning_rate": 5e-07, "loss": 1.227, "num_input_tokens_seen": 41143844, "step": 356 }, { "epoch": 1.913978494623656, "loss": 1.2688488960266113, "loss_ce": 5.013001646148041e-05, "loss_iou": 0.5390625, "loss_num": 0.038330078125, "loss_xval": 1.265625, "num_input_tokens_seen": 41143844, "step": 356 }, { "epoch": 1.9193548387096775, "grad_norm": 34.17280197143555, "learning_rate": 5e-07, "loss": 1.2847, "num_input_tokens_seen": 41258072, "step": 357 }, { "epoch": 1.9193548387096775, "loss": 1.5377854108810425, "loss_ce": 0.00018781019025482237, "loss_iou": 0.65234375, "loss_num": 0.0458984375, "loss_xval": 1.5390625, "num_input_tokens_seen": 41258072, "step": 357 }, { "epoch": 1.924731182795699, "grad_norm": 20.038223266601562, "learning_rate": 5e-07, "loss": 1.3026, "num_input_tokens_seen": 41376792, "step": 358 }, { "epoch": 1.924731182795699, "loss": 1.1736713647842407, "loss_ce": 8.737420284887776e-05, "loss_iou": 0.50390625, "loss_num": 0.033203125, "loss_xval": 1.171875, "num_input_tokens_seen": 41376792, "step": 358 }, { "epoch": 1.9301075268817205, "grad_norm": 24.019777297973633, "learning_rate": 5e-07, "loss": 1.2423, "num_input_tokens_seen": 41494504, "step": 359 }, { "epoch": 1.9301075268817205, "loss": 1.027751088142395, "loss_ce": 0.00016325050091836601, "loss_iou": 0.404296875, "loss_num": 0.0439453125, "loss_xval": 1.03125, "num_input_tokens_seen": 41494504, "step": 359 }, { "epoch": 1.935483870967742, "grad_norm": 18.414155960083008, "learning_rate": 5e-07, "loss": 0.8811, "num_input_tokens_seen": 41610112, "step": 360 }, { "epoch": 1.935483870967742, "loss": 1.089284896850586, "loss_ce": 0.0004177556838840246, "loss_iou": 0.44921875, "loss_num": 0.038330078125, "loss_xval": 1.0859375, "num_input_tokens_seen": 41610112, "step": 360 }, { "epoch": 1.9408602150537635, "grad_norm": 24.023271560668945, "learning_rate": 5e-07, "loss": 1.2472, "num_input_tokens_seen": 41725284, "step": 361 }, { "epoch": 1.9408602150537635, "loss": 1.0610706806182861, "loss_ce": 3.5555276554077864e-05, "loss_iou": 0.451171875, "loss_num": 0.031494140625, "loss_xval": 1.0625, "num_input_tokens_seen": 41725284, "step": 361 }, { "epoch": 1.946236559139785, "grad_norm": 50.585227966308594, "learning_rate": 5e-07, "loss": 1.3034, "num_input_tokens_seen": 41841908, "step": 362 }, { "epoch": 1.946236559139785, "loss": 0.8804289102554321, "loss_ce": 5.782771768281236e-05, "loss_iou": 0.376953125, "loss_num": 0.025146484375, "loss_xval": 0.87890625, "num_input_tokens_seen": 41841908, "step": 362 }, { "epoch": 1.9516129032258065, "grad_norm": 17.747150421142578, "learning_rate": 5e-07, "loss": 1.1451, "num_input_tokens_seen": 41956632, "step": 363 }, { "epoch": 1.9516129032258065, "loss": 0.9972618818283081, "loss_ce": 0.00019158700888510793, "loss_iou": 0.41015625, "loss_num": 0.03564453125, "loss_xval": 0.99609375, "num_input_tokens_seen": 41956632, "step": 363 }, { "epoch": 1.956989247311828, "grad_norm": 22.728187561035156, "learning_rate": 5e-07, "loss": 1.0911, "num_input_tokens_seen": 42072256, "step": 364 }, { "epoch": 1.956989247311828, "loss": 0.9599194526672363, "loss_ce": 0.002155695343390107, "loss_iou": 0.3984375, "loss_num": 0.03271484375, "loss_xval": 0.95703125, "num_input_tokens_seen": 42072256, "step": 364 }, { "epoch": 1.9623655913978495, "grad_norm": 30.408143997192383, "learning_rate": 5e-07, "loss": 1.194, "num_input_tokens_seen": 42186112, "step": 365 }, { "epoch": 1.9623655913978495, "loss": 1.1233935356140137, "loss_ce": 0.00010247259342577308, "loss_iou": 0.45703125, "loss_num": 0.041259765625, "loss_xval": 1.125, "num_input_tokens_seen": 42186112, "step": 365 }, { "epoch": 1.967741935483871, "grad_norm": 49.245906829833984, "learning_rate": 5e-07, "loss": 1.253, "num_input_tokens_seen": 42301024, "step": 366 }, { "epoch": 1.967741935483871, "loss": 1.3013803958892822, "loss_ce": 0.00011083489516749978, "loss_iou": 0.515625, "loss_num": 0.0546875, "loss_xval": 1.3046875, "num_input_tokens_seen": 42301024, "step": 366 }, { "epoch": 1.9731182795698925, "grad_norm": 25.63349723815918, "learning_rate": 5e-07, "loss": 1.1122, "num_input_tokens_seen": 42417404, "step": 367 }, { "epoch": 1.9731182795698925, "loss": 1.0342490673065186, "loss_ce": 6.939281593076885e-05, "loss_iou": 0.42578125, "loss_num": 0.036376953125, "loss_xval": 1.03125, "num_input_tokens_seen": 42417404, "step": 367 }, { "epoch": 1.978494623655914, "grad_norm": 25.155261993408203, "learning_rate": 5e-07, "loss": 1.0743, "num_input_tokens_seen": 42531896, "step": 368 }, { "epoch": 1.978494623655914, "loss": 1.3867135047912598, "loss_ce": 0.00023892437457107008, "loss_iou": 0.5859375, "loss_num": 0.04345703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 42531896, "step": 368 }, { "epoch": 1.9838709677419355, "grad_norm": 26.180747985839844, "learning_rate": 5e-07, "loss": 1.1534, "num_input_tokens_seen": 42649640, "step": 369 }, { "epoch": 1.9838709677419355, "loss": 0.8880460262298584, "loss_ce": 0.00010654062498360872, "loss_iou": 0.345703125, "loss_num": 0.0390625, "loss_xval": 0.88671875, "num_input_tokens_seen": 42649640, "step": 369 }, { "epoch": 1.989247311827957, "grad_norm": 42.74534606933594, "learning_rate": 5e-07, "loss": 1.1419, "num_input_tokens_seen": 42766364, "step": 370 }, { "epoch": 1.989247311827957, "loss": 1.2762598991394043, "loss_ce": 0.00013682043936569244, "loss_iou": 0.49609375, "loss_num": 0.056396484375, "loss_xval": 1.2734375, "num_input_tokens_seen": 42766364, "step": 370 }, { "epoch": 1.9946236559139785, "grad_norm": 21.44220542907715, "learning_rate": 5e-07, "loss": 1.2621, "num_input_tokens_seen": 42881696, "step": 371 }, { "epoch": 1.9946236559139785, "loss": 1.342010736465454, "loss_ce": 0.00021379867393989116, "loss_iou": 0.55078125, "loss_num": 0.048583984375, "loss_xval": 1.34375, "num_input_tokens_seen": 42881696, "step": 371 }, { "epoch": 2.0, "grad_norm": 19.68453598022461, "learning_rate": 5e-07, "loss": 1.0573, "num_input_tokens_seen": 42997500, "step": 372 }, { "epoch": 2.0, "loss": 1.19719660282135, "loss_ce": 0.00017506346921436489, "loss_iou": 0.474609375, "loss_num": 0.050048828125, "loss_xval": 1.1953125, "num_input_tokens_seen": 42997500, "step": 372 }, { "epoch": 2.0053763440860215, "grad_norm": 29.257863998413086, "learning_rate": 5e-07, "loss": 1.1075, "num_input_tokens_seen": 43113508, "step": 373 }, { "epoch": 2.0053763440860215, "loss": 0.9019731283187866, "loss_ce": 0.00011766343232011423, "loss_iou": 0.3671875, "loss_num": 0.03369140625, "loss_xval": 0.90234375, "num_input_tokens_seen": 43113508, "step": 373 }, { "epoch": 2.010752688172043, "grad_norm": 38.60951232910156, "learning_rate": 5e-07, "loss": 1.2843, "num_input_tokens_seen": 43231356, "step": 374 }, { "epoch": 2.010752688172043, "loss": 0.9934818744659424, "loss_ce": 7.367755461018533e-05, "loss_iou": 0.423828125, "loss_num": 0.0289306640625, "loss_xval": 0.9921875, "num_input_tokens_seen": 43231356, "step": 374 }, { "epoch": 2.0161290322580645, "grad_norm": 29.696205139160156, "learning_rate": 5e-07, "loss": 1.0666, "num_input_tokens_seen": 43348044, "step": 375 }, { "epoch": 2.0161290322580645, "loss": 1.5049880743026733, "loss_ce": 0.00010528881102800369, "loss_iou": 0.63671875, "loss_num": 0.046142578125, "loss_xval": 1.5078125, "num_input_tokens_seen": 43348044, "step": 375 }, { "epoch": 2.021505376344086, "grad_norm": 50.412784576416016, "learning_rate": 5e-07, "loss": 0.9628, "num_input_tokens_seen": 43463952, "step": 376 }, { "epoch": 2.021505376344086, "loss": 1.0265620946884155, "loss_ce": 0.0001949374855030328, "loss_iou": 0.4140625, "loss_num": 0.039794921875, "loss_xval": 1.0234375, "num_input_tokens_seen": 43463952, "step": 376 }, { "epoch": 2.0268817204301075, "grad_norm": 26.51283836364746, "learning_rate": 5e-07, "loss": 1.2997, "num_input_tokens_seen": 43579760, "step": 377 }, { "epoch": 2.0268817204301075, "loss": 1.526553988456726, "loss_ce": 0.00018680098582990468, "loss_iou": 0.5859375, "loss_num": 0.07080078125, "loss_xval": 1.5234375, "num_input_tokens_seen": 43579760, "step": 377 }, { "epoch": 2.032258064516129, "grad_norm": 22.23632049560547, "learning_rate": 5e-07, "loss": 1.3551, "num_input_tokens_seen": 43698396, "step": 378 }, { "epoch": 2.032258064516129, "loss": 1.6991592645645142, "loss_ce": 0.007753017358481884, "loss_iou": 0.6796875, "loss_num": 0.06494140625, "loss_xval": 1.6875, "num_input_tokens_seen": 43698396, "step": 378 }, { "epoch": 2.0376344086021505, "grad_norm": 23.680484771728516, "learning_rate": 5e-07, "loss": 0.9504, "num_input_tokens_seen": 43815624, "step": 379 }, { "epoch": 2.0376344086021505, "loss": 0.9969279766082764, "loss_ce": 0.0003459237632341683, "loss_iou": 0.423828125, "loss_num": 0.029541015625, "loss_xval": 0.99609375, "num_input_tokens_seen": 43815624, "step": 379 }, { "epoch": 2.043010752688172, "grad_norm": 26.614883422851562, "learning_rate": 5e-07, "loss": 1.0611, "num_input_tokens_seen": 43930860, "step": 380 }, { "epoch": 2.043010752688172, "loss": 1.0470634698867798, "loss_ce": 0.00043260969687253237, "loss_iou": 0.431640625, "loss_num": 0.03662109375, "loss_xval": 1.046875, "num_input_tokens_seen": 43930860, "step": 380 }, { "epoch": 2.0483870967741935, "grad_norm": 22.758464813232422, "learning_rate": 5e-07, "loss": 1.1246, "num_input_tokens_seen": 44045744, "step": 381 }, { "epoch": 2.0483870967741935, "loss": 1.1870884895324707, "loss_ce": 7.680810813326389e-05, "loss_iou": 0.462890625, "loss_num": 0.0517578125, "loss_xval": 1.1875, "num_input_tokens_seen": 44045744, "step": 381 }, { "epoch": 2.053763440860215, "grad_norm": 23.079858779907227, "learning_rate": 5e-07, "loss": 1.0439, "num_input_tokens_seen": 44162040, "step": 382 }, { "epoch": 2.053763440860215, "loss": 1.032552719116211, "loss_ce": 8.202037861337885e-05, "loss_iou": 0.40234375, "loss_num": 0.04541015625, "loss_xval": 1.03125, "num_input_tokens_seen": 44162040, "step": 382 }, { "epoch": 2.0591397849462365, "grad_norm": 33.786869049072266, "learning_rate": 5e-07, "loss": 1.0921, "num_input_tokens_seen": 44278920, "step": 383 }, { "epoch": 2.0591397849462365, "loss": 0.8577149510383606, "loss_ce": 4.895062738796696e-05, "loss_iou": 0.361328125, "loss_num": 0.02685546875, "loss_xval": 0.859375, "num_input_tokens_seen": 44278920, "step": 383 }, { "epoch": 2.064516129032258, "grad_norm": 25.45073699951172, "learning_rate": 5e-07, "loss": 1.0089, "num_input_tokens_seen": 44395980, "step": 384 }, { "epoch": 2.064516129032258, "loss": 0.9214950203895569, "loss_ce": 0.0001082730814232491, "loss_iou": 0.380859375, "loss_num": 0.031494140625, "loss_xval": 0.921875, "num_input_tokens_seen": 44395980, "step": 384 }, { "epoch": 2.0698924731182795, "grad_norm": 24.89506721496582, "learning_rate": 5e-07, "loss": 0.9691, "num_input_tokens_seen": 44510232, "step": 385 }, { "epoch": 2.0698924731182795, "loss": 1.1255779266357422, "loss_ce": 8.95499688340351e-05, "loss_iou": 0.48828125, "loss_num": 0.0296630859375, "loss_xval": 1.125, "num_input_tokens_seen": 44510232, "step": 385 }, { "epoch": 2.075268817204301, "grad_norm": 23.22723388671875, "learning_rate": 5e-07, "loss": 1.2168, "num_input_tokens_seen": 44629312, "step": 386 }, { "epoch": 2.075268817204301, "loss": 1.2364214658737183, "loss_ce": 9.335231152363122e-05, "loss_iou": 0.52734375, "loss_num": 0.035888671875, "loss_xval": 1.234375, "num_input_tokens_seen": 44629312, "step": 386 }, { "epoch": 2.0806451612903225, "grad_norm": 24.07893180847168, "learning_rate": 5e-07, "loss": 1.3297, "num_input_tokens_seen": 44742756, "step": 387 }, { "epoch": 2.0806451612903225, "loss": 1.907779335975647, "loss_ce": 6.454905087593943e-05, "loss_iou": 0.8125, "loss_num": 0.0556640625, "loss_xval": 1.90625, "num_input_tokens_seen": 44742756, "step": 387 }, { "epoch": 2.086021505376344, "grad_norm": 23.856931686401367, "learning_rate": 5e-07, "loss": 0.9469, "num_input_tokens_seen": 44857104, "step": 388 }, { "epoch": 2.086021505376344, "loss": 0.8787313103675842, "loss_ce": 6.920649320818484e-05, "loss_iou": 0.34765625, "loss_num": 0.036376953125, "loss_xval": 0.87890625, "num_input_tokens_seen": 44857104, "step": 388 }, { "epoch": 2.0913978494623655, "grad_norm": 20.004840850830078, "learning_rate": 5e-07, "loss": 1.1326, "num_input_tokens_seen": 44972800, "step": 389 }, { "epoch": 2.0913978494623655, "loss": 0.8999965786933899, "loss_ce": 0.0003383511211723089, "loss_iou": 0.3828125, "loss_num": 0.0274658203125, "loss_xval": 0.8984375, "num_input_tokens_seen": 44972800, "step": 389 }, { "epoch": 2.096774193548387, "grad_norm": 25.814470291137695, "learning_rate": 5e-07, "loss": 1.1614, "num_input_tokens_seen": 45088216, "step": 390 }, { "epoch": 2.096774193548387, "loss": 0.912952184677124, "loss_ce": 0.00011043237464036793, "loss_iou": 0.38671875, "loss_num": 0.02783203125, "loss_xval": 0.9140625, "num_input_tokens_seen": 45088216, "step": 390 }, { "epoch": 2.1021505376344085, "grad_norm": 33.15570068359375, "learning_rate": 5e-07, "loss": 1.1773, "num_input_tokens_seen": 45204076, "step": 391 }, { "epoch": 2.1021505376344085, "loss": 1.1292542219161987, "loss_ce": 0.00010382162872701883, "loss_iou": 0.482421875, "loss_num": 0.03271484375, "loss_xval": 1.1328125, "num_input_tokens_seen": 45204076, "step": 391 }, { "epoch": 2.10752688172043, "grad_norm": 22.90032196044922, "learning_rate": 5e-07, "loss": 1.2136, "num_input_tokens_seen": 45321116, "step": 392 }, { "epoch": 2.10752688172043, "loss": 0.8651525974273682, "loss_ce": 0.0004065177054144442, "loss_iou": 0.341796875, "loss_num": 0.03662109375, "loss_xval": 0.86328125, "num_input_tokens_seen": 45321116, "step": 392 }, { "epoch": 2.1129032258064515, "grad_norm": 26.047807693481445, "learning_rate": 5e-07, "loss": 1.0295, "num_input_tokens_seen": 45437272, "step": 393 }, { "epoch": 2.1129032258064515, "loss": 0.9288240075111389, "loss_ce": 0.0001130516902776435, "loss_iou": 0.376953125, "loss_num": 0.03466796875, "loss_xval": 0.9296875, "num_input_tokens_seen": 45437272, "step": 393 }, { "epoch": 2.118279569892473, "grad_norm": 19.5715389251709, "learning_rate": 5e-07, "loss": 1.17, "num_input_tokens_seen": 45553420, "step": 394 }, { "epoch": 2.118279569892473, "loss": 1.1490615606307983, "loss_ce": 0.0006241328082978725, "loss_iou": 0.4765625, "loss_num": 0.0390625, "loss_xval": 1.1484375, "num_input_tokens_seen": 45553420, "step": 394 }, { "epoch": 2.1236559139784945, "grad_norm": 22.863100051879883, "learning_rate": 5e-07, "loss": 1.2842, "num_input_tokens_seen": 45668668, "step": 395 }, { "epoch": 2.1236559139784945, "loss": 1.228628396987915, "loss_ce": 0.00011276049917796627, "loss_iou": 0.4921875, "loss_num": 0.04931640625, "loss_xval": 1.2265625, "num_input_tokens_seen": 45668668, "step": 395 }, { "epoch": 2.129032258064516, "grad_norm": 37.55246353149414, "learning_rate": 5e-07, "loss": 1.2884, "num_input_tokens_seen": 45783788, "step": 396 }, { "epoch": 2.129032258064516, "loss": 1.2168655395507812, "loss_ce": 6.866579497000203e-05, "loss_iou": 0.51171875, "loss_num": 0.037841796875, "loss_xval": 1.21875, "num_input_tokens_seen": 45783788, "step": 396 }, { "epoch": 2.1344086021505375, "grad_norm": 28.34800910949707, "learning_rate": 5e-07, "loss": 1.1351, "num_input_tokens_seen": 45902348, "step": 397 }, { "epoch": 2.1344086021505375, "loss": 0.9281342029571533, "loss_ce": 0.00015564513159915805, "loss_iou": 0.369140625, "loss_num": 0.038330078125, "loss_xval": 0.9296875, "num_input_tokens_seen": 45902348, "step": 397 }, { "epoch": 2.139784946236559, "grad_norm": 27.22662925720215, "learning_rate": 5e-07, "loss": 1.0093, "num_input_tokens_seen": 46017552, "step": 398 }, { "epoch": 2.139784946236559, "loss": 0.8364906311035156, "loss_ce": 6.483525066869333e-05, "loss_iou": 0.34375, "loss_num": 0.0296630859375, "loss_xval": 0.8359375, "num_input_tokens_seen": 46017552, "step": 398 }, { "epoch": 2.1451612903225805, "grad_norm": 18.058103561401367, "learning_rate": 5e-07, "loss": 1.0703, "num_input_tokens_seen": 46134956, "step": 399 }, { "epoch": 2.1451612903225805, "loss": 1.193534016609192, "loss_ce": 0.00017465403652749956, "loss_iou": 0.50390625, "loss_num": 0.036865234375, "loss_xval": 1.1953125, "num_input_tokens_seen": 46134956, "step": 399 }, { "epoch": 2.150537634408602, "grad_norm": 17.906354904174805, "learning_rate": 5e-07, "loss": 0.8587, "num_input_tokens_seen": 46252788, "step": 400 }, { "epoch": 2.150537634408602, "loss": 1.0112895965576172, "loss_ce": 5.915061046835035e-05, "loss_iou": 0.44140625, "loss_num": 0.026123046875, "loss_xval": 1.0078125, "num_input_tokens_seen": 46252788, "step": 400 }, { "epoch": 2.1559139784946235, "grad_norm": 28.75383949279785, "learning_rate": 5e-07, "loss": 1.0986, "num_input_tokens_seen": 46371824, "step": 401 }, { "epoch": 2.1559139784946235, "loss": 1.2594249248504639, "loss_ce": 0.0001476423058193177, "loss_iou": 0.5234375, "loss_num": 0.042236328125, "loss_xval": 1.2578125, "num_input_tokens_seen": 46371824, "step": 401 }, { "epoch": 2.161290322580645, "grad_norm": 27.938901901245117, "learning_rate": 5e-07, "loss": 1.198, "num_input_tokens_seen": 46487936, "step": 402 }, { "epoch": 2.161290322580645, "loss": 1.5396322011947632, "loss_ce": 8.129433263093233e-05, "loss_iou": 0.6328125, "loss_num": 0.0537109375, "loss_xval": 1.5390625, "num_input_tokens_seen": 46487936, "step": 402 }, { "epoch": 2.1666666666666665, "grad_norm": 23.74474334716797, "learning_rate": 5e-07, "loss": 1.2689, "num_input_tokens_seen": 46604848, "step": 403 }, { "epoch": 2.1666666666666665, "loss": 1.2063428163528442, "loss_ce": 0.00028815428959205747, "loss_iou": 0.5, "loss_num": 0.041259765625, "loss_xval": 1.203125, "num_input_tokens_seen": 46604848, "step": 403 }, { "epoch": 2.172043010752688, "grad_norm": 28.12541961669922, "learning_rate": 5e-07, "loss": 1.0269, "num_input_tokens_seen": 46721476, "step": 404 }, { "epoch": 2.172043010752688, "loss": 0.9769971370697021, "loss_ce": 0.000190503807971254, "loss_iou": 0.408203125, "loss_num": 0.032470703125, "loss_xval": 0.9765625, "num_input_tokens_seen": 46721476, "step": 404 }, { "epoch": 2.1774193548387095, "grad_norm": 43.488319396972656, "learning_rate": 5e-07, "loss": 1.047, "num_input_tokens_seen": 46837720, "step": 405 }, { "epoch": 2.1774193548387095, "loss": 1.1680833101272583, "loss_ce": 0.00011454369814600796, "loss_iou": 0.4921875, "loss_num": 0.03662109375, "loss_xval": 1.171875, "num_input_tokens_seen": 46837720, "step": 405 }, { "epoch": 2.182795698924731, "grad_norm": 34.567012786865234, "learning_rate": 5e-07, "loss": 1.054, "num_input_tokens_seen": 46953512, "step": 406 }, { "epoch": 2.182795698924731, "loss": 1.0838062763214111, "loss_ce": 0.00031020614551380277, "loss_iou": 0.431640625, "loss_num": 0.044189453125, "loss_xval": 1.0859375, "num_input_tokens_seen": 46953512, "step": 406 }, { "epoch": 2.1881720430107525, "grad_norm": 25.819557189941406, "learning_rate": 5e-07, "loss": 1.1636, "num_input_tokens_seen": 47068844, "step": 407 }, { "epoch": 2.1881720430107525, "loss": 1.2803220748901367, "loss_ce": 4.8659225285518914e-05, "loss_iou": 0.5625, "loss_num": 0.03173828125, "loss_xval": 1.28125, "num_input_tokens_seen": 47068844, "step": 407 }, { "epoch": 2.193548387096774, "grad_norm": 27.717172622680664, "learning_rate": 5e-07, "loss": 1.0821, "num_input_tokens_seen": 47187744, "step": 408 }, { "epoch": 2.193548387096774, "loss": 0.8509466052055359, "loss_ce": 0.0001165430003311485, "loss_iou": 0.3515625, "loss_num": 0.029052734375, "loss_xval": 0.8515625, "num_input_tokens_seen": 47187744, "step": 408 }, { "epoch": 2.1989247311827955, "grad_norm": 35.53479766845703, "learning_rate": 5e-07, "loss": 1.15, "num_input_tokens_seen": 47305892, "step": 409 }, { "epoch": 2.1989247311827955, "loss": 1.1757898330688477, "loss_ce": 0.0014734455617144704, "loss_iou": 0.45703125, "loss_num": 0.0517578125, "loss_xval": 1.171875, "num_input_tokens_seen": 47305892, "step": 409 }, { "epoch": 2.204301075268817, "grad_norm": 27.255151748657227, "learning_rate": 5e-07, "loss": 1.2494, "num_input_tokens_seen": 47423920, "step": 410 }, { "epoch": 2.204301075268817, "loss": 1.027686357498169, "loss_ce": 9.843394946074113e-05, "loss_iou": 0.431640625, "loss_num": 0.032470703125, "loss_xval": 1.03125, "num_input_tokens_seen": 47423920, "step": 410 }, { "epoch": 2.2096774193548385, "grad_norm": 22.448976516723633, "learning_rate": 5e-07, "loss": 1.1695, "num_input_tokens_seen": 47541688, "step": 411 }, { "epoch": 2.2096774193548385, "loss": 1.2508069276809692, "loss_ce": 7.446954259648919e-05, "loss_iou": 0.5078125, "loss_num": 0.046875, "loss_xval": 1.25, "num_input_tokens_seen": 47541688, "step": 411 }, { "epoch": 2.21505376344086, "grad_norm": 28.730241775512695, "learning_rate": 5e-07, "loss": 1.4838, "num_input_tokens_seen": 47655092, "step": 412 }, { "epoch": 2.21505376344086, "loss": 1.3450961112976074, "loss_ce": 0.00036948645720258355, "loss_iou": 0.55078125, "loss_num": 0.0478515625, "loss_xval": 1.34375, "num_input_tokens_seen": 47655092, "step": 412 }, { "epoch": 2.2204301075268815, "grad_norm": 31.948688507080078, "learning_rate": 5e-07, "loss": 1.1856, "num_input_tokens_seen": 47768948, "step": 413 }, { "epoch": 2.2204301075268815, "loss": 1.488875150680542, "loss_ce": 0.00010559949441812932, "loss_iou": 0.6015625, "loss_num": 0.0576171875, "loss_xval": 1.4921875, "num_input_tokens_seen": 47768948, "step": 413 }, { "epoch": 2.225806451612903, "grad_norm": 21.96146583557129, "learning_rate": 5e-07, "loss": 1.3412, "num_input_tokens_seen": 47883168, "step": 414 }, { "epoch": 2.225806451612903, "loss": 1.764957070350647, "loss_ce": 6.458328425651416e-05, "loss_iou": 0.7578125, "loss_num": 0.049072265625, "loss_xval": 1.765625, "num_input_tokens_seen": 47883168, "step": 414 }, { "epoch": 2.2311827956989245, "grad_norm": 18.671855926513672, "learning_rate": 5e-07, "loss": 1.2784, "num_input_tokens_seen": 48000756, "step": 415 }, { "epoch": 2.2311827956989245, "loss": 1.1895248889923096, "loss_ce": 7.184570131357759e-05, "loss_iou": 0.482421875, "loss_num": 0.044921875, "loss_xval": 1.1875, "num_input_tokens_seen": 48000756, "step": 415 }, { "epoch": 2.236559139784946, "grad_norm": 24.862743377685547, "learning_rate": 5e-07, "loss": 1.0924, "num_input_tokens_seen": 48117380, "step": 416 }, { "epoch": 2.236559139784946, "loss": 1.2938294410705566, "loss_ce": 0.00012818533286917955, "loss_iou": 0.5546875, "loss_num": 0.035888671875, "loss_xval": 1.296875, "num_input_tokens_seen": 48117380, "step": 416 }, { "epoch": 2.241935483870968, "grad_norm": 22.227251052856445, "learning_rate": 5e-07, "loss": 1.1037, "num_input_tokens_seen": 48233804, "step": 417 }, { "epoch": 2.241935483870968, "loss": 1.1917157173156738, "loss_ce": 6.538121670018882e-05, "loss_iou": 0.5078125, "loss_num": 0.03564453125, "loss_xval": 1.1953125, "num_input_tokens_seen": 48233804, "step": 417 }, { "epoch": 2.247311827956989, "grad_norm": 20.60923957824707, "learning_rate": 5e-07, "loss": 1.11, "num_input_tokens_seen": 48350532, "step": 418 }, { "epoch": 2.247311827956989, "loss": 1.3250372409820557, "loss_ce": 8.604960021330044e-05, "loss_iou": 0.55078125, "loss_num": 0.045166015625, "loss_xval": 1.328125, "num_input_tokens_seen": 48350532, "step": 418 }, { "epoch": 2.252688172043011, "grad_norm": 25.28318214416504, "learning_rate": 5e-07, "loss": 1.0196, "num_input_tokens_seen": 48468972, "step": 419 }, { "epoch": 2.252688172043011, "loss": 0.7651908993721008, "loss_ce": 5.4178675782168284e-05, "loss_iou": 0.333984375, "loss_num": 0.0196533203125, "loss_xval": 0.765625, "num_input_tokens_seen": 48468972, "step": 419 }, { "epoch": 2.258064516129032, "grad_norm": 28.183090209960938, "learning_rate": 5e-07, "loss": 1.1206, "num_input_tokens_seen": 48583576, "step": 420 }, { "epoch": 2.258064516129032, "loss": 0.9478361010551453, "loss_ce": 8.222197357099503e-05, "loss_iou": 0.408203125, "loss_num": 0.026611328125, "loss_xval": 0.94921875, "num_input_tokens_seen": 48583576, "step": 420 }, { "epoch": 2.263440860215054, "grad_norm": 21.173593521118164, "learning_rate": 5e-07, "loss": 1.0476, "num_input_tokens_seen": 48697700, "step": 421 }, { "epoch": 2.263440860215054, "loss": 0.9187718629837036, "loss_ce": 7.065266254357994e-05, "loss_iou": 0.373046875, "loss_num": 0.034423828125, "loss_xval": 0.91796875, "num_input_tokens_seen": 48697700, "step": 421 }, { "epoch": 2.268817204301075, "grad_norm": 20.93606948852539, "learning_rate": 5e-07, "loss": 1.0191, "num_input_tokens_seen": 48815324, "step": 422 }, { "epoch": 2.268817204301075, "loss": 1.077737808227539, "loss_ce": 0.00010112969175679609, "loss_iou": 0.44921875, "loss_num": 0.035888671875, "loss_xval": 1.078125, "num_input_tokens_seen": 48815324, "step": 422 }, { "epoch": 2.274193548387097, "grad_norm": 25.310138702392578, "learning_rate": 5e-07, "loss": 1.1967, "num_input_tokens_seen": 48929352, "step": 423 }, { "epoch": 2.274193548387097, "loss": 1.0349830389022827, "loss_ce": 7.098200148902833e-05, "loss_iou": 0.37890625, "loss_num": 0.05517578125, "loss_xval": 1.03125, "num_input_tokens_seen": 48929352, "step": 423 }, { "epoch": 2.279569892473118, "grad_norm": 29.386259078979492, "learning_rate": 5e-07, "loss": 1.2171, "num_input_tokens_seen": 49046488, "step": 424 }, { "epoch": 2.279569892473118, "loss": 1.1831927299499512, "loss_ce": 8.72045784490183e-05, "loss_iou": 0.48046875, "loss_num": 0.044677734375, "loss_xval": 1.1796875, "num_input_tokens_seen": 49046488, "step": 424 }, { "epoch": 2.28494623655914, "grad_norm": 45.5872802734375, "learning_rate": 5e-07, "loss": 1.1481, "num_input_tokens_seen": 49160324, "step": 425 }, { "epoch": 2.28494623655914, "loss": 0.8818759322166443, "loss_ce": 4.002233617939055e-05, "loss_iou": 0.353515625, "loss_num": 0.03515625, "loss_xval": 0.8828125, "num_input_tokens_seen": 49160324, "step": 425 }, { "epoch": 2.2903225806451615, "grad_norm": 24.708654403686523, "learning_rate": 5e-07, "loss": 0.9009, "num_input_tokens_seen": 49278180, "step": 426 }, { "epoch": 2.2903225806451615, "loss": 1.0570452213287354, "loss_ce": 3.839195778709836e-05, "loss_iou": 0.4140625, "loss_num": 0.045654296875, "loss_xval": 1.0546875, "num_input_tokens_seen": 49278180, "step": 426 }, { "epoch": 2.295698924731183, "grad_norm": 22.62456703186035, "learning_rate": 5e-07, "loss": 1.0272, "num_input_tokens_seen": 49392420, "step": 427 }, { "epoch": 2.295698924731183, "loss": 1.123003363609314, "loss_ce": 0.0016655162908136845, "loss_iou": 0.45703125, "loss_num": 0.041259765625, "loss_xval": 1.125, "num_input_tokens_seen": 49392420, "step": 427 }, { "epoch": 2.3010752688172045, "grad_norm": 23.786062240600586, "learning_rate": 5e-07, "loss": 1.1257, "num_input_tokens_seen": 49508968, "step": 428 }, { "epoch": 2.3010752688172045, "loss": 0.9988740682601929, "loss_ce": 9.479367145104334e-05, "loss_iou": 0.41796875, "loss_num": 0.032470703125, "loss_xval": 1.0, "num_input_tokens_seen": 49508968, "step": 428 }, { "epoch": 2.306451612903226, "grad_norm": 22.026674270629883, "learning_rate": 5e-07, "loss": 1.3368, "num_input_tokens_seen": 49623432, "step": 429 }, { "epoch": 2.306451612903226, "loss": 0.9367260336875916, "loss_ce": 0.0002026203874265775, "loss_iou": 0.3828125, "loss_num": 0.033935546875, "loss_xval": 0.9375, "num_input_tokens_seen": 49623432, "step": 429 }, { "epoch": 2.3118279569892475, "grad_norm": 28.196598052978516, "learning_rate": 5e-07, "loss": 0.9973, "num_input_tokens_seen": 49739376, "step": 430 }, { "epoch": 2.3118279569892475, "loss": 0.9739794731140137, "loss_ce": 0.00010253010259475559, "loss_iou": 0.359375, "loss_num": 0.051025390625, "loss_xval": 0.97265625, "num_input_tokens_seen": 49739376, "step": 430 }, { "epoch": 2.317204301075269, "grad_norm": 40.60808181762695, "learning_rate": 5e-07, "loss": 1.0842, "num_input_tokens_seen": 49855800, "step": 431 }, { "epoch": 2.317204301075269, "loss": 1.2353966236114502, "loss_ce": 4.509467544266954e-05, "loss_iou": 0.5078125, "loss_num": 0.044189453125, "loss_xval": 1.234375, "num_input_tokens_seen": 49855800, "step": 431 }, { "epoch": 2.3225806451612905, "grad_norm": 26.325517654418945, "learning_rate": 5e-07, "loss": 1.0719, "num_input_tokens_seen": 49968864, "step": 432 }, { "epoch": 2.3225806451612905, "loss": 0.9081317782402039, "loss_ce": 0.00017284035857301205, "loss_iou": 0.388671875, "loss_num": 0.0262451171875, "loss_xval": 0.90625, "num_input_tokens_seen": 49968864, "step": 432 }, { "epoch": 2.327956989247312, "grad_norm": 33.23957061767578, "learning_rate": 5e-07, "loss": 1.0661, "num_input_tokens_seen": 50084692, "step": 433 }, { "epoch": 2.327956989247312, "loss": 0.9331699013710022, "loss_ce": 6.441098230425268e-05, "loss_iou": 0.369140625, "loss_num": 0.0390625, "loss_xval": 0.93359375, "num_input_tokens_seen": 50084692, "step": 433 }, { "epoch": 2.3333333333333335, "grad_norm": 35.04689407348633, "learning_rate": 5e-07, "loss": 1.0487, "num_input_tokens_seen": 50195672, "step": 434 }, { "epoch": 2.3333333333333335, "loss": 1.0783984661102295, "loss_ce": 0.00039554949034936726, "loss_iou": 0.447265625, "loss_num": 0.036376953125, "loss_xval": 1.078125, "num_input_tokens_seen": 50195672, "step": 434 }, { "epoch": 2.338709677419355, "grad_norm": 24.483154296875, "learning_rate": 5e-07, "loss": 1.1979, "num_input_tokens_seen": 50313064, "step": 435 }, { "epoch": 2.338709677419355, "loss": 1.6211371421813965, "loss_ce": 4.333335891715251e-05, "loss_iou": 0.68359375, "loss_num": 0.050048828125, "loss_xval": 1.625, "num_input_tokens_seen": 50313064, "step": 435 }, { "epoch": 2.3440860215053765, "grad_norm": 28.063854217529297, "learning_rate": 5e-07, "loss": 1.0718, "num_input_tokens_seen": 50428608, "step": 436 }, { "epoch": 2.3440860215053765, "loss": 0.8634016513824463, "loss_ce": 0.00012039833382004872, "loss_iou": 0.361328125, "loss_num": 0.0279541015625, "loss_xval": 0.86328125, "num_input_tokens_seen": 50428608, "step": 436 }, { "epoch": 2.349462365591398, "grad_norm": 23.764488220214844, "learning_rate": 5e-07, "loss": 1.0705, "num_input_tokens_seen": 50539952, "step": 437 }, { "epoch": 2.349462365591398, "loss": 1.1048719882965088, "loss_ce": 0.00013563981337938458, "loss_iou": 0.443359375, "loss_num": 0.04345703125, "loss_xval": 1.1015625, "num_input_tokens_seen": 50539952, "step": 437 }, { "epoch": 2.3548387096774195, "grad_norm": 29.346738815307617, "learning_rate": 5e-07, "loss": 1.3305, "num_input_tokens_seen": 50651484, "step": 438 }, { "epoch": 2.3548387096774195, "loss": 1.0913875102996826, "loss_ce": 7.888016261858866e-05, "loss_iou": 0.369140625, "loss_num": 0.07080078125, "loss_xval": 1.09375, "num_input_tokens_seen": 50651484, "step": 438 }, { "epoch": 2.360215053763441, "grad_norm": 21.28940200805664, "learning_rate": 5e-07, "loss": 1.112, "num_input_tokens_seen": 50768244, "step": 439 }, { "epoch": 2.360215053763441, "loss": 1.6489931344985962, "loss_ce": 6.733924965374172e-05, "loss_iou": 0.6484375, "loss_num": 0.07080078125, "loss_xval": 1.6484375, "num_input_tokens_seen": 50768244, "step": 439 }, { "epoch": 2.3655913978494625, "grad_norm": 29.100505828857422, "learning_rate": 5e-07, "loss": 1.1377, "num_input_tokens_seen": 50885592, "step": 440 }, { "epoch": 2.3655913978494625, "loss": 0.9400951862335205, "loss_ce": 0.0001538391225039959, "loss_iou": 0.390625, "loss_num": 0.031982421875, "loss_xval": 0.94140625, "num_input_tokens_seen": 50885592, "step": 440 }, { "epoch": 2.370967741935484, "grad_norm": 26.356998443603516, "learning_rate": 5e-07, "loss": 1.0556, "num_input_tokens_seen": 51000456, "step": 441 }, { "epoch": 2.370967741935484, "loss": 0.5986810922622681, "loss_ce": 4.826868826057762e-05, "loss_iou": 0.234375, "loss_num": 0.026123046875, "loss_xval": 0.59765625, "num_input_tokens_seen": 51000456, "step": 441 }, { "epoch": 2.3763440860215055, "grad_norm": 22.834983825683594, "learning_rate": 5e-07, "loss": 0.9765, "num_input_tokens_seen": 51114504, "step": 442 }, { "epoch": 2.3763440860215055, "loss": 0.6431561708450317, "loss_ce": 8.972820069175214e-05, "loss_iou": 0.22265625, "loss_num": 0.039306640625, "loss_xval": 0.64453125, "num_input_tokens_seen": 51114504, "step": 442 }, { "epoch": 2.381720430107527, "grad_norm": 22.69744873046875, "learning_rate": 5e-07, "loss": 0.9301, "num_input_tokens_seen": 51230724, "step": 443 }, { "epoch": 2.381720430107527, "loss": 0.968841016292572, "loss_ce": 9.097604925045744e-05, "loss_iou": 0.41015625, "loss_num": 0.029541015625, "loss_xval": 0.96875, "num_input_tokens_seen": 51230724, "step": 443 }, { "epoch": 2.3870967741935485, "grad_norm": 58.92821502685547, "learning_rate": 5e-07, "loss": 1.1793, "num_input_tokens_seen": 51348436, "step": 444 }, { "epoch": 2.3870967741935485, "loss": 0.7937574982643127, "loss_ce": 5.629320730804466e-05, "loss_iou": 0.35546875, "loss_num": 0.0167236328125, "loss_xval": 0.79296875, "num_input_tokens_seen": 51348436, "step": 444 }, { "epoch": 2.39247311827957, "grad_norm": 23.085290908813477, "learning_rate": 5e-07, "loss": 1.0152, "num_input_tokens_seen": 51466276, "step": 445 }, { "epoch": 2.39247311827957, "loss": 1.158266305923462, "loss_ce": 6.315957580227405e-05, "loss_iou": 0.48046875, "loss_num": 0.039794921875, "loss_xval": 1.15625, "num_input_tokens_seen": 51466276, "step": 445 }, { "epoch": 2.3978494623655915, "grad_norm": 35.84708023071289, "learning_rate": 5e-07, "loss": 1.0752, "num_input_tokens_seen": 51583620, "step": 446 }, { "epoch": 2.3978494623655915, "loss": 0.7890132665634155, "loss_ce": 0.00019486328528728336, "loss_iou": 0.298828125, "loss_num": 0.038330078125, "loss_xval": 0.7890625, "num_input_tokens_seen": 51583620, "step": 446 }, { "epoch": 2.403225806451613, "grad_norm": 24.092498779296875, "learning_rate": 5e-07, "loss": 0.9526, "num_input_tokens_seen": 51695992, "step": 447 }, { "epoch": 2.403225806451613, "loss": 1.0240108966827393, "loss_ce": 8.508682367391884e-05, "loss_iou": 0.421875, "loss_num": 0.035888671875, "loss_xval": 1.0234375, "num_input_tokens_seen": 51695992, "step": 447 }, { "epoch": 2.4086021505376345, "grad_norm": 20.56044578552246, "learning_rate": 5e-07, "loss": 1.0513, "num_input_tokens_seen": 51811280, "step": 448 }, { "epoch": 2.4086021505376345, "loss": 1.0535329580307007, "loss_ce": 6.619846681132913e-05, "loss_iou": 0.4296875, "loss_num": 0.038818359375, "loss_xval": 1.0546875, "num_input_tokens_seen": 51811280, "step": 448 }, { "epoch": 2.413978494623656, "grad_norm": 30.77098274230957, "learning_rate": 5e-07, "loss": 1.1062, "num_input_tokens_seen": 51928152, "step": 449 }, { "epoch": 2.413978494623656, "loss": 1.0205680131912231, "loss_ce": 6.0207603382878006e-05, "loss_iou": 0.41796875, "loss_num": 0.03662109375, "loss_xval": 1.0234375, "num_input_tokens_seen": 51928152, "step": 449 }, { "epoch": 2.4193548387096775, "grad_norm": 27.109920501708984, "learning_rate": 5e-07, "loss": 1.1249, "num_input_tokens_seen": 52046984, "step": 450 }, { "epoch": 2.4193548387096775, "loss": 1.0907057523727417, "loss_ce": 0.00012958732258994132, "loss_iou": 0.451171875, "loss_num": 0.038330078125, "loss_xval": 1.09375, "num_input_tokens_seen": 52046984, "step": 450 }, { "epoch": 2.424731182795699, "grad_norm": 28.789356231689453, "learning_rate": 5e-07, "loss": 0.9937, "num_input_tokens_seen": 52163540, "step": 451 }, { "epoch": 2.424731182795699, "loss": 1.1199467182159424, "loss_ce": 7.367561192950234e-05, "loss_iou": 0.443359375, "loss_num": 0.046875, "loss_xval": 1.1171875, "num_input_tokens_seen": 52163540, "step": 451 }, { "epoch": 2.4301075268817205, "grad_norm": 18.19638442993164, "learning_rate": 5e-07, "loss": 1.0691, "num_input_tokens_seen": 52281732, "step": 452 }, { "epoch": 2.4301075268817205, "loss": 1.0571839809417725, "loss_ce": 5.500642873812467e-05, "loss_iou": 0.447265625, "loss_num": 0.032958984375, "loss_xval": 1.0546875, "num_input_tokens_seen": 52281732, "step": 452 }, { "epoch": 2.435483870967742, "grad_norm": 19.232227325439453, "learning_rate": 5e-07, "loss": 0.9036, "num_input_tokens_seen": 52396336, "step": 453 }, { "epoch": 2.435483870967742, "loss": 0.7126905918121338, "loss_ce": 4.4125270505901426e-05, "loss_iou": 0.3046875, "loss_num": 0.020751953125, "loss_xval": 0.7109375, "num_input_tokens_seen": 52396336, "step": 453 }, { "epoch": 2.4408602150537635, "grad_norm": 27.8459415435791, "learning_rate": 5e-07, "loss": 1.1694, "num_input_tokens_seen": 52514952, "step": 454 }, { "epoch": 2.4408602150537635, "loss": 0.852349579334259, "loss_ce": 5.4627511417493224e-05, "loss_iou": 0.357421875, "loss_num": 0.027587890625, "loss_xval": 0.8515625, "num_input_tokens_seen": 52514952, "step": 454 }, { "epoch": 2.446236559139785, "grad_norm": 19.993066787719727, "learning_rate": 5e-07, "loss": 1.1399, "num_input_tokens_seen": 52632600, "step": 455 }, { "epoch": 2.446236559139785, "loss": 0.8955490589141846, "loss_ce": 4.1310944652650505e-05, "loss_iou": 0.365234375, "loss_num": 0.033203125, "loss_xval": 0.89453125, "num_input_tokens_seen": 52632600, "step": 455 }, { "epoch": 2.4516129032258065, "grad_norm": 18.60322380065918, "learning_rate": 5e-07, "loss": 1.1152, "num_input_tokens_seen": 52745644, "step": 456 }, { "epoch": 2.4516129032258065, "loss": 1.0396084785461426, "loss_ce": 5.7705452491063625e-05, "loss_iou": 0.41796875, "loss_num": 0.04052734375, "loss_xval": 1.0390625, "num_input_tokens_seen": 52745644, "step": 456 }, { "epoch": 2.456989247311828, "grad_norm": 20.73372459411621, "learning_rate": 5e-07, "loss": 1.1682, "num_input_tokens_seen": 52865520, "step": 457 }, { "epoch": 2.456989247311828, "loss": 1.2457115650177002, "loss_ce": 0.00010610866593196988, "loss_iou": 0.53515625, "loss_num": 0.03515625, "loss_xval": 1.2421875, "num_input_tokens_seen": 52865520, "step": 457 }, { "epoch": 2.4623655913978495, "grad_norm": 24.715341567993164, "learning_rate": 5e-07, "loss": 1.1869, "num_input_tokens_seen": 52978292, "step": 458 }, { "epoch": 2.4623655913978495, "loss": 1.1238315105438232, "loss_ce": 5.2283248805906624e-05, "loss_iou": 0.48046875, "loss_num": 0.032470703125, "loss_xval": 1.125, "num_input_tokens_seen": 52978292, "step": 458 }, { "epoch": 2.467741935483871, "grad_norm": 24.654245376586914, "learning_rate": 5e-07, "loss": 0.9932, "num_input_tokens_seen": 53097988, "step": 459 }, { "epoch": 2.467741935483871, "loss": 0.7612711787223816, "loss_ce": 4.067530971951783e-05, "loss_iou": 0.3125, "loss_num": 0.02734375, "loss_xval": 0.76171875, "num_input_tokens_seen": 53097988, "step": 459 }, { "epoch": 2.4731182795698925, "grad_norm": 21.639211654663086, "learning_rate": 5e-07, "loss": 1.2364, "num_input_tokens_seen": 53213140, "step": 460 }, { "epoch": 2.4731182795698925, "loss": 1.233449935913086, "loss_ce": 5.147688716533594e-05, "loss_iou": 0.5078125, "loss_num": 0.04443359375, "loss_xval": 1.234375, "num_input_tokens_seen": 53213140, "step": 460 }, { "epoch": 2.478494623655914, "grad_norm": 30.200191497802734, "learning_rate": 5e-07, "loss": 0.8887, "num_input_tokens_seen": 53330528, "step": 461 }, { "epoch": 2.478494623655914, "loss": 0.6585630774497986, "loss_ce": 0.00011581034050323069, "loss_iou": 0.27734375, "loss_num": 0.0205078125, "loss_xval": 0.66015625, "num_input_tokens_seen": 53330528, "step": 461 }, { "epoch": 2.4838709677419355, "grad_norm": 18.792526245117188, "learning_rate": 5e-07, "loss": 1.0404, "num_input_tokens_seen": 53447152, "step": 462 }, { "epoch": 2.4838709677419355, "loss": 1.1002771854400635, "loss_ce": 0.0001795324351405725, "loss_iou": 0.447265625, "loss_num": 0.041259765625, "loss_xval": 1.1015625, "num_input_tokens_seen": 53447152, "step": 462 }, { "epoch": 2.489247311827957, "grad_norm": 20.957128524780273, "learning_rate": 5e-07, "loss": 1.1121, "num_input_tokens_seen": 53565592, "step": 463 }, { "epoch": 2.489247311827957, "loss": 0.9387374520301819, "loss_ce": 0.0002608525101095438, "loss_iou": 0.375, "loss_num": 0.03759765625, "loss_xval": 0.9375, "num_input_tokens_seen": 53565592, "step": 463 }, { "epoch": 2.4946236559139785, "grad_norm": 17.519014358520508, "learning_rate": 5e-07, "loss": 1.067, "num_input_tokens_seen": 53678804, "step": 464 }, { "epoch": 2.4946236559139785, "loss": 1.1978774070739746, "loss_ce": 0.00012347818119451404, "loss_iou": 0.5078125, "loss_num": 0.036376953125, "loss_xval": 1.1953125, "num_input_tokens_seen": 53678804, "step": 464 }, { "epoch": 2.5, "grad_norm": 35.44404602050781, "learning_rate": 5e-07, "loss": 1.0443, "num_input_tokens_seen": 53795400, "step": 465 }, { "epoch": 2.5, "loss": 0.845744252204895, "loss_ce": 4.113461181987077e-05, "loss_iou": 0.353515625, "loss_num": 0.027587890625, "loss_xval": 0.84375, "num_input_tokens_seen": 53795400, "step": 465 }, { "epoch": 2.5053763440860215, "grad_norm": 49.23858642578125, "learning_rate": 5e-07, "loss": 1.0727, "num_input_tokens_seen": 53909624, "step": 466 }, { "epoch": 2.5053763440860215, "loss": 0.9416398406028748, "loss_ce": 0.004628097638487816, "loss_iou": 0.38671875, "loss_num": 0.032470703125, "loss_xval": 0.9375, "num_input_tokens_seen": 53909624, "step": 466 }, { "epoch": 2.510752688172043, "grad_norm": 22.333940505981445, "learning_rate": 5e-07, "loss": 1.0487, "num_input_tokens_seen": 54025436, "step": 467 }, { "epoch": 2.510752688172043, "loss": 1.342340111732483, "loss_ce": 0.0002991179935634136, "loss_iou": 0.59765625, "loss_num": 0.0296630859375, "loss_xval": 1.34375, "num_input_tokens_seen": 54025436, "step": 467 }, { "epoch": 2.5161290322580645, "grad_norm": 22.546083450317383, "learning_rate": 5e-07, "loss": 1.0825, "num_input_tokens_seen": 54139892, "step": 468 }, { "epoch": 2.5161290322580645, "loss": 0.8623963594436646, "loss_ce": 9.164227958535776e-05, "loss_iou": 0.35546875, "loss_num": 0.0306396484375, "loss_xval": 0.86328125, "num_input_tokens_seen": 54139892, "step": 468 }, { "epoch": 2.521505376344086, "grad_norm": 35.66588592529297, "learning_rate": 5e-07, "loss": 1.1946, "num_input_tokens_seen": 54253520, "step": 469 }, { "epoch": 2.521505376344086, "loss": 1.118459701538086, "loss_ce": 0.00029570667538791895, "loss_iou": 0.45703125, "loss_num": 0.041015625, "loss_xval": 1.1171875, "num_input_tokens_seen": 54253520, "step": 469 }, { "epoch": 2.5268817204301075, "grad_norm": 21.025257110595703, "learning_rate": 5e-07, "loss": 0.9865, "num_input_tokens_seen": 54369160, "step": 470 }, { "epoch": 2.5268817204301075, "loss": 0.7858576774597168, "loss_ce": 0.0007013880531303585, "loss_iou": 0.3203125, "loss_num": 0.028564453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 54369160, "step": 470 }, { "epoch": 2.532258064516129, "grad_norm": 20.737058639526367, "learning_rate": 5e-07, "loss": 1.0435, "num_input_tokens_seen": 54483156, "step": 471 }, { "epoch": 2.532258064516129, "loss": 0.6922920346260071, "loss_ce": 3.1289640901377425e-05, "loss_iou": 0.265625, "loss_num": 0.032470703125, "loss_xval": 0.69140625, "num_input_tokens_seen": 54483156, "step": 471 }, { "epoch": 2.5376344086021505, "grad_norm": 21.35611343383789, "learning_rate": 5e-07, "loss": 0.9637, "num_input_tokens_seen": 54599144, "step": 472 }, { "epoch": 2.5376344086021505, "loss": 0.7642460465431213, "loss_ce": 8.587645424995571e-05, "loss_iou": 0.306640625, "loss_num": 0.02978515625, "loss_xval": 0.765625, "num_input_tokens_seen": 54599144, "step": 472 }, { "epoch": 2.543010752688172, "grad_norm": 23.71376609802246, "learning_rate": 5e-07, "loss": 1.1339, "num_input_tokens_seen": 54714068, "step": 473 }, { "epoch": 2.543010752688172, "loss": 0.843073844909668, "loss_ce": 5.629657971439883e-05, "loss_iou": 0.32421875, "loss_num": 0.038818359375, "loss_xval": 0.84375, "num_input_tokens_seen": 54714068, "step": 473 }, { "epoch": 2.5483870967741935, "grad_norm": 18.623804092407227, "learning_rate": 5e-07, "loss": 0.9634, "num_input_tokens_seen": 54829448, "step": 474 }, { "epoch": 2.5483870967741935, "loss": 1.1402008533477783, "loss_ce": 6.40619546175003e-05, "loss_iou": 0.474609375, "loss_num": 0.037841796875, "loss_xval": 1.140625, "num_input_tokens_seen": 54829448, "step": 474 }, { "epoch": 2.553763440860215, "grad_norm": 32.8242301940918, "learning_rate": 5e-07, "loss": 1.1317, "num_input_tokens_seen": 54943816, "step": 475 }, { "epoch": 2.553763440860215, "loss": 1.4075498580932617, "loss_ce": 7.914168963907287e-05, "loss_iou": 0.6015625, "loss_num": 0.04150390625, "loss_xval": 1.40625, "num_input_tokens_seen": 54943816, "step": 475 }, { "epoch": 2.5591397849462365, "grad_norm": 32.34157943725586, "learning_rate": 5e-07, "loss": 0.9952, "num_input_tokens_seen": 55062344, "step": 476 }, { "epoch": 2.5591397849462365, "loss": 0.5716862678527832, "loss_ce": 3.101715992670506e-05, "loss_iou": 0.232421875, "loss_num": 0.0216064453125, "loss_xval": 0.5703125, "num_input_tokens_seen": 55062344, "step": 476 }, { "epoch": 2.564516129032258, "grad_norm": 34.65834045410156, "learning_rate": 5e-07, "loss": 0.9898, "num_input_tokens_seen": 55175120, "step": 477 }, { "epoch": 2.564516129032258, "loss": 1.1514148712158203, "loss_ce": 4.768842336488888e-05, "loss_iou": 0.4609375, "loss_num": 0.0458984375, "loss_xval": 1.1484375, "num_input_tokens_seen": 55175120, "step": 477 }, { "epoch": 2.5698924731182795, "grad_norm": 23.455995559692383, "learning_rate": 5e-07, "loss": 0.9309, "num_input_tokens_seen": 55294020, "step": 478 }, { "epoch": 2.5698924731182795, "loss": 0.7177064418792725, "loss_ce": 5.512370989890769e-05, "loss_iou": 0.2890625, "loss_num": 0.027587890625, "loss_xval": 0.71875, "num_input_tokens_seen": 55294020, "step": 478 }, { "epoch": 2.575268817204301, "grad_norm": 21.30423927307129, "learning_rate": 5e-07, "loss": 0.951, "num_input_tokens_seen": 55411508, "step": 479 }, { "epoch": 2.575268817204301, "loss": 1.065452218055725, "loss_ce": 0.0002665779902599752, "loss_iou": 0.46484375, "loss_num": 0.027099609375, "loss_xval": 1.0625, "num_input_tokens_seen": 55411508, "step": 479 }, { "epoch": 2.5806451612903225, "grad_norm": 20.794824600219727, "learning_rate": 5e-07, "loss": 1.1648, "num_input_tokens_seen": 55527908, "step": 480 }, { "epoch": 2.5806451612903225, "loss": 1.0689210891723633, "loss_ce": 7.345399353653193e-05, "loss_iou": 0.466796875, "loss_num": 0.0267333984375, "loss_xval": 1.0703125, "num_input_tokens_seen": 55527908, "step": 480 }, { "epoch": 2.586021505376344, "grad_norm": 27.384376525878906, "learning_rate": 5e-07, "loss": 1.2733, "num_input_tokens_seen": 55642332, "step": 481 }, { "epoch": 2.586021505376344, "loss": 1.223265290260315, "loss_ce": 0.0001207640889333561, "loss_iou": 0.546875, "loss_num": 0.026611328125, "loss_xval": 1.2265625, "num_input_tokens_seen": 55642332, "step": 481 }, { "epoch": 2.5913978494623655, "grad_norm": 31.712194442749023, "learning_rate": 5e-07, "loss": 1.206, "num_input_tokens_seen": 55754192, "step": 482 }, { "epoch": 2.5913978494623655, "loss": 1.1834347248077393, "loss_ce": 8.497543603880331e-05, "loss_iou": 0.48828125, "loss_num": 0.04052734375, "loss_xval": 1.1796875, "num_input_tokens_seen": 55754192, "step": 482 }, { "epoch": 2.596774193548387, "grad_norm": 22.883764266967773, "learning_rate": 5e-07, "loss": 1.1538, "num_input_tokens_seen": 55869888, "step": 483 }, { "epoch": 2.596774193548387, "loss": 1.178765058517456, "loss_ce": 5.408422293839976e-05, "loss_iou": 0.482421875, "loss_num": 0.04296875, "loss_xval": 1.1796875, "num_input_tokens_seen": 55869888, "step": 483 }, { "epoch": 2.6021505376344085, "grad_norm": 23.7934627532959, "learning_rate": 5e-07, "loss": 1.1249, "num_input_tokens_seen": 55986592, "step": 484 }, { "epoch": 2.6021505376344085, "loss": 1.1153545379638672, "loss_ce": 0.0001201609629788436, "loss_iou": 0.462890625, "loss_num": 0.03759765625, "loss_xval": 1.1171875, "num_input_tokens_seen": 55986592, "step": 484 }, { "epoch": 2.60752688172043, "grad_norm": 17.310527801513672, "learning_rate": 5e-07, "loss": 1.1159, "num_input_tokens_seen": 56103572, "step": 485 }, { "epoch": 2.60752688172043, "loss": 1.1470301151275635, "loss_ce": 5.739855987485498e-05, "loss_iou": 0.4609375, "loss_num": 0.04541015625, "loss_xval": 1.1484375, "num_input_tokens_seen": 56103572, "step": 485 }, { "epoch": 2.6129032258064515, "grad_norm": 66.87462615966797, "learning_rate": 5e-07, "loss": 1.0906, "num_input_tokens_seen": 56221188, "step": 486 }, { "epoch": 2.6129032258064515, "loss": 1.1492323875427246, "loss_ce": 6.244835094548762e-05, "loss_iou": 0.462890625, "loss_num": 0.044921875, "loss_xval": 1.1484375, "num_input_tokens_seen": 56221188, "step": 486 }, { "epoch": 2.618279569892473, "grad_norm": 23.490371704101562, "learning_rate": 5e-07, "loss": 0.9524, "num_input_tokens_seen": 56333408, "step": 487 }, { "epoch": 2.618279569892473, "loss": 0.6468039751052856, "loss_ce": 7.542384264525026e-05, "loss_iou": 0.259765625, "loss_num": 0.025390625, "loss_xval": 0.6484375, "num_input_tokens_seen": 56333408, "step": 487 }, { "epoch": 2.6236559139784945, "grad_norm": 22.929536819458008, "learning_rate": 5e-07, "loss": 0.9807, "num_input_tokens_seen": 56452000, "step": 488 }, { "epoch": 2.6236559139784945, "loss": 0.6590333580970764, "loss_ce": 9.781308472156525e-05, "loss_iou": 0.29296875, "loss_num": 0.014404296875, "loss_xval": 0.66015625, "num_input_tokens_seen": 56452000, "step": 488 }, { "epoch": 2.629032258064516, "grad_norm": 17.965229034423828, "learning_rate": 5e-07, "loss": 1.1666, "num_input_tokens_seen": 56568580, "step": 489 }, { "epoch": 2.629032258064516, "loss": 1.3384307622909546, "loss_ce": 5.181278902455233e-05, "loss_iou": 0.5546875, "loss_num": 0.0458984375, "loss_xval": 1.3359375, "num_input_tokens_seen": 56568580, "step": 489 }, { "epoch": 2.6344086021505375, "grad_norm": 27.731042861938477, "learning_rate": 5e-07, "loss": 1.098, "num_input_tokens_seen": 56685112, "step": 490 }, { "epoch": 2.6344086021505375, "loss": 1.164123773574829, "loss_ce": 6.127840606495738e-05, "loss_iou": 0.51171875, "loss_num": 0.027587890625, "loss_xval": 1.1640625, "num_input_tokens_seen": 56685112, "step": 490 }, { "epoch": 2.639784946236559, "grad_norm": 18.83871078491211, "learning_rate": 5e-07, "loss": 0.9432, "num_input_tokens_seen": 56801412, "step": 491 }, { "epoch": 2.639784946236559, "loss": 0.8043019771575928, "loss_ce": 0.00010268132609780878, "loss_iou": 0.337890625, "loss_num": 0.025390625, "loss_xval": 0.8046875, "num_input_tokens_seen": 56801412, "step": 491 }, { "epoch": 2.6451612903225805, "grad_norm": 72.23200225830078, "learning_rate": 5e-07, "loss": 1.1011, "num_input_tokens_seen": 56914592, "step": 492 }, { "epoch": 2.6451612903225805, "loss": 0.802817702293396, "loss_ce": 8.33554076962173e-05, "loss_iou": 0.341796875, "loss_num": 0.0238037109375, "loss_xval": 0.8046875, "num_input_tokens_seen": 56914592, "step": 492 }, { "epoch": 2.650537634408602, "grad_norm": 22.734683990478516, "learning_rate": 5e-07, "loss": 0.9872, "num_input_tokens_seen": 57030284, "step": 493 }, { "epoch": 2.650537634408602, "loss": 0.9265541434288025, "loss_ce": 4.047531547257677e-05, "loss_iou": 0.388671875, "loss_num": 0.0299072265625, "loss_xval": 0.92578125, "num_input_tokens_seen": 57030284, "step": 493 }, { "epoch": 2.6559139784946235, "grad_norm": 18.284624099731445, "learning_rate": 5e-07, "loss": 1.069, "num_input_tokens_seen": 57145760, "step": 494 }, { "epoch": 2.6559139784946235, "loss": 1.156775712966919, "loss_ce": 3.750343603314832e-05, "loss_iou": 0.48828125, "loss_num": 0.035888671875, "loss_xval": 1.15625, "num_input_tokens_seen": 57145760, "step": 494 }, { "epoch": 2.661290322580645, "grad_norm": 24.575777053833008, "learning_rate": 5e-07, "loss": 1.1352, "num_input_tokens_seen": 57260208, "step": 495 }, { "epoch": 2.661290322580645, "loss": 1.4271275997161865, "loss_ce": 0.00012562084884848446, "loss_iou": 0.6171875, "loss_num": 0.03857421875, "loss_xval": 1.4296875, "num_input_tokens_seen": 57260208, "step": 495 }, { "epoch": 2.6666666666666665, "grad_norm": 22.653366088867188, "learning_rate": 5e-07, "loss": 1.1213, "num_input_tokens_seen": 57376840, "step": 496 }, { "epoch": 2.6666666666666665, "loss": 1.3345916271209717, "loss_ce": 0.00011897778313141316, "loss_iou": 0.54296875, "loss_num": 0.0498046875, "loss_xval": 1.3359375, "num_input_tokens_seen": 57376840, "step": 496 }, { "epoch": 2.672043010752688, "grad_norm": 41.80381774902344, "learning_rate": 5e-07, "loss": 1.1987, "num_input_tokens_seen": 57493608, "step": 497 }, { "epoch": 2.672043010752688, "loss": 1.1787935495376587, "loss_ce": 8.263348718173802e-05, "loss_iou": 0.50390625, "loss_num": 0.034912109375, "loss_xval": 1.1796875, "num_input_tokens_seen": 57493608, "step": 497 }, { "epoch": 2.6774193548387095, "grad_norm": 19.494413375854492, "learning_rate": 5e-07, "loss": 0.9371, "num_input_tokens_seen": 57606696, "step": 498 }, { "epoch": 2.6774193548387095, "loss": 1.1350799798965454, "loss_ce": 7.024510705377907e-05, "loss_iou": 0.46875, "loss_num": 0.039306640625, "loss_xval": 1.1328125, "num_input_tokens_seen": 57606696, "step": 498 }, { "epoch": 2.682795698924731, "grad_norm": 26.471403121948242, "learning_rate": 5e-07, "loss": 0.95, "num_input_tokens_seen": 57720072, "step": 499 }, { "epoch": 2.682795698924731, "loss": 1.0277398824691772, "loss_ce": 0.0013726575998589396, "loss_iou": 0.412109375, "loss_num": 0.0400390625, "loss_xval": 1.0234375, "num_input_tokens_seen": 57720072, "step": 499 }, { "epoch": 2.688172043010753, "grad_norm": 26.15498924255371, "learning_rate": 5e-07, "loss": 1.0214, "num_input_tokens_seen": 57838912, "step": 500 }, { "epoch": 2.688172043010753, "eval_icons_CIoU": 0.10132496058940887, "eval_icons_GIoU": 0.0655208914540708, "eval_icons_IoU": 0.267132006585598, "eval_icons_MAE_all": 0.033393727615475655, "eval_icons_MAE_h": 0.03752017579972744, "eval_icons_MAE_w": 0.05901048704981804, "eval_icons_MAE_x_boxes": 0.05394728109240532, "eval_icons_MAE_y_boxes": 0.034801225177943707, "eval_icons_NUM_probability": 0.9995730519294739, "eval_icons_inside_bbox": 0.5503472238779068, "eval_icons_loss": 2.024327278137207, "eval_icons_loss_ce": 0.0009507770300842822, "eval_icons_loss_iou": 0.917236328125, "eval_icons_loss_num": 0.034420013427734375, "eval_icons_loss_xval": 2.00537109375, "eval_icons_runtime": 39.8567, "eval_icons_samples_per_second": 1.254, "eval_icons_steps_per_second": 0.05, "num_input_tokens_seen": 57838912, "step": 500 }, { "epoch": 2.688172043010753, "eval_screenspot_CIoU": 0.22391105691591898, "eval_screenspot_GIoU": 0.2092596193154653, "eval_screenspot_IoU": 0.3422169089317322, "eval_screenspot_MAE_all": 0.07749960695703824, "eval_screenspot_MAE_h": 0.05976089338461558, "eval_screenspot_MAE_w": 0.10438546041647594, "eval_screenspot_MAE_x_boxes": 0.10764879733324051, "eval_screenspot_MAE_y_boxes": 0.04432833567261696, "eval_screenspot_NUM_probability": 0.9999380111694336, "eval_screenspot_inside_bbox": 0.659583330154419, "eval_screenspot_loss": 2.0268075466156006, "eval_screenspot_loss_ce": 0.00011930003165616654, "eval_screenspot_loss_iou": 0.8318684895833334, "eval_screenspot_loss_num": 0.08716837565104167, "eval_screenspot_loss_xval": 2.099609375, "eval_screenspot_runtime": 70.9857, "eval_screenspot_samples_per_second": 1.254, "eval_screenspot_steps_per_second": 0.042, "num_input_tokens_seen": 57838912, "step": 500 }, { "epoch": 2.688172043010753, "loss": 2.0156896114349365, "loss_ce": 6.465662590926513e-05, "loss_iou": 0.81640625, "loss_num": 0.07666015625, "loss_xval": 2.015625, "num_input_tokens_seen": 57838912, "step": 500 }, { "epoch": 2.693548387096774, "grad_norm": 41.068084716796875, "learning_rate": 5e-07, "loss": 1.0286, "num_input_tokens_seen": 57953040, "step": 501 }, { "epoch": 2.693548387096774, "loss": 0.9416850209236145, "loss_ce": 3.459643994574435e-05, "loss_iou": 0.40625, "loss_num": 0.0263671875, "loss_xval": 0.94140625, "num_input_tokens_seen": 57953040, "step": 501 }, { "epoch": 2.698924731182796, "grad_norm": 24.91327476501465, "learning_rate": 5e-07, "loss": 1.0834, "num_input_tokens_seen": 58070668, "step": 502 }, { "epoch": 2.698924731182796, "loss": 0.9837210178375244, "loss_ce": 7.85084193921648e-05, "loss_iou": 0.421875, "loss_num": 0.0283203125, "loss_xval": 0.984375, "num_input_tokens_seen": 58070668, "step": 502 }, { "epoch": 2.704301075268817, "grad_norm": 25.985673904418945, "learning_rate": 5e-07, "loss": 1.3102, "num_input_tokens_seen": 58184148, "step": 503 }, { "epoch": 2.704301075268817, "loss": 0.9774428606033325, "loss_ce": 0.0001479649217799306, "loss_iou": 0.431640625, "loss_num": 0.0225830078125, "loss_xval": 0.9765625, "num_input_tokens_seen": 58184148, "step": 503 }, { "epoch": 2.709677419354839, "grad_norm": 22.896745681762695, "learning_rate": 5e-07, "loss": 1.0465, "num_input_tokens_seen": 58296568, "step": 504 }, { "epoch": 2.709677419354839, "loss": 1.1919139623641968, "loss_ce": 0.009296748787164688, "loss_iou": 0.453125, "loss_num": 0.05517578125, "loss_xval": 1.1796875, "num_input_tokens_seen": 58296568, "step": 504 }, { "epoch": 2.71505376344086, "grad_norm": 18.188879013061523, "learning_rate": 5e-07, "loss": 1.0821, "num_input_tokens_seen": 58409024, "step": 505 }, { "epoch": 2.71505376344086, "loss": 1.0620315074920654, "loss_ce": 1.9850886019412428e-05, "loss_iou": 0.41796875, "loss_num": 0.044677734375, "loss_xval": 1.0625, "num_input_tokens_seen": 58409024, "step": 505 }, { "epoch": 2.720430107526882, "grad_norm": 27.852462768554688, "learning_rate": 5e-07, "loss": 1.1838, "num_input_tokens_seen": 58523300, "step": 506 }, { "epoch": 2.720430107526882, "loss": 1.1758166551589966, "loss_ce": 3.544461651472375e-05, "loss_iou": 0.4765625, "loss_num": 0.044189453125, "loss_xval": 1.171875, "num_input_tokens_seen": 58523300, "step": 506 }, { "epoch": 2.725806451612903, "grad_norm": 24.9827823638916, "learning_rate": 5e-07, "loss": 1.0285, "num_input_tokens_seen": 58638832, "step": 507 }, { "epoch": 2.725806451612903, "loss": 1.0824103355407715, "loss_ce": 0.0001349371304968372, "loss_iou": 0.431640625, "loss_num": 0.043701171875, "loss_xval": 1.0859375, "num_input_tokens_seen": 58638832, "step": 507 }, { "epoch": 2.731182795698925, "grad_norm": 22.8240909576416, "learning_rate": 5e-07, "loss": 0.895, "num_input_tokens_seen": 58755676, "step": 508 }, { "epoch": 2.731182795698925, "loss": 0.7825629711151123, "loss_ce": 9.222899097949266e-05, "loss_iou": 0.337890625, "loss_num": 0.0211181640625, "loss_xval": 0.78125, "num_input_tokens_seen": 58755676, "step": 508 }, { "epoch": 2.736559139784946, "grad_norm": 37.21711730957031, "learning_rate": 5e-07, "loss": 1.0878, "num_input_tokens_seen": 58871616, "step": 509 }, { "epoch": 2.736559139784946, "loss": 1.4055875539779663, "loss_ce": 6.993793067522347e-05, "loss_iou": 0.59375, "loss_num": 0.044189453125, "loss_xval": 1.40625, "num_input_tokens_seen": 58871616, "step": 509 }, { "epoch": 2.741935483870968, "grad_norm": 18.385013580322266, "learning_rate": 5e-07, "loss": 1.03, "num_input_tokens_seen": 58987244, "step": 510 }, { "epoch": 2.741935483870968, "loss": 0.7782043218612671, "loss_ce": 0.00012811859778594226, "loss_iou": 0.30859375, "loss_num": 0.03271484375, "loss_xval": 0.77734375, "num_input_tokens_seen": 58987244, "step": 510 }, { "epoch": 2.747311827956989, "grad_norm": 28.11832046508789, "learning_rate": 5e-07, "loss": 1.0311, "num_input_tokens_seen": 59106960, "step": 511 }, { "epoch": 2.747311827956989, "loss": 0.9195431470870972, "loss_ce": 0.00010954445315292105, "loss_iou": 0.396484375, "loss_num": 0.0255126953125, "loss_xval": 0.91796875, "num_input_tokens_seen": 59106960, "step": 511 }, { "epoch": 2.752688172043011, "grad_norm": 26.76439666748047, "learning_rate": 5e-07, "loss": 0.9729, "num_input_tokens_seen": 59221996, "step": 512 }, { "epoch": 2.752688172043011, "loss": 0.9793656468391418, "loss_ce": 0.00011758694745367393, "loss_iou": 0.40234375, "loss_num": 0.034912109375, "loss_xval": 0.98046875, "num_input_tokens_seen": 59221996, "step": 512 }, { "epoch": 2.758064516129032, "grad_norm": 25.561372756958008, "learning_rate": 5e-07, "loss": 1.0203, "num_input_tokens_seen": 59338316, "step": 513 }, { "epoch": 2.758064516129032, "loss": 1.0393399000167847, "loss_ce": 3.3238757168874145e-05, "loss_iou": 0.455078125, "loss_num": 0.0260009765625, "loss_xval": 1.0390625, "num_input_tokens_seen": 59338316, "step": 513 }, { "epoch": 2.763440860215054, "grad_norm": 28.92034912109375, "learning_rate": 5e-07, "loss": 1.0179, "num_input_tokens_seen": 59454588, "step": 514 }, { "epoch": 2.763440860215054, "loss": 1.0524394512176514, "loss_ce": 7.120901136659086e-05, "loss_iou": 0.443359375, "loss_num": 0.033203125, "loss_xval": 1.0546875, "num_input_tokens_seen": 59454588, "step": 514 }, { "epoch": 2.768817204301075, "grad_norm": 19.01809310913086, "learning_rate": 5e-07, "loss": 1.1876, "num_input_tokens_seen": 59571212, "step": 515 }, { "epoch": 2.768817204301075, "loss": 1.3425699472427368, "loss_ce": 4.0700557292439044e-05, "loss_iou": 0.578125, "loss_num": 0.037353515625, "loss_xval": 1.34375, "num_input_tokens_seen": 59571212, "step": 515 }, { "epoch": 2.774193548387097, "grad_norm": 22.47633171081543, "learning_rate": 5e-07, "loss": 1.0244, "num_input_tokens_seen": 59686572, "step": 516 }, { "epoch": 2.774193548387097, "loss": 0.7930299043655396, "loss_ce": 6.116426084190607e-05, "loss_iou": 0.3203125, "loss_num": 0.03076171875, "loss_xval": 0.79296875, "num_input_tokens_seen": 59686572, "step": 516 }, { "epoch": 2.779569892473118, "grad_norm": 24.6093807220459, "learning_rate": 5e-07, "loss": 1.0389, "num_input_tokens_seen": 59800792, "step": 517 }, { "epoch": 2.779569892473118, "loss": 1.1963332891464233, "loss_ce": 4.420333789312281e-05, "loss_iou": 0.49609375, "loss_num": 0.04052734375, "loss_xval": 1.1953125, "num_input_tokens_seen": 59800792, "step": 517 }, { "epoch": 2.78494623655914, "grad_norm": 25.987449645996094, "learning_rate": 5e-07, "loss": 1.0973, "num_input_tokens_seen": 59914684, "step": 518 }, { "epoch": 2.78494623655914, "loss": 1.1892467737197876, "loss_ce": 3.785374065046199e-05, "loss_iou": 0.52734375, "loss_num": 0.0277099609375, "loss_xval": 1.1875, "num_input_tokens_seen": 59914684, "step": 518 }, { "epoch": 2.790322580645161, "grad_norm": 17.59604263305664, "learning_rate": 5e-07, "loss": 1.0731, "num_input_tokens_seen": 60030708, "step": 519 }, { "epoch": 2.790322580645161, "loss": 1.0167641639709473, "loss_ce": 4.0542916394770145e-05, "loss_iou": 0.40625, "loss_num": 0.04052734375, "loss_xval": 1.015625, "num_input_tokens_seen": 60030708, "step": 519 }, { "epoch": 2.795698924731183, "grad_norm": 20.75042724609375, "learning_rate": 5e-07, "loss": 1.1014, "num_input_tokens_seen": 60144044, "step": 520 }, { "epoch": 2.795698924731183, "loss": 0.9292649626731873, "loss_ce": 6.574942381121218e-05, "loss_iou": 0.39453125, "loss_num": 0.0281982421875, "loss_xval": 0.9296875, "num_input_tokens_seen": 60144044, "step": 520 }, { "epoch": 2.801075268817204, "grad_norm": 25.804285049438477, "learning_rate": 5e-07, "loss": 0.9442, "num_input_tokens_seen": 60258636, "step": 521 }, { "epoch": 2.801075268817204, "loss": 0.4099963307380676, "loss_ce": 0.00014526656013913453, "loss_iou": 0.162109375, "loss_num": 0.017333984375, "loss_xval": 0.41015625, "num_input_tokens_seen": 60258636, "step": 521 }, { "epoch": 2.806451612903226, "grad_norm": 30.3845272064209, "learning_rate": 5e-07, "loss": 1.1923, "num_input_tokens_seen": 60375588, "step": 522 }, { "epoch": 2.806451612903226, "loss": 1.280104637145996, "loss_ce": 7.529184949817136e-05, "loss_iou": 0.51953125, "loss_num": 0.048095703125, "loss_xval": 1.28125, "num_input_tokens_seen": 60375588, "step": 522 }, { "epoch": 2.811827956989247, "grad_norm": 27.56556510925293, "learning_rate": 5e-07, "loss": 1.0462, "num_input_tokens_seen": 60491988, "step": 523 }, { "epoch": 2.811827956989247, "loss": 1.1177773475646973, "loss_ce": 0.0001014696026686579, "loss_iou": 0.46875, "loss_num": 0.03564453125, "loss_xval": 1.1171875, "num_input_tokens_seen": 60491988, "step": 523 }, { "epoch": 2.817204301075269, "grad_norm": 21.66545867919922, "learning_rate": 5e-07, "loss": 0.9759, "num_input_tokens_seen": 60602668, "step": 524 }, { "epoch": 2.817204301075269, "loss": 1.0308129787445068, "loss_ce": 5.123763548908755e-05, "loss_iou": 0.40234375, "loss_num": 0.045166015625, "loss_xval": 1.03125, "num_input_tokens_seen": 60602668, "step": 524 }, { "epoch": 2.8225806451612905, "grad_norm": 21.88824462890625, "learning_rate": 5e-07, "loss": 0.9583, "num_input_tokens_seen": 60717652, "step": 525 }, { "epoch": 2.8225806451612905, "loss": 1.2076866626739502, "loss_ce": 0.00016726090689189732, "loss_iou": 0.515625, "loss_num": 0.035400390625, "loss_xval": 1.2109375, "num_input_tokens_seen": 60717652, "step": 525 }, { "epoch": 2.827956989247312, "grad_norm": 133.6829071044922, "learning_rate": 5e-07, "loss": 1.4029, "num_input_tokens_seen": 60831240, "step": 526 }, { "epoch": 2.827956989247312, "loss": 1.219517707824707, "loss_ce": 3.528527304297313e-05, "loss_iou": 0.5078125, "loss_num": 0.04052734375, "loss_xval": 1.21875, "num_input_tokens_seen": 60831240, "step": 526 }, { "epoch": 2.8333333333333335, "grad_norm": 22.250160217285156, "learning_rate": 5e-07, "loss": 1.034, "num_input_tokens_seen": 60944996, "step": 527 }, { "epoch": 2.8333333333333335, "loss": 0.9309725761413574, "loss_ce": 6.439528078772128e-05, "loss_iou": 0.380859375, "loss_num": 0.0341796875, "loss_xval": 0.9296875, "num_input_tokens_seen": 60944996, "step": 527 }, { "epoch": 2.838709677419355, "grad_norm": 18.682424545288086, "learning_rate": 5e-07, "loss": 0.9597, "num_input_tokens_seen": 61059760, "step": 528 }, { "epoch": 2.838709677419355, "loss": 1.1172562837600708, "loss_ce": 6.878745625726879e-05, "loss_iou": 0.451171875, "loss_num": 0.04296875, "loss_xval": 1.1171875, "num_input_tokens_seen": 61059760, "step": 528 }, { "epoch": 2.8440860215053765, "grad_norm": 21.253293991088867, "learning_rate": 5e-07, "loss": 0.9007, "num_input_tokens_seen": 61173428, "step": 529 }, { "epoch": 2.8440860215053765, "loss": 0.7832532525062561, "loss_ce": 5.0150323659181595e-05, "loss_iou": 0.302734375, "loss_num": 0.035888671875, "loss_xval": 0.78125, "num_input_tokens_seen": 61173428, "step": 529 }, { "epoch": 2.849462365591398, "grad_norm": 20.958160400390625, "learning_rate": 5e-07, "loss": 0.9825, "num_input_tokens_seen": 61290200, "step": 530 }, { "epoch": 2.849462365591398, "loss": 0.9900355935096741, "loss_ce": 4.538588473224081e-05, "loss_iou": 0.419921875, "loss_num": 0.0299072265625, "loss_xval": 0.98828125, "num_input_tokens_seen": 61290200, "step": 530 }, { "epoch": 2.8548387096774195, "grad_norm": 24.959348678588867, "learning_rate": 5e-07, "loss": 0.8657, "num_input_tokens_seen": 61406744, "step": 531 }, { "epoch": 2.8548387096774195, "loss": 0.8962998986244202, "loss_ce": 5.9709309425670654e-05, "loss_iou": 0.357421875, "loss_num": 0.0361328125, "loss_xval": 0.89453125, "num_input_tokens_seen": 61406744, "step": 531 }, { "epoch": 2.860215053763441, "grad_norm": 22.636791229248047, "learning_rate": 5e-07, "loss": 0.9895, "num_input_tokens_seen": 61523780, "step": 532 }, { "epoch": 2.860215053763441, "loss": 0.9247728586196899, "loss_ce": 0.00021227994875516742, "loss_iou": 0.380859375, "loss_num": 0.03271484375, "loss_xval": 0.92578125, "num_input_tokens_seen": 61523780, "step": 532 }, { "epoch": 2.8655913978494625, "grad_norm": 22.37879180908203, "learning_rate": 5e-07, "loss": 0.9955, "num_input_tokens_seen": 61640640, "step": 533 }, { "epoch": 2.8655913978494625, "loss": 0.7153705358505249, "loss_ce": 3.8517231587320566e-05, "loss_iou": 0.294921875, "loss_num": 0.0252685546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 61640640, "step": 533 }, { "epoch": 2.870967741935484, "grad_norm": 63.97672653198242, "learning_rate": 5e-07, "loss": 1.1074, "num_input_tokens_seen": 61752728, "step": 534 }, { "epoch": 2.870967741935484, "loss": 0.8509214520454407, "loss_ce": 9.134541323874146e-05, "loss_iou": 0.349609375, "loss_num": 0.0303955078125, "loss_xval": 0.8515625, "num_input_tokens_seen": 61752728, "step": 534 }, { "epoch": 2.8763440860215055, "grad_norm": 18.84422492980957, "learning_rate": 5e-07, "loss": 1.061, "num_input_tokens_seen": 61867324, "step": 535 }, { "epoch": 2.8763440860215055, "loss": 1.147517442703247, "loss_ce": 5.652084655594081e-05, "loss_iou": 0.466796875, "loss_num": 0.04248046875, "loss_xval": 1.1484375, "num_input_tokens_seen": 61867324, "step": 535 }, { "epoch": 2.881720430107527, "grad_norm": 25.313758850097656, "learning_rate": 5e-07, "loss": 1.1523, "num_input_tokens_seen": 61985116, "step": 536 }, { "epoch": 2.881720430107527, "loss": 0.9475721120834351, "loss_ce": 6.233781459741294e-05, "loss_iou": 0.3984375, "loss_num": 0.0303955078125, "loss_xval": 0.94921875, "num_input_tokens_seen": 61985116, "step": 536 }, { "epoch": 2.8870967741935485, "grad_norm": 66.57193756103516, "learning_rate": 5e-07, "loss": 0.9992, "num_input_tokens_seen": 62100024, "step": 537 }, { "epoch": 2.8870967741935485, "loss": 0.877539336681366, "loss_ce": 9.791785851120949e-05, "loss_iou": 0.3671875, "loss_num": 0.0284423828125, "loss_xval": 0.87890625, "num_input_tokens_seen": 62100024, "step": 537 }, { "epoch": 2.89247311827957, "grad_norm": 35.13947677612305, "learning_rate": 5e-07, "loss": 1.0417, "num_input_tokens_seen": 62215604, "step": 538 }, { "epoch": 2.89247311827957, "loss": 0.8948169946670532, "loss_ce": 4.154173075221479e-05, "loss_iou": 0.35546875, "loss_num": 0.03662109375, "loss_xval": 0.89453125, "num_input_tokens_seen": 62215604, "step": 538 }, { "epoch": 2.8978494623655915, "grad_norm": 22.710786819458008, "learning_rate": 5e-07, "loss": 1.2251, "num_input_tokens_seen": 62328112, "step": 539 }, { "epoch": 2.8978494623655915, "loss": 0.9393775463104248, "loss_ce": 0.0004126919084228575, "loss_iou": 0.37109375, "loss_num": 0.038818359375, "loss_xval": 0.9375, "num_input_tokens_seen": 62328112, "step": 539 }, { "epoch": 2.903225806451613, "grad_norm": 25.56049156188965, "learning_rate": 5e-07, "loss": 1.0335, "num_input_tokens_seen": 62443384, "step": 540 }, { "epoch": 2.903225806451613, "loss": 1.0678367614746094, "loss_ce": 0.00020969187607988715, "loss_iou": 0.427734375, "loss_num": 0.042236328125, "loss_xval": 1.0703125, "num_input_tokens_seen": 62443384, "step": 540 }, { "epoch": 2.9086021505376345, "grad_norm": 20.319087982177734, "learning_rate": 5e-07, "loss": 1.1332, "num_input_tokens_seen": 62558120, "step": 541 }, { "epoch": 2.9086021505376345, "loss": 1.1037018299102783, "loss_ce": 0.00018621723575051874, "loss_iou": 0.4765625, "loss_num": 0.0302734375, "loss_xval": 1.1015625, "num_input_tokens_seen": 62558120, "step": 541 }, { "epoch": 2.913978494623656, "grad_norm": 18.845653533935547, "learning_rate": 5e-07, "loss": 0.9146, "num_input_tokens_seen": 62673964, "step": 542 }, { "epoch": 2.913978494623656, "loss": 0.7852068543434143, "loss_ce": 5.065237928647548e-05, "loss_iou": 0.330078125, "loss_num": 0.025146484375, "loss_xval": 0.78515625, "num_input_tokens_seen": 62673964, "step": 542 }, { "epoch": 2.9193548387096775, "grad_norm": 28.719024658203125, "learning_rate": 5e-07, "loss": 1.0485, "num_input_tokens_seen": 62789784, "step": 543 }, { "epoch": 2.9193548387096775, "loss": 1.204742670059204, "loss_ce": 0.002106043975800276, "loss_iou": 0.5078125, "loss_num": 0.036865234375, "loss_xval": 1.203125, "num_input_tokens_seen": 62789784, "step": 543 }, { "epoch": 2.924731182795699, "grad_norm": 19.294179916381836, "learning_rate": 5e-07, "loss": 1.1104, "num_input_tokens_seen": 62902752, "step": 544 }, { "epoch": 2.924731182795699, "loss": 1.2576314210891724, "loss_ce": 6.306414434220642e-05, "loss_iou": 0.51953125, "loss_num": 0.043701171875, "loss_xval": 1.2578125, "num_input_tokens_seen": 62902752, "step": 544 }, { "epoch": 2.9301075268817205, "grad_norm": 36.7462043762207, "learning_rate": 5e-07, "loss": 1.0117, "num_input_tokens_seen": 63018044, "step": 545 }, { "epoch": 2.9301075268817205, "loss": 0.9498646259307861, "loss_ce": 3.5495420888764784e-05, "loss_iou": 0.408203125, "loss_num": 0.02685546875, "loss_xval": 0.94921875, "num_input_tokens_seen": 63018044, "step": 545 }, { "epoch": 2.935483870967742, "grad_norm": 30.70102310180664, "learning_rate": 5e-07, "loss": 1.0364, "num_input_tokens_seen": 63134168, "step": 546 }, { "epoch": 2.935483870967742, "loss": 0.932861328125, "loss_ce": 0.0002441575634293258, "loss_iou": 0.34375, "loss_num": 0.049072265625, "loss_xval": 0.93359375, "num_input_tokens_seen": 63134168, "step": 546 }, { "epoch": 2.9408602150537635, "grad_norm": 41.0186653137207, "learning_rate": 5e-07, "loss": 1.0241, "num_input_tokens_seen": 63250376, "step": 547 }, { "epoch": 2.9408602150537635, "loss": 0.9967174530029297, "loss_ce": 0.00013542268425226212, "loss_iou": 0.40234375, "loss_num": 0.038330078125, "loss_xval": 0.99609375, "num_input_tokens_seen": 63250376, "step": 547 }, { "epoch": 2.946236559139785, "grad_norm": 34.027408599853516, "learning_rate": 5e-07, "loss": 1.0583, "num_input_tokens_seen": 63370088, "step": 548 }, { "epoch": 2.946236559139785, "loss": 1.0324108600616455, "loss_ce": 0.00018429361807648093, "loss_iou": 0.443359375, "loss_num": 0.029052734375, "loss_xval": 1.03125, "num_input_tokens_seen": 63370088, "step": 548 }, { "epoch": 2.9516129032258065, "grad_norm": 30.45508575439453, "learning_rate": 5e-07, "loss": 0.9459, "num_input_tokens_seen": 63485036, "step": 549 }, { "epoch": 2.9516129032258065, "loss": 1.080384612083435, "loss_ce": 0.0005507200839929283, "loss_iou": 0.4296875, "loss_num": 0.0439453125, "loss_xval": 1.078125, "num_input_tokens_seen": 63485036, "step": 549 }, { "epoch": 2.956989247311828, "grad_norm": 31.61918067932129, "learning_rate": 5e-07, "loss": 0.9498, "num_input_tokens_seen": 63602888, "step": 550 }, { "epoch": 2.956989247311828, "loss": 0.941436231136322, "loss_ce": 3.0034419978619553e-05, "loss_iou": 0.384765625, "loss_num": 0.034423828125, "loss_xval": 0.94140625, "num_input_tokens_seen": 63602888, "step": 550 }, { "epoch": 2.9623655913978495, "grad_norm": 21.61182975769043, "learning_rate": 5e-07, "loss": 1.0427, "num_input_tokens_seen": 63718980, "step": 551 }, { "epoch": 2.9623655913978495, "loss": 1.2370295524597168, "loss_ce": 0.0007013530703261495, "loss_iou": 0.51171875, "loss_num": 0.04248046875, "loss_xval": 1.234375, "num_input_tokens_seen": 63718980, "step": 551 }, { "epoch": 2.967741935483871, "grad_norm": 29.887893676757812, "learning_rate": 5e-07, "loss": 1.0513, "num_input_tokens_seen": 63835492, "step": 552 }, { "epoch": 2.967741935483871, "loss": 0.9267951846122742, "loss_ce": 3.7419216823764145e-05, "loss_iou": 0.34765625, "loss_num": 0.04638671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 63835492, "step": 552 }, { "epoch": 2.9731182795698925, "grad_norm": 23.10103988647461, "learning_rate": 5e-07, "loss": 0.9577, "num_input_tokens_seen": 63952572, "step": 553 }, { "epoch": 2.9731182795698925, "loss": 0.8351247906684875, "loss_ce": 0.0001638481771806255, "loss_iou": 0.353515625, "loss_num": 0.0257568359375, "loss_xval": 0.8359375, "num_input_tokens_seen": 63952572, "step": 553 }, { "epoch": 2.978494623655914, "grad_norm": 23.477867126464844, "learning_rate": 5e-07, "loss": 1.0891, "num_input_tokens_seen": 64068952, "step": 554 }, { "epoch": 2.978494623655914, "loss": 1.4581336975097656, "loss_ce": 0.00012585025979205966, "loss_iou": 0.6015625, "loss_num": 0.05078125, "loss_xval": 1.4609375, "num_input_tokens_seen": 64068952, "step": 554 }, { "epoch": 2.9838709677419355, "grad_norm": 18.881067276000977, "learning_rate": 5e-07, "loss": 0.9163, "num_input_tokens_seen": 64180504, "step": 555 }, { "epoch": 2.9838709677419355, "loss": 0.9253350496292114, "loss_ce": 4.205176082905382e-05, "loss_iou": 0.373046875, "loss_num": 0.035888671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 64180504, "step": 555 }, { "epoch": 2.989247311827957, "grad_norm": 35.447608947753906, "learning_rate": 5e-07, "loss": 0.8566, "num_input_tokens_seen": 64298588, "step": 556 }, { "epoch": 2.989247311827957, "loss": 0.8515162467956543, "loss_ce": 7.577160431537777e-05, "loss_iou": 0.34375, "loss_num": 0.03271484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 64298588, "step": 556 }, { "epoch": 2.9946236559139785, "grad_norm": 27.422901153564453, "learning_rate": 5e-07, "loss": 1.2383, "num_input_tokens_seen": 64414372, "step": 557 }, { "epoch": 2.9946236559139785, "loss": 1.2711814641952515, "loss_ce": 0.00018530312809161842, "loss_iou": 0.54296875, "loss_num": 0.036865234375, "loss_xval": 1.2734375, "num_input_tokens_seen": 64414372, "step": 557 }, { "epoch": 3.0, "grad_norm": 23.61506462097168, "learning_rate": 5e-07, "loss": 1.0466, "num_input_tokens_seen": 64528392, "step": 558 }, { "epoch": 3.0, "loss": 1.3897221088409424, "loss_ce": 7.366888894466683e-05, "loss_iou": 0.5625, "loss_num": 0.0537109375, "loss_xval": 1.390625, "num_input_tokens_seen": 64528392, "step": 558 }, { "epoch": 3.0053763440860215, "grad_norm": 23.43360710144043, "learning_rate": 5e-07, "loss": 1.1958, "num_input_tokens_seen": 64644156, "step": 559 }, { "epoch": 3.0053763440860215, "loss": 0.9119795560836792, "loss_ce": 0.00011427226127125323, "loss_iou": 0.3984375, "loss_num": 0.0228271484375, "loss_xval": 0.91015625, "num_input_tokens_seen": 64644156, "step": 559 }, { "epoch": 3.010752688172043, "grad_norm": 22.693479537963867, "learning_rate": 5e-07, "loss": 0.889, "num_input_tokens_seen": 64760976, "step": 560 }, { "epoch": 3.010752688172043, "loss": 0.878840446472168, "loss_ce": 5.629836959997192e-05, "loss_iou": 0.357421875, "loss_num": 0.032470703125, "loss_xval": 0.87890625, "num_input_tokens_seen": 64760976, "step": 560 }, { "epoch": 3.0161290322580645, "grad_norm": 29.278104782104492, "learning_rate": 5e-07, "loss": 1.1427, "num_input_tokens_seen": 64876868, "step": 561 }, { "epoch": 3.0161290322580645, "loss": 1.528903603553772, "loss_ce": 9.49900786508806e-05, "loss_iou": 0.62109375, "loss_num": 0.057861328125, "loss_xval": 1.53125, "num_input_tokens_seen": 64876868, "step": 561 }, { "epoch": 3.021505376344086, "grad_norm": 22.698974609375, "learning_rate": 5e-07, "loss": 1.0636, "num_input_tokens_seen": 64990044, "step": 562 }, { "epoch": 3.021505376344086, "loss": 1.069865345954895, "loss_ce": 4.1177961975336075e-05, "loss_iou": 0.44921875, "loss_num": 0.0341796875, "loss_xval": 1.0703125, "num_input_tokens_seen": 64990044, "step": 562 }, { "epoch": 3.0268817204301075, "grad_norm": 17.74418830871582, "learning_rate": 5e-07, "loss": 1.1086, "num_input_tokens_seen": 65101244, "step": 563 }, { "epoch": 3.0268817204301075, "loss": 1.078681230545044, "loss_ce": 6.799650873290375e-05, "loss_iou": 0.4765625, "loss_num": 0.025146484375, "loss_xval": 1.078125, "num_input_tokens_seen": 65101244, "step": 563 }, { "epoch": 3.032258064516129, "grad_norm": 35.78413772583008, "learning_rate": 5e-07, "loss": 0.9135, "num_input_tokens_seen": 65216660, "step": 564 }, { "epoch": 3.032258064516129, "loss": 0.7989140152931213, "loss_ce": 8.58699349919334e-05, "loss_iou": 0.3125, "loss_num": 0.03466796875, "loss_xval": 0.796875, "num_input_tokens_seen": 65216660, "step": 564 }, { "epoch": 3.0376344086021505, "grad_norm": 18.552719116210938, "learning_rate": 5e-07, "loss": 0.8116, "num_input_tokens_seen": 65331140, "step": 565 }, { "epoch": 3.0376344086021505, "loss": 0.7242457270622253, "loss_ce": 0.0001246067404281348, "loss_iou": 0.310546875, "loss_num": 0.020751953125, "loss_xval": 0.72265625, "num_input_tokens_seen": 65331140, "step": 565 }, { "epoch": 3.043010752688172, "grad_norm": 20.455181121826172, "learning_rate": 5e-07, "loss": 0.9159, "num_input_tokens_seen": 65448784, "step": 566 }, { "epoch": 3.043010752688172, "loss": 0.5842821598052979, "loss_ce": 5.36843654117547e-05, "loss_iou": 0.255859375, "loss_num": 0.01434326171875, "loss_xval": 0.5859375, "num_input_tokens_seen": 65448784, "step": 566 }, { "epoch": 3.0483870967741935, "grad_norm": 22.676990509033203, "learning_rate": 5e-07, "loss": 0.8429, "num_input_tokens_seen": 65562696, "step": 567 }, { "epoch": 3.0483870967741935, "loss": 0.5715851783752441, "loss_ce": 5.19483583047986e-05, "loss_iou": 0.2490234375, "loss_num": 0.014892578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 65562696, "step": 567 }, { "epoch": 3.053763440860215, "grad_norm": 25.869260787963867, "learning_rate": 5e-07, "loss": 0.9062, "num_input_tokens_seen": 65680704, "step": 568 }, { "epoch": 3.053763440860215, "loss": 0.8718727827072144, "loss_ce": 4.660749254981056e-05, "loss_iou": 0.35546875, "loss_num": 0.03271484375, "loss_xval": 0.87109375, "num_input_tokens_seen": 65680704, "step": 568 }, { "epoch": 3.0591397849462365, "grad_norm": 28.313798904418945, "learning_rate": 5e-07, "loss": 0.9432, "num_input_tokens_seen": 65797220, "step": 569 }, { "epoch": 3.0591397849462365, "loss": 0.8049854636192322, "loss_ce": 5.3822404879610986e-05, "loss_iou": 0.34765625, "loss_num": 0.021484375, "loss_xval": 0.8046875, "num_input_tokens_seen": 65797220, "step": 569 }, { "epoch": 3.064516129032258, "grad_norm": 27.52315902709961, "learning_rate": 5e-07, "loss": 0.9677, "num_input_tokens_seen": 65911616, "step": 570 }, { "epoch": 3.064516129032258, "loss": 1.0747593641281128, "loss_ce": 5.2344126743264496e-05, "loss_iou": 0.46484375, "loss_num": 0.02978515625, "loss_xval": 1.078125, "num_input_tokens_seen": 65911616, "step": 570 }, { "epoch": 3.0698924731182795, "grad_norm": 25.1391658782959, "learning_rate": 5e-07, "loss": 1.0272, "num_input_tokens_seen": 66027600, "step": 571 }, { "epoch": 3.0698924731182795, "loss": 0.7500638961791992, "loss_ce": 6.389217014657333e-05, "loss_iou": 0.314453125, "loss_num": 0.0242919921875, "loss_xval": 0.75, "num_input_tokens_seen": 66027600, "step": 571 }, { "epoch": 3.075268817204301, "grad_norm": 24.93133544921875, "learning_rate": 5e-07, "loss": 0.9997, "num_input_tokens_seen": 66142808, "step": 572 }, { "epoch": 3.075268817204301, "loss": 1.138256549835205, "loss_ce": 7.29201638023369e-05, "loss_iou": 0.458984375, "loss_num": 0.043701171875, "loss_xval": 1.140625, "num_input_tokens_seen": 66142808, "step": 572 }, { "epoch": 3.0806451612903225, "grad_norm": 17.571786880493164, "learning_rate": 5e-07, "loss": 1.0502, "num_input_tokens_seen": 66259072, "step": 573 }, { "epoch": 3.0806451612903225, "loss": 0.7318180799484253, "loss_ce": 0.0023258994333446026, "loss_iou": 0.283203125, "loss_num": 0.032470703125, "loss_xval": 0.73046875, "num_input_tokens_seen": 66259072, "step": 573 }, { "epoch": 3.086021505376344, "grad_norm": 21.311967849731445, "learning_rate": 5e-07, "loss": 0.9399, "num_input_tokens_seen": 66375460, "step": 574 }, { "epoch": 3.086021505376344, "loss": 1.0215339660644531, "loss_ce": 4.962410457665101e-05, "loss_iou": 0.41796875, "loss_num": 0.037109375, "loss_xval": 1.0234375, "num_input_tokens_seen": 66375460, "step": 574 }, { "epoch": 3.0913978494623655, "grad_norm": 27.56854248046875, "learning_rate": 5e-07, "loss": 0.982, "num_input_tokens_seen": 66491580, "step": 575 }, { "epoch": 3.0913978494623655, "loss": 1.0379114151000977, "loss_ce": 6.957682489883155e-05, "loss_iou": 0.427734375, "loss_num": 0.03662109375, "loss_xval": 1.0390625, "num_input_tokens_seen": 66491580, "step": 575 }, { "epoch": 3.096774193548387, "grad_norm": 25.910686492919922, "learning_rate": 5e-07, "loss": 1.3127, "num_input_tokens_seen": 66607320, "step": 576 }, { "epoch": 3.096774193548387, "loss": 1.2696032524108887, "loss_ce": 7.195000216597691e-05, "loss_iou": 0.48828125, "loss_num": 0.058837890625, "loss_xval": 1.265625, "num_input_tokens_seen": 66607320, "step": 576 }, { "epoch": 3.1021505376344085, "grad_norm": 26.313785552978516, "learning_rate": 5e-07, "loss": 1.2021, "num_input_tokens_seen": 66723396, "step": 577 }, { "epoch": 3.1021505376344085, "loss": 0.8809092044830322, "loss_ce": 4.9787711759563535e-05, "loss_iou": 0.369140625, "loss_num": 0.0286865234375, "loss_xval": 0.8828125, "num_input_tokens_seen": 66723396, "step": 577 }, { "epoch": 3.10752688172043, "grad_norm": 23.758527755737305, "learning_rate": 5e-07, "loss": 0.8626, "num_input_tokens_seen": 66836952, "step": 578 }, { "epoch": 3.10752688172043, "loss": 0.7293645143508911, "loss_ce": 0.00011647488281596452, "loss_iou": 0.3125, "loss_num": 0.0203857421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 66836952, "step": 578 }, { "epoch": 3.1129032258064515, "grad_norm": 19.086658477783203, "learning_rate": 5e-07, "loss": 1.0482, "num_input_tokens_seen": 66952060, "step": 579 }, { "epoch": 3.1129032258064515, "loss": 0.8814196586608887, "loss_ce": 7.200564141385257e-05, "loss_iou": 0.37890625, "loss_num": 0.0245361328125, "loss_xval": 0.8828125, "num_input_tokens_seen": 66952060, "step": 579 }, { "epoch": 3.118279569892473, "grad_norm": 22.181276321411133, "learning_rate": 5e-07, "loss": 1.2934, "num_input_tokens_seen": 67065548, "step": 580 }, { "epoch": 3.118279569892473, "loss": 1.0388500690460205, "loss_ce": 3.1654766644351184e-05, "loss_iou": 0.443359375, "loss_num": 0.0306396484375, "loss_xval": 1.0390625, "num_input_tokens_seen": 67065548, "step": 580 }, { "epoch": 3.1236559139784945, "grad_norm": 22.39266014099121, "learning_rate": 5e-07, "loss": 0.8761, "num_input_tokens_seen": 67183928, "step": 581 }, { "epoch": 3.1236559139784945, "loss": 0.8350236415863037, "loss_ce": 6.267859134823084e-05, "loss_iou": 0.34375, "loss_num": 0.0291748046875, "loss_xval": 0.8359375, "num_input_tokens_seen": 67183928, "step": 581 }, { "epoch": 3.129032258064516, "grad_norm": 22.54981231689453, "learning_rate": 5e-07, "loss": 0.8452, "num_input_tokens_seen": 67301024, "step": 582 }, { "epoch": 3.129032258064516, "loss": 0.6879168748855591, "loss_ce": 5.067234087618999e-05, "loss_iou": 0.2890625, "loss_num": 0.0218505859375, "loss_xval": 0.6875, "num_input_tokens_seen": 67301024, "step": 582 }, { "epoch": 3.1344086021505375, "grad_norm": 18.485794067382812, "learning_rate": 5e-07, "loss": 1.1929, "num_input_tokens_seen": 67415712, "step": 583 }, { "epoch": 3.1344086021505375, "loss": 0.9587877988815308, "loss_ce": 4.7563284169882536e-05, "loss_iou": 0.408203125, "loss_num": 0.0281982421875, "loss_xval": 0.95703125, "num_input_tokens_seen": 67415712, "step": 583 }, { "epoch": 3.139784946236559, "grad_norm": 14.54542064666748, "learning_rate": 5e-07, "loss": 1.0633, "num_input_tokens_seen": 67531328, "step": 584 }, { "epoch": 3.139784946236559, "loss": 1.387988805770874, "loss_ce": 4.9387024773750454e-05, "loss_iou": 0.58203125, "loss_num": 0.045166015625, "loss_xval": 1.390625, "num_input_tokens_seen": 67531328, "step": 584 }, { "epoch": 3.1451612903225805, "grad_norm": 25.622886657714844, "learning_rate": 5e-07, "loss": 1.0481, "num_input_tokens_seen": 67646864, "step": 585 }, { "epoch": 3.1451612903225805, "loss": 1.0203609466552734, "loss_ce": 9.721559763420373e-05, "loss_iou": 0.4140625, "loss_num": 0.038330078125, "loss_xval": 1.0234375, "num_input_tokens_seen": 67646864, "step": 585 }, { "epoch": 3.150537634408602, "grad_norm": 30.80066680908203, "learning_rate": 5e-07, "loss": 1.0977, "num_input_tokens_seen": 67764104, "step": 586 }, { "epoch": 3.150537634408602, "loss": 1.4234942197799683, "loss_ce": 0.0001543294347357005, "loss_iou": 0.60546875, "loss_num": 0.042236328125, "loss_xval": 1.421875, "num_input_tokens_seen": 67764104, "step": 586 }, { "epoch": 3.1559139784946235, "grad_norm": 27.312835693359375, "learning_rate": 5e-07, "loss": 1.0942, "num_input_tokens_seen": 67879836, "step": 587 }, { "epoch": 3.1559139784946235, "loss": 1.056823492050171, "loss_ce": 0.00018281053053215146, "loss_iou": 0.44140625, "loss_num": 0.03515625, "loss_xval": 1.0546875, "num_input_tokens_seen": 67879836, "step": 587 }, { "epoch": 3.161290322580645, "grad_norm": 25.032623291015625, "learning_rate": 5e-07, "loss": 0.8913, "num_input_tokens_seen": 67995064, "step": 588 }, { "epoch": 3.161290322580645, "loss": 0.99348384141922, "loss_ce": 7.562668906757608e-05, "loss_iou": 0.384765625, "loss_num": 0.04443359375, "loss_xval": 0.9921875, "num_input_tokens_seen": 67995064, "step": 588 }, { "epoch": 3.1666666666666665, "grad_norm": 23.238630294799805, "learning_rate": 5e-07, "loss": 1.0018, "num_input_tokens_seen": 68113664, "step": 589 }, { "epoch": 3.1666666666666665, "loss": 1.3875079154968262, "loss_ce": 5.6708857300691307e-05, "loss_iou": 0.5625, "loss_num": 0.052001953125, "loss_xval": 1.390625, "num_input_tokens_seen": 68113664, "step": 589 }, { "epoch": 3.172043010752688, "grad_norm": 23.75888442993164, "learning_rate": 5e-07, "loss": 0.8726, "num_input_tokens_seen": 68230336, "step": 590 }, { "epoch": 3.172043010752688, "loss": 0.5586471557617188, "loss_ce": 5.340862844604999e-05, "loss_iou": 0.2294921875, "loss_num": 0.02001953125, "loss_xval": 0.55859375, "num_input_tokens_seen": 68230336, "step": 590 }, { "epoch": 3.1774193548387095, "grad_norm": 21.77613067626953, "learning_rate": 5e-07, "loss": 1.1813, "num_input_tokens_seen": 68342608, "step": 591 }, { "epoch": 3.1774193548387095, "loss": 1.1412440538406372, "loss_ce": 0.000130776475998573, "loss_iou": 0.486328125, "loss_num": 0.033447265625, "loss_xval": 1.140625, "num_input_tokens_seen": 68342608, "step": 591 }, { "epoch": 3.182795698924731, "grad_norm": 25.825403213500977, "learning_rate": 5e-07, "loss": 0.9204, "num_input_tokens_seen": 68460068, "step": 592 }, { "epoch": 3.182795698924731, "loss": 1.1104109287261963, "loss_ce": 5.938930553384125e-05, "loss_iou": 0.423828125, "loss_num": 0.052734375, "loss_xval": 1.109375, "num_input_tokens_seen": 68460068, "step": 592 }, { "epoch": 3.1881720430107525, "grad_norm": 36.84263229370117, "learning_rate": 5e-07, "loss": 0.987, "num_input_tokens_seen": 68577344, "step": 593 }, { "epoch": 3.1881720430107525, "loss": 0.9798403978347778, "loss_ce": 0.00010404584463685751, "loss_iou": 0.416015625, "loss_num": 0.029052734375, "loss_xval": 0.98046875, "num_input_tokens_seen": 68577344, "step": 593 }, { "epoch": 3.193548387096774, "grad_norm": 23.762514114379883, "learning_rate": 5e-07, "loss": 1.1711, "num_input_tokens_seen": 68694068, "step": 594 }, { "epoch": 3.193548387096774, "loss": 1.2462092638015747, "loss_ce": 0.00011552418436622247, "loss_iou": 0.54296875, "loss_num": 0.031494140625, "loss_xval": 1.25, "num_input_tokens_seen": 68694068, "step": 594 }, { "epoch": 3.1989247311827955, "grad_norm": 19.579187393188477, "learning_rate": 5e-07, "loss": 0.9499, "num_input_tokens_seen": 68813960, "step": 595 }, { "epoch": 3.1989247311827955, "loss": 1.1355648040771484, "loss_ce": 6.677045894321054e-05, "loss_iou": 0.48046875, "loss_num": 0.034912109375, "loss_xval": 1.1328125, "num_input_tokens_seen": 68813960, "step": 595 }, { "epoch": 3.204301075268817, "grad_norm": 40.93342971801758, "learning_rate": 5e-07, "loss": 1.0895, "num_input_tokens_seen": 68929784, "step": 596 }, { "epoch": 3.204301075268817, "loss": 1.2217437028884888, "loss_ce": 6.40003418084234e-05, "loss_iou": 0.515625, "loss_num": 0.037109375, "loss_xval": 1.21875, "num_input_tokens_seen": 68929784, "step": 596 }, { "epoch": 3.2096774193548385, "grad_norm": 22.453536987304688, "learning_rate": 5e-07, "loss": 0.9841, "num_input_tokens_seen": 69044116, "step": 597 }, { "epoch": 3.2096774193548385, "loss": 1.0142253637313843, "loss_ce": 6.52450107736513e-05, "loss_iou": 0.4296875, "loss_num": 0.0303955078125, "loss_xval": 1.015625, "num_input_tokens_seen": 69044116, "step": 597 }, { "epoch": 3.21505376344086, "grad_norm": 21.31499481201172, "learning_rate": 5e-07, "loss": 1.0771, "num_input_tokens_seen": 69157144, "step": 598 }, { "epoch": 3.21505376344086, "loss": 1.595276117324829, "loss_ce": 6.122431659605354e-05, "loss_iou": 0.671875, "loss_num": 0.05078125, "loss_xval": 1.59375, "num_input_tokens_seen": 69157144, "step": 598 }, { "epoch": 3.2204301075268815, "grad_norm": 23.71531105041504, "learning_rate": 5e-07, "loss": 0.8461, "num_input_tokens_seen": 69273400, "step": 599 }, { "epoch": 3.2204301075268815, "loss": 0.9543935060501099, "loss_ce": 4.78103211207781e-05, "loss_iou": 0.396484375, "loss_num": 0.032470703125, "loss_xval": 0.953125, "num_input_tokens_seen": 69273400, "step": 599 }, { "epoch": 3.225806451612903, "grad_norm": 34.90068435668945, "learning_rate": 5e-07, "loss": 1.1964, "num_input_tokens_seen": 69386600, "step": 600 }, { "epoch": 3.225806451612903, "loss": 0.7950854301452637, "loss_ce": 0.0001635472581256181, "loss_iou": 0.337890625, "loss_num": 0.0238037109375, "loss_xval": 0.796875, "num_input_tokens_seen": 69386600, "step": 600 }, { "epoch": 3.2311827956989245, "grad_norm": 32.0247802734375, "learning_rate": 5e-07, "loss": 0.9906, "num_input_tokens_seen": 69500284, "step": 601 }, { "epoch": 3.2311827956989245, "loss": 0.9675564765930176, "loss_ce": 2.7227179089095443e-05, "loss_iou": 0.375, "loss_num": 0.04345703125, "loss_xval": 0.96875, "num_input_tokens_seen": 69500284, "step": 601 }, { "epoch": 3.236559139784946, "grad_norm": 25.32822608947754, "learning_rate": 5e-07, "loss": 0.9582, "num_input_tokens_seen": 69618876, "step": 602 }, { "epoch": 3.236559139784946, "loss": 0.8675037622451782, "loss_ce": 7.210345211206004e-05, "loss_iou": 0.37109375, "loss_num": 0.025390625, "loss_xval": 0.8671875, "num_input_tokens_seen": 69618876, "step": 602 }, { "epoch": 3.241935483870968, "grad_norm": 20.033123016357422, "learning_rate": 5e-07, "loss": 0.9044, "num_input_tokens_seen": 69730040, "step": 603 }, { "epoch": 3.241935483870968, "loss": 0.9421736001968384, "loss_ce": 3.495945566101e-05, "loss_iou": 0.404296875, "loss_num": 0.0269775390625, "loss_xval": 0.94140625, "num_input_tokens_seen": 69730040, "step": 603 }, { "epoch": 3.247311827956989, "grad_norm": 47.340904235839844, "learning_rate": 5e-07, "loss": 1.0095, "num_input_tokens_seen": 69842356, "step": 604 }, { "epoch": 3.247311827956989, "loss": 1.0381462574005127, "loss_ce": 6.032722012605518e-05, "loss_iou": 0.451171875, "loss_num": 0.02685546875, "loss_xval": 1.0390625, "num_input_tokens_seen": 69842356, "step": 604 }, { "epoch": 3.252688172043011, "grad_norm": 21.6306095123291, "learning_rate": 5e-07, "loss": 0.9507, "num_input_tokens_seen": 69956544, "step": 605 }, { "epoch": 3.252688172043011, "loss": 1.3228390216827393, "loss_ce": 8.50492506287992e-05, "loss_iou": 0.55078125, "loss_num": 0.044189453125, "loss_xval": 1.3203125, "num_input_tokens_seen": 69956544, "step": 605 }, { "epoch": 3.258064516129032, "grad_norm": 37.70954895019531, "learning_rate": 5e-07, "loss": 0.7604, "num_input_tokens_seen": 70072476, "step": 606 }, { "epoch": 3.258064516129032, "loss": 0.7078139781951904, "loss_ce": 5.031747787143104e-05, "loss_iou": 0.3046875, "loss_num": 0.0194091796875, "loss_xval": 0.70703125, "num_input_tokens_seen": 70072476, "step": 606 }, { "epoch": 3.263440860215054, "grad_norm": 20.62675666809082, "learning_rate": 5e-07, "loss": 0.9037, "num_input_tokens_seen": 70186792, "step": 607 }, { "epoch": 3.263440860215054, "loss": 1.0640039443969727, "loss_ce": 3.918791844625957e-05, "loss_iou": 0.3984375, "loss_num": 0.0537109375, "loss_xval": 1.0625, "num_input_tokens_seen": 70186792, "step": 607 }, { "epoch": 3.268817204301075, "grad_norm": 18.62238311767578, "learning_rate": 5e-07, "loss": 0.8771, "num_input_tokens_seen": 70302904, "step": 608 }, { "epoch": 3.268817204301075, "loss": 0.7862098217010498, "loss_ce": 7.706723408773541e-05, "loss_iou": 0.33984375, "loss_num": 0.021484375, "loss_xval": 0.78515625, "num_input_tokens_seen": 70302904, "step": 608 }, { "epoch": 3.274193548387097, "grad_norm": 20.938390731811523, "learning_rate": 5e-07, "loss": 1.0665, "num_input_tokens_seen": 70416688, "step": 609 }, { "epoch": 3.274193548387097, "loss": 1.0503368377685547, "loss_ce": 4.38811257481575e-05, "loss_iou": 0.4140625, "loss_num": 0.044677734375, "loss_xval": 1.046875, "num_input_tokens_seen": 70416688, "step": 609 }, { "epoch": 3.279569892473118, "grad_norm": 25.960041046142578, "learning_rate": 5e-07, "loss": 0.8875, "num_input_tokens_seen": 70526236, "step": 610 }, { "epoch": 3.279569892473118, "loss": 0.932243824005127, "loss_ce": 0.00011494255886645988, "loss_iou": 0.404296875, "loss_num": 0.024658203125, "loss_xval": 0.93359375, "num_input_tokens_seen": 70526236, "step": 610 }, { "epoch": 3.28494623655914, "grad_norm": 43.7734260559082, "learning_rate": 5e-07, "loss": 0.9902, "num_input_tokens_seen": 70643716, "step": 611 }, { "epoch": 3.28494623655914, "loss": 0.9410219192504883, "loss_ce": 0.00010392632248112932, "loss_iou": 0.419921875, "loss_num": 0.0205078125, "loss_xval": 0.94140625, "num_input_tokens_seen": 70643716, "step": 611 }, { "epoch": 3.2903225806451615, "grad_norm": 20.969934463500977, "learning_rate": 5e-07, "loss": 0.9727, "num_input_tokens_seen": 70763520, "step": 612 }, { "epoch": 3.2903225806451615, "loss": 0.9102039337158203, "loss_ce": 4.768373764818534e-05, "loss_iou": 0.3828125, "loss_num": 0.02880859375, "loss_xval": 0.91015625, "num_input_tokens_seen": 70763520, "step": 612 }, { "epoch": 3.295698924731183, "grad_norm": 19.12283706665039, "learning_rate": 5e-07, "loss": 0.9627, "num_input_tokens_seen": 70877220, "step": 613 }, { "epoch": 3.295698924731183, "loss": 0.7747144103050232, "loss_ce": 5.622323806164786e-05, "loss_iou": 0.32421875, "loss_num": 0.0257568359375, "loss_xval": 0.7734375, "num_input_tokens_seen": 70877220, "step": 613 }, { "epoch": 3.3010752688172045, "grad_norm": 24.522354125976562, "learning_rate": 5e-07, "loss": 0.9527, "num_input_tokens_seen": 70993292, "step": 614 }, { "epoch": 3.3010752688172045, "loss": 1.122801423072815, "loss_ce": 0.00024286254483740777, "loss_iou": 0.439453125, "loss_num": 0.04833984375, "loss_xval": 1.125, "num_input_tokens_seen": 70993292, "step": 614 }, { "epoch": 3.306451612903226, "grad_norm": 18.717092514038086, "learning_rate": 5e-07, "loss": 0.9537, "num_input_tokens_seen": 71107852, "step": 615 }, { "epoch": 3.306451612903226, "loss": 1.066496729850769, "loss_ce": 9.050592052517459e-05, "loss_iou": 0.45703125, "loss_num": 0.0303955078125, "loss_xval": 1.0625, "num_input_tokens_seen": 71107852, "step": 615 }, { "epoch": 3.3118279569892475, "grad_norm": 16.006683349609375, "learning_rate": 5e-07, "loss": 0.7758, "num_input_tokens_seen": 71226792, "step": 616 }, { "epoch": 3.3118279569892475, "loss": 0.649986982345581, "loss_ce": 8.464253187412396e-05, "loss_iou": 0.251953125, "loss_num": 0.029052734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 71226792, "step": 616 }, { "epoch": 3.317204301075269, "grad_norm": 24.582605361938477, "learning_rate": 5e-07, "loss": 1.0266, "num_input_tokens_seen": 71345468, "step": 617 }, { "epoch": 3.317204301075269, "loss": 1.0259242057800293, "loss_ce": 4.524613177636638e-05, "loss_iou": 0.421875, "loss_num": 0.0361328125, "loss_xval": 1.0234375, "num_input_tokens_seen": 71345468, "step": 617 }, { "epoch": 3.3225806451612905, "grad_norm": 17.236351013183594, "learning_rate": 5e-07, "loss": 0.9839, "num_input_tokens_seen": 71463260, "step": 618 }, { "epoch": 3.3225806451612905, "loss": 0.9814944267272949, "loss_ce": 4.907770562567748e-05, "loss_iou": 0.4140625, "loss_num": 0.030517578125, "loss_xval": 0.98046875, "num_input_tokens_seen": 71463260, "step": 618 }, { "epoch": 3.327956989247312, "grad_norm": 27.032724380493164, "learning_rate": 5e-07, "loss": 0.9908, "num_input_tokens_seen": 71578804, "step": 619 }, { "epoch": 3.327956989247312, "loss": 1.1550694704055786, "loss_ce": 4.0154423913918436e-05, "loss_iou": 0.484375, "loss_num": 0.03759765625, "loss_xval": 1.15625, "num_input_tokens_seen": 71578804, "step": 619 }, { "epoch": 3.3333333333333335, "grad_norm": 18.005531311035156, "learning_rate": 5e-07, "loss": 1.1021, "num_input_tokens_seen": 71696612, "step": 620 }, { "epoch": 3.3333333333333335, "loss": 0.8882321119308472, "loss_ce": 4.850741242989898e-05, "loss_iou": 0.380859375, "loss_num": 0.025146484375, "loss_xval": 0.88671875, "num_input_tokens_seen": 71696612, "step": 620 }, { "epoch": 3.338709677419355, "grad_norm": 29.9730167388916, "learning_rate": 5e-07, "loss": 0.9982, "num_input_tokens_seen": 71811424, "step": 621 }, { "epoch": 3.338709677419355, "loss": 1.0454683303833008, "loss_ce": 5.8114710554946214e-05, "loss_iou": 0.439453125, "loss_num": 0.03369140625, "loss_xval": 1.046875, "num_input_tokens_seen": 71811424, "step": 621 }, { "epoch": 3.3440860215053765, "grad_norm": 22.68943214416504, "learning_rate": 5e-07, "loss": 1.0038, "num_input_tokens_seen": 71929300, "step": 622 }, { "epoch": 3.3440860215053765, "loss": 0.9612760543823242, "loss_ce": 9.441897418582812e-05, "loss_iou": 0.421875, "loss_num": 0.0238037109375, "loss_xval": 0.9609375, "num_input_tokens_seen": 71929300, "step": 622 }, { "epoch": 3.349462365591398, "grad_norm": 133.2996063232422, "learning_rate": 5e-07, "loss": 0.9523, "num_input_tokens_seen": 72045484, "step": 623 }, { "epoch": 3.349462365591398, "loss": 0.9268693923950195, "loss_ce": 0.00011158542474731803, "loss_iou": 0.369140625, "loss_num": 0.03759765625, "loss_xval": 0.92578125, "num_input_tokens_seen": 72045484, "step": 623 }, { "epoch": 3.3548387096774195, "grad_norm": 25.527385711669922, "learning_rate": 5e-07, "loss": 1.0347, "num_input_tokens_seen": 72162416, "step": 624 }, { "epoch": 3.3548387096774195, "loss": 0.9200006127357483, "loss_ce": 7.878035103203729e-05, "loss_iou": 0.3671875, "loss_num": 0.036865234375, "loss_xval": 0.921875, "num_input_tokens_seen": 72162416, "step": 624 }, { "epoch": 3.360215053763441, "grad_norm": 18.765945434570312, "learning_rate": 5e-07, "loss": 1.0202, "num_input_tokens_seen": 72279448, "step": 625 }, { "epoch": 3.360215053763441, "loss": 1.071352243423462, "loss_ce": 6.310064054559916e-05, "loss_iou": 0.451171875, "loss_num": 0.03369140625, "loss_xval": 1.0703125, "num_input_tokens_seen": 72279448, "step": 625 }, { "epoch": 3.3655913978494625, "grad_norm": 20.737930297851562, "learning_rate": 5e-07, "loss": 1.1041, "num_input_tokens_seen": 72395976, "step": 626 }, { "epoch": 3.3655913978494625, "loss": 1.2378817796707153, "loss_ce": 8.879846427589655e-05, "loss_iou": 0.5234375, "loss_num": 0.038330078125, "loss_xval": 1.234375, "num_input_tokens_seen": 72395976, "step": 626 }, { "epoch": 3.370967741935484, "grad_norm": 20.189178466796875, "learning_rate": 5e-07, "loss": 0.9075, "num_input_tokens_seen": 72512408, "step": 627 }, { "epoch": 3.370967741935484, "loss": 0.9655964374542236, "loss_ce": 0.0034381840378046036, "loss_iou": 0.3828125, "loss_num": 0.039306640625, "loss_xval": 0.9609375, "num_input_tokens_seen": 72512408, "step": 627 }, { "epoch": 3.3763440860215055, "grad_norm": 26.928253173828125, "learning_rate": 5e-07, "loss": 1.0161, "num_input_tokens_seen": 72627352, "step": 628 }, { "epoch": 3.3763440860215055, "loss": 0.8369929790496826, "loss_ce": 7.885954983066767e-05, "loss_iou": 0.36328125, "loss_num": 0.022216796875, "loss_xval": 0.8359375, "num_input_tokens_seen": 72627352, "step": 628 }, { "epoch": 3.381720430107527, "grad_norm": 17.736745834350586, "learning_rate": 5e-07, "loss": 0.872, "num_input_tokens_seen": 72741636, "step": 629 }, { "epoch": 3.381720430107527, "loss": 1.2097468376159668, "loss_ce": 3.0139282898744568e-05, "loss_iou": 0.515625, "loss_num": 0.0361328125, "loss_xval": 1.2109375, "num_input_tokens_seen": 72741636, "step": 629 }, { "epoch": 3.3870967741935485, "grad_norm": 32.69085693359375, "learning_rate": 5e-07, "loss": 0.9186, "num_input_tokens_seen": 72860224, "step": 630 }, { "epoch": 3.3870967741935485, "loss": 1.2007384300231934, "loss_ce": 5.488251554197632e-05, "loss_iou": 0.482421875, "loss_num": 0.047119140625, "loss_xval": 1.203125, "num_input_tokens_seen": 72860224, "step": 630 }, { "epoch": 3.39247311827957, "grad_norm": 26.674083709716797, "learning_rate": 5e-07, "loss": 0.9081, "num_input_tokens_seen": 72973428, "step": 631 }, { "epoch": 3.39247311827957, "loss": 1.2911032438278198, "loss_ce": 8.761685603531078e-05, "loss_iou": 0.5703125, "loss_num": 0.02978515625, "loss_xval": 1.2890625, "num_input_tokens_seen": 72973428, "step": 631 }, { "epoch": 3.3978494623655915, "grad_norm": 24.540115356445312, "learning_rate": 5e-07, "loss": 1.0478, "num_input_tokens_seen": 73089260, "step": 632 }, { "epoch": 3.3978494623655915, "loss": 0.964199423789978, "loss_ce": 8.817429625196382e-05, "loss_iou": 0.41015625, "loss_num": 0.028564453125, "loss_xval": 0.96484375, "num_input_tokens_seen": 73089260, "step": 632 }, { "epoch": 3.403225806451613, "grad_norm": 34.79616928100586, "learning_rate": 5e-07, "loss": 1.2044, "num_input_tokens_seen": 73199664, "step": 633 }, { "epoch": 3.403225806451613, "loss": 1.0708435773849487, "loss_ce": 4.2825784476008266e-05, "loss_iou": 0.470703125, "loss_num": 0.0260009765625, "loss_xval": 1.0703125, "num_input_tokens_seen": 73199664, "step": 633 }, { "epoch": 3.4086021505376345, "grad_norm": 25.115280151367188, "learning_rate": 5e-07, "loss": 0.9135, "num_input_tokens_seen": 73313484, "step": 634 }, { "epoch": 3.4086021505376345, "loss": 1.1008930206298828, "loss_ce": 6.301067332969978e-05, "loss_iou": 0.482421875, "loss_num": 0.0272216796875, "loss_xval": 1.1015625, "num_input_tokens_seen": 73313484, "step": 634 }, { "epoch": 3.413978494623656, "grad_norm": 22.44522476196289, "learning_rate": 5e-07, "loss": 0.8526, "num_input_tokens_seen": 73430044, "step": 635 }, { "epoch": 3.413978494623656, "loss": 1.0596375465393066, "loss_ce": 6.721950194332749e-05, "loss_iou": 0.4375, "loss_num": 0.036865234375, "loss_xval": 1.0625, "num_input_tokens_seen": 73430044, "step": 635 }, { "epoch": 3.4193548387096775, "grad_norm": 20.72500991821289, "learning_rate": 5e-07, "loss": 1.0012, "num_input_tokens_seen": 73549000, "step": 636 }, { "epoch": 3.4193548387096775, "loss": 0.7398578524589539, "loss_ce": 0.00011176110274391249, "loss_iou": 0.29296875, "loss_num": 0.030517578125, "loss_xval": 0.73828125, "num_input_tokens_seen": 73549000, "step": 636 }, { "epoch": 3.424731182795699, "grad_norm": 13.468002319335938, "learning_rate": 5e-07, "loss": 0.9252, "num_input_tokens_seen": 73663480, "step": 637 }, { "epoch": 3.424731182795699, "loss": 1.0933618545532227, "loss_ce": 0.00010017913882620633, "loss_iou": 0.453125, "loss_num": 0.037109375, "loss_xval": 1.09375, "num_input_tokens_seen": 73663480, "step": 637 }, { "epoch": 3.4301075268817205, "grad_norm": 60.102272033691406, "learning_rate": 5e-07, "loss": 0.9854, "num_input_tokens_seen": 73779684, "step": 638 }, { "epoch": 3.4301075268817205, "loss": 1.480037808418274, "loss_ce": 5.736282037105411e-05, "loss_iou": 0.61328125, "loss_num": 0.050537109375, "loss_xval": 1.4765625, "num_input_tokens_seen": 73779684, "step": 638 }, { "epoch": 3.435483870967742, "grad_norm": 27.135616302490234, "learning_rate": 5e-07, "loss": 0.9731, "num_input_tokens_seen": 73896348, "step": 639 }, { "epoch": 3.435483870967742, "loss": 0.9727107286453247, "loss_ce": 5.4428215662483126e-05, "loss_iou": 0.41796875, "loss_num": 0.02734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 73896348, "step": 639 }, { "epoch": 3.4408602150537635, "grad_norm": 27.057294845581055, "learning_rate": 5e-07, "loss": 1.0346, "num_input_tokens_seen": 74011652, "step": 640 }, { "epoch": 3.4408602150537635, "loss": 1.320112943649292, "loss_ce": 4.462298238649964e-05, "loss_iou": 0.58203125, "loss_num": 0.0308837890625, "loss_xval": 1.3203125, "num_input_tokens_seen": 74011652, "step": 640 }, { "epoch": 3.446236559139785, "grad_norm": 20.627687454223633, "learning_rate": 5e-07, "loss": 0.8295, "num_input_tokens_seen": 74128172, "step": 641 }, { "epoch": 3.446236559139785, "loss": 0.7193266749382019, "loss_ce": 8.841283124638721e-05, "loss_iou": 0.279296875, "loss_num": 0.031982421875, "loss_xval": 0.71875, "num_input_tokens_seen": 74128172, "step": 641 }, { "epoch": 3.4516129032258065, "grad_norm": 37.449485778808594, "learning_rate": 5e-07, "loss": 0.9399, "num_input_tokens_seen": 74244484, "step": 642 }, { "epoch": 3.4516129032258065, "loss": 0.7529805898666382, "loss_ce": 5.088924808660522e-05, "loss_iou": 0.306640625, "loss_num": 0.0279541015625, "loss_xval": 0.75390625, "num_input_tokens_seen": 74244484, "step": 642 }, { "epoch": 3.456989247311828, "grad_norm": 23.362281799316406, "learning_rate": 5e-07, "loss": 0.8665, "num_input_tokens_seen": 74360892, "step": 643 }, { "epoch": 3.456989247311828, "loss": 0.736971378326416, "loss_ce": 9.393668005941436e-05, "loss_iou": 0.3203125, "loss_num": 0.0196533203125, "loss_xval": 0.73828125, "num_input_tokens_seen": 74360892, "step": 643 }, { "epoch": 3.4623655913978495, "grad_norm": 29.438940048217773, "learning_rate": 5e-07, "loss": 1.0921, "num_input_tokens_seen": 74477552, "step": 644 }, { "epoch": 3.4623655913978495, "loss": 0.9856338500976562, "loss_ce": 3.8094567571533844e-05, "loss_iou": 0.4296875, "loss_num": 0.0252685546875, "loss_xval": 0.984375, "num_input_tokens_seen": 74477552, "step": 644 }, { "epoch": 3.467741935483871, "grad_norm": 25.309099197387695, "learning_rate": 5e-07, "loss": 0.827, "num_input_tokens_seen": 74593292, "step": 645 }, { "epoch": 3.467741935483871, "loss": 0.7547299265861511, "loss_ce": 9.126185614150017e-05, "loss_iou": 0.33203125, "loss_num": 0.017822265625, "loss_xval": 0.75390625, "num_input_tokens_seen": 74593292, "step": 645 }, { "epoch": 3.4731182795698925, "grad_norm": 26.70629119873047, "learning_rate": 5e-07, "loss": 0.9961, "num_input_tokens_seen": 74708620, "step": 646 }, { "epoch": 3.4731182795698925, "loss": 0.9726065397262573, "loss_ce": 0.00019442258053459227, "loss_iou": 0.419921875, "loss_num": 0.026611328125, "loss_xval": 0.97265625, "num_input_tokens_seen": 74708620, "step": 646 }, { "epoch": 3.478494623655914, "grad_norm": 24.154136657714844, "learning_rate": 5e-07, "loss": 0.7863, "num_input_tokens_seen": 74825572, "step": 647 }, { "epoch": 3.478494623655914, "loss": 1.1263222694396973, "loss_ce": 0.00010157639189856127, "loss_iou": 0.46875, "loss_num": 0.037109375, "loss_xval": 1.125, "num_input_tokens_seen": 74825572, "step": 647 }, { "epoch": 3.4838709677419355, "grad_norm": 28.385496139526367, "learning_rate": 5e-07, "loss": 1.0758, "num_input_tokens_seen": 74938304, "step": 648 }, { "epoch": 3.4838709677419355, "loss": 0.8293942213058472, "loss_ce": 4.8512345529161394e-05, "loss_iou": 0.3359375, "loss_num": 0.031494140625, "loss_xval": 0.828125, "num_input_tokens_seen": 74938304, "step": 648 }, { "epoch": 3.489247311827957, "grad_norm": 24.44945526123047, "learning_rate": 5e-07, "loss": 1.0103, "num_input_tokens_seen": 75053564, "step": 649 }, { "epoch": 3.489247311827957, "loss": 1.1221133470535278, "loss_ce": 4.305757465772331e-05, "loss_iou": 0.482421875, "loss_num": 0.03125, "loss_xval": 1.125, "num_input_tokens_seen": 75053564, "step": 649 }, { "epoch": 3.4946236559139785, "grad_norm": 27.55080795288086, "learning_rate": 5e-07, "loss": 1.0055, "num_input_tokens_seen": 75168596, "step": 650 }, { "epoch": 3.4946236559139785, "loss": 1.054735779762268, "loss_ce": 4.829757017432712e-05, "loss_iou": 0.462890625, "loss_num": 0.025634765625, "loss_xval": 1.0546875, "num_input_tokens_seen": 75168596, "step": 650 }, { "epoch": 3.5, "grad_norm": 15.575220108032227, "learning_rate": 5e-07, "loss": 1.0783, "num_input_tokens_seen": 75283332, "step": 651 }, { "epoch": 3.5, "loss": 1.0675902366638184, "loss_ce": 8.538212568964809e-05, "loss_iou": 0.4375, "loss_num": 0.038330078125, "loss_xval": 1.0703125, "num_input_tokens_seen": 75283332, "step": 651 }, { "epoch": 3.5053763440860215, "grad_norm": 17.646337509155273, "learning_rate": 5e-07, "loss": 0.7813, "num_input_tokens_seen": 75399180, "step": 652 }, { "epoch": 3.5053763440860215, "loss": 0.8836292028427124, "loss_ce": 8.434167830273509e-05, "loss_iou": 0.375, "loss_num": 0.026611328125, "loss_xval": 0.8828125, "num_input_tokens_seen": 75399180, "step": 652 }, { "epoch": 3.510752688172043, "grad_norm": 15.966257095336914, "learning_rate": 5e-07, "loss": 0.8806, "num_input_tokens_seen": 75514108, "step": 653 }, { "epoch": 3.510752688172043, "loss": 0.6094462871551514, "loss_ce": 7.129260484362021e-05, "loss_iou": 0.251953125, "loss_num": 0.0211181640625, "loss_xval": 0.609375, "num_input_tokens_seen": 75514108, "step": 653 }, { "epoch": 3.5161290322580645, "grad_norm": 16.23122215270996, "learning_rate": 5e-07, "loss": 1.1611, "num_input_tokens_seen": 75630580, "step": 654 }, { "epoch": 3.5161290322580645, "loss": 0.9459793567657471, "loss_ce": 0.00017860745720099658, "loss_iou": 0.400390625, "loss_num": 0.02880859375, "loss_xval": 0.9453125, "num_input_tokens_seen": 75630580, "step": 654 }, { "epoch": 3.521505376344086, "grad_norm": 27.527482986450195, "learning_rate": 5e-07, "loss": 0.9186, "num_input_tokens_seen": 75748448, "step": 655 }, { "epoch": 3.521505376344086, "loss": 0.9766464829444885, "loss_ce": 8.399233047384769e-05, "loss_iou": 0.4375, "loss_num": 0.0203857421875, "loss_xval": 0.9765625, "num_input_tokens_seen": 75748448, "step": 655 }, { "epoch": 3.5268817204301075, "grad_norm": 22.973651885986328, "learning_rate": 5e-07, "loss": 1.0028, "num_input_tokens_seen": 75866112, "step": 656 }, { "epoch": 3.5268817204301075, "loss": 0.765466570854187, "loss_ce": 8.57091581565328e-05, "loss_iou": 0.318359375, "loss_num": 0.025390625, "loss_xval": 0.765625, "num_input_tokens_seen": 75866112, "step": 656 }, { "epoch": 3.532258064516129, "grad_norm": 18.31791114807129, "learning_rate": 5e-07, "loss": 0.8519, "num_input_tokens_seen": 75982912, "step": 657 }, { "epoch": 3.532258064516129, "loss": 0.902788519859314, "loss_ce": 7.852116686990485e-05, "loss_iou": 0.375, "loss_num": 0.0306396484375, "loss_xval": 0.90234375, "num_input_tokens_seen": 75982912, "step": 657 }, { "epoch": 3.5376344086021505, "grad_norm": 28.056482315063477, "learning_rate": 5e-07, "loss": 0.8731, "num_input_tokens_seen": 76098340, "step": 658 }, { "epoch": 3.5376344086021505, "loss": 0.7390767335891724, "loss_ce": 6.309370655799285e-05, "loss_iou": 0.314453125, "loss_num": 0.0220947265625, "loss_xval": 0.73828125, "num_input_tokens_seen": 76098340, "step": 658 }, { "epoch": 3.543010752688172, "grad_norm": 24.72953987121582, "learning_rate": 5e-07, "loss": 0.8242, "num_input_tokens_seen": 76213244, "step": 659 }, { "epoch": 3.543010752688172, "loss": 1.0010590553283691, "loss_ce": 8.247859659604728e-05, "loss_iou": 0.435546875, "loss_num": 0.026123046875, "loss_xval": 1.0, "num_input_tokens_seen": 76213244, "step": 659 }, { "epoch": 3.5483870967741935, "grad_norm": 20.381839752197266, "learning_rate": 5e-07, "loss": 0.973, "num_input_tokens_seen": 76329844, "step": 660 }, { "epoch": 3.5483870967741935, "loss": 1.0046930313110352, "loss_ce": 5.4329582781065255e-05, "loss_iou": 0.388671875, "loss_num": 0.044921875, "loss_xval": 1.0078125, "num_input_tokens_seen": 76329844, "step": 660 }, { "epoch": 3.553763440860215, "grad_norm": 37.969417572021484, "learning_rate": 5e-07, "loss": 0.8458, "num_input_tokens_seen": 76444444, "step": 661 }, { "epoch": 3.553763440860215, "loss": 0.7660146951675415, "loss_ce": 0.00014557407121174037, "loss_iou": 0.326171875, "loss_num": 0.0225830078125, "loss_xval": 0.765625, "num_input_tokens_seen": 76444444, "step": 661 }, { "epoch": 3.5591397849462365, "grad_norm": 21.51639747619629, "learning_rate": 5e-07, "loss": 0.9868, "num_input_tokens_seen": 76559780, "step": 662 }, { "epoch": 3.5591397849462365, "loss": 1.155334234237671, "loss_ce": 6.081149331294e-05, "loss_iou": 0.482421875, "loss_num": 0.0380859375, "loss_xval": 1.15625, "num_input_tokens_seen": 76559780, "step": 662 }, { "epoch": 3.564516129032258, "grad_norm": 17.29229736328125, "learning_rate": 5e-07, "loss": 0.9318, "num_input_tokens_seen": 76678508, "step": 663 }, { "epoch": 3.564516129032258, "loss": 0.7718230485916138, "loss_ce": 9.451069490751252e-05, "loss_iou": 0.345703125, "loss_num": 0.0159912109375, "loss_xval": 0.7734375, "num_input_tokens_seen": 76678508, "step": 663 }, { "epoch": 3.5698924731182795, "grad_norm": 21.130151748657227, "learning_rate": 5e-07, "loss": 0.8192, "num_input_tokens_seen": 76794716, "step": 664 }, { "epoch": 3.5698924731182795, "loss": 0.9039531946182251, "loss_ce": 0.000266685092356056, "loss_iou": 0.359375, "loss_num": 0.036376953125, "loss_xval": 0.90234375, "num_input_tokens_seen": 76794716, "step": 664 }, { "epoch": 3.575268817204301, "grad_norm": 28.97303009033203, "learning_rate": 5e-07, "loss": 0.9387, "num_input_tokens_seen": 76907212, "step": 665 }, { "epoch": 3.575268817204301, "loss": 0.864799439907074, "loss_ce": 5.335031892172992e-05, "loss_iou": 0.34765625, "loss_num": 0.03369140625, "loss_xval": 0.86328125, "num_input_tokens_seen": 76907212, "step": 665 }, { "epoch": 3.5806451612903225, "grad_norm": 19.689599990844727, "learning_rate": 5e-07, "loss": 1.0307, "num_input_tokens_seen": 77021672, "step": 666 }, { "epoch": 3.5806451612903225, "loss": 0.7782560586929321, "loss_ce": 5.775979661848396e-05, "loss_iou": 0.30078125, "loss_num": 0.03564453125, "loss_xval": 0.77734375, "num_input_tokens_seen": 77021672, "step": 666 }, { "epoch": 3.586021505376344, "grad_norm": 28.51669692993164, "learning_rate": 5e-07, "loss": 1.031, "num_input_tokens_seen": 77138404, "step": 667 }, { "epoch": 3.586021505376344, "loss": 1.0549769401550293, "loss_ce": 4.5319255150388926e-05, "loss_iou": 0.45703125, "loss_num": 0.0283203125, "loss_xval": 1.0546875, "num_input_tokens_seen": 77138404, "step": 667 }, { "epoch": 3.5913978494623655, "grad_norm": 18.614315032958984, "learning_rate": 5e-07, "loss": 0.8393, "num_input_tokens_seen": 77257108, "step": 668 }, { "epoch": 3.5913978494623655, "loss": 0.8326135873794556, "loss_ce": 9.403371950611472e-05, "loss_iou": 0.34375, "loss_num": 0.0286865234375, "loss_xval": 0.83203125, "num_input_tokens_seen": 77257108, "step": 668 }, { "epoch": 3.596774193548387, "grad_norm": 20.787425994873047, "learning_rate": 5e-07, "loss": 0.9897, "num_input_tokens_seen": 77373552, "step": 669 }, { "epoch": 3.596774193548387, "loss": 1.0141816139221191, "loss_ce": 2.1390240362961777e-05, "loss_iou": 0.4453125, "loss_num": 0.0250244140625, "loss_xval": 1.015625, "num_input_tokens_seen": 77373552, "step": 669 }, { "epoch": 3.6021505376344085, "grad_norm": 22.48851776123047, "learning_rate": 5e-07, "loss": 1.0183, "num_input_tokens_seen": 77489364, "step": 670 }, { "epoch": 3.6021505376344085, "loss": 1.083356499671936, "loss_ce": 0.00010457172174938023, "loss_iou": 0.46875, "loss_num": 0.02978515625, "loss_xval": 1.0859375, "num_input_tokens_seen": 77489364, "step": 670 }, { "epoch": 3.60752688172043, "grad_norm": 30.968896865844727, "learning_rate": 5e-07, "loss": 0.9815, "num_input_tokens_seen": 77605472, "step": 671 }, { "epoch": 3.60752688172043, "loss": 1.4608347415924072, "loss_ce": 0.0003856285475194454, "loss_iou": 0.671875, "loss_num": 0.024169921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 77605472, "step": 671 }, { "epoch": 3.6129032258064515, "grad_norm": 32.91668701171875, "learning_rate": 5e-07, "loss": 0.8851, "num_input_tokens_seen": 77723068, "step": 672 }, { "epoch": 3.6129032258064515, "loss": 0.8499090075492859, "loss_ce": 5.5482887546531856e-05, "loss_iou": 0.357421875, "loss_num": 0.0269775390625, "loss_xval": 0.8515625, "num_input_tokens_seen": 77723068, "step": 672 }, { "epoch": 3.618279569892473, "grad_norm": 18.030467987060547, "learning_rate": 5e-07, "loss": 0.9623, "num_input_tokens_seen": 77839528, "step": 673 }, { "epoch": 3.618279569892473, "loss": 0.9897817373275757, "loss_ce": 3.565366205293685e-05, "loss_iou": 0.431640625, "loss_num": 0.025634765625, "loss_xval": 0.98828125, "num_input_tokens_seen": 77839528, "step": 673 }, { "epoch": 3.6236559139784945, "grad_norm": 30.061588287353516, "learning_rate": 5e-07, "loss": 0.8922, "num_input_tokens_seen": 77953536, "step": 674 }, { "epoch": 3.6236559139784945, "loss": 0.9142510890960693, "loss_ce": 6.648190174018964e-05, "loss_iou": 0.400390625, "loss_num": 0.0225830078125, "loss_xval": 0.9140625, "num_input_tokens_seen": 77953536, "step": 674 }, { "epoch": 3.629032258064516, "grad_norm": 36.3795051574707, "learning_rate": 5e-07, "loss": 1.0062, "num_input_tokens_seen": 78069936, "step": 675 }, { "epoch": 3.629032258064516, "loss": 0.7776190042495728, "loss_ce": 3.106397343799472e-05, "loss_iou": 0.32421875, "loss_num": 0.025634765625, "loss_xval": 0.77734375, "num_input_tokens_seen": 78069936, "step": 675 }, { "epoch": 3.6344086021505375, "grad_norm": 19.301023483276367, "learning_rate": 5e-07, "loss": 0.9684, "num_input_tokens_seen": 78188832, "step": 676 }, { "epoch": 3.6344086021505375, "loss": 1.2480969429016113, "loss_ce": 5.00191017636098e-05, "loss_iou": 0.51953125, "loss_num": 0.041015625, "loss_xval": 1.25, "num_input_tokens_seen": 78188832, "step": 676 }, { "epoch": 3.639784946236559, "grad_norm": 16.46856689453125, "learning_rate": 5e-07, "loss": 0.9194, "num_input_tokens_seen": 78303440, "step": 677 }, { "epoch": 3.639784946236559, "loss": 1.3717914819717407, "loss_ce": 8.732250717002898e-05, "loss_iou": 0.54296875, "loss_num": 0.056884765625, "loss_xval": 1.375, "num_input_tokens_seen": 78303440, "step": 677 }, { "epoch": 3.6451612903225805, "grad_norm": 271.0241394042969, "learning_rate": 5e-07, "loss": 1.1761, "num_input_tokens_seen": 78416124, "step": 678 }, { "epoch": 3.6451612903225805, "loss": 1.2041475772857666, "loss_ce": 4.610352698364295e-05, "loss_iou": 0.5078125, "loss_num": 0.038330078125, "loss_xval": 1.203125, "num_input_tokens_seen": 78416124, "step": 678 }, { "epoch": 3.650537634408602, "grad_norm": 24.093400955200195, "learning_rate": 5e-07, "loss": 0.9624, "num_input_tokens_seen": 78533716, "step": 679 }, { "epoch": 3.650537634408602, "loss": 1.046767234802246, "loss_ce": 0.00013633808703161776, "loss_iou": 0.435546875, "loss_num": 0.035400390625, "loss_xval": 1.046875, "num_input_tokens_seen": 78533716, "step": 679 }, { "epoch": 3.6559139784946235, "grad_norm": 34.59588623046875, "learning_rate": 5e-07, "loss": 1.178, "num_input_tokens_seen": 78648532, "step": 680 }, { "epoch": 3.6559139784946235, "loss": 1.2146832942962646, "loss_ce": 0.000327783462125808, "loss_iou": 0.5, "loss_num": 0.04296875, "loss_xval": 1.2109375, "num_input_tokens_seen": 78648532, "step": 680 }, { "epoch": 3.661290322580645, "grad_norm": 20.89113998413086, "learning_rate": 5e-07, "loss": 0.999, "num_input_tokens_seen": 78763272, "step": 681 }, { "epoch": 3.661290322580645, "loss": 1.1763348579406738, "loss_ce": 6.533972918987274e-05, "loss_iou": 0.48828125, "loss_num": 0.039794921875, "loss_xval": 1.1796875, "num_input_tokens_seen": 78763272, "step": 681 }, { "epoch": 3.6666666666666665, "grad_norm": 24.136066436767578, "learning_rate": 5e-07, "loss": 0.8693, "num_input_tokens_seen": 78879320, "step": 682 }, { "epoch": 3.6666666666666665, "loss": 0.9946682453155518, "loss_ce": 3.9371887396555394e-05, "loss_iou": 0.427734375, "loss_num": 0.0279541015625, "loss_xval": 0.99609375, "num_input_tokens_seen": 78879320, "step": 682 }, { "epoch": 3.672043010752688, "grad_norm": 32.20932388305664, "learning_rate": 5e-07, "loss": 1.0027, "num_input_tokens_seen": 78992524, "step": 683 }, { "epoch": 3.672043010752688, "loss": 0.9904147386550903, "loss_ce": 5.8328540035290644e-05, "loss_iou": 0.40625, "loss_num": 0.035888671875, "loss_xval": 0.9921875, "num_input_tokens_seen": 78992524, "step": 683 }, { "epoch": 3.6774193548387095, "grad_norm": 21.752185821533203, "learning_rate": 5e-07, "loss": 1.0451, "num_input_tokens_seen": 79108004, "step": 684 }, { "epoch": 3.6774193548387095, "loss": 1.0812562704086304, "loss_ce": 0.0002015720820054412, "loss_iou": 0.45703125, "loss_num": 0.032958984375, "loss_xval": 1.078125, "num_input_tokens_seen": 79108004, "step": 684 }, { "epoch": 3.682795698924731, "grad_norm": 23.468164443969727, "learning_rate": 5e-07, "loss": 1.0878, "num_input_tokens_seen": 79223600, "step": 685 }, { "epoch": 3.682795698924731, "loss": 1.2101271152496338, "loss_ce": 0.00016621698159724474, "loss_iou": 0.50390625, "loss_num": 0.041015625, "loss_xval": 1.2109375, "num_input_tokens_seen": 79223600, "step": 685 }, { "epoch": 3.688172043010753, "grad_norm": 24.676773071289062, "learning_rate": 5e-07, "loss": 0.8277, "num_input_tokens_seen": 79339968, "step": 686 }, { "epoch": 3.688172043010753, "loss": 1.2032150030136108, "loss_ce": 9.003015293274075e-05, "loss_iou": 0.51171875, "loss_num": 0.035888671875, "loss_xval": 1.203125, "num_input_tokens_seen": 79339968, "step": 686 }, { "epoch": 3.693548387096774, "grad_norm": 17.819801330566406, "learning_rate": 5e-07, "loss": 0.8274, "num_input_tokens_seen": 79456624, "step": 687 }, { "epoch": 3.693548387096774, "loss": 0.6519208550453186, "loss_ce": 6.540908361785114e-05, "loss_iou": 0.25390625, "loss_num": 0.028564453125, "loss_xval": 0.65234375, "num_input_tokens_seen": 79456624, "step": 687 }, { "epoch": 3.698924731182796, "grad_norm": 25.343727111816406, "learning_rate": 5e-07, "loss": 0.8852, "num_input_tokens_seen": 79572344, "step": 688 }, { "epoch": 3.698924731182796, "loss": 0.7749922871589661, "loss_ce": 8.99397928151302e-05, "loss_iou": 0.2451171875, "loss_num": 0.056884765625, "loss_xval": 0.7734375, "num_input_tokens_seen": 79572344, "step": 688 }, { "epoch": 3.704301075268817, "grad_norm": 25.451555252075195, "learning_rate": 5e-07, "loss": 1.1254, "num_input_tokens_seen": 79688820, "step": 689 }, { "epoch": 3.704301075268817, "loss": 1.192140817642212, "loss_ce": 0.0002462802513036877, "loss_iou": 0.474609375, "loss_num": 0.049072265625, "loss_xval": 1.1953125, "num_input_tokens_seen": 79688820, "step": 689 }, { "epoch": 3.709677419354839, "grad_norm": 36.26627731323242, "learning_rate": 5e-07, "loss": 1.0956, "num_input_tokens_seen": 79807108, "step": 690 }, { "epoch": 3.709677419354839, "loss": 0.9326996207237244, "loss_ce": 8.24834278319031e-05, "loss_iou": 0.404296875, "loss_num": 0.024658203125, "loss_xval": 0.93359375, "num_input_tokens_seen": 79807108, "step": 690 }, { "epoch": 3.71505376344086, "grad_norm": 24.683292388916016, "learning_rate": 5e-07, "loss": 0.8922, "num_input_tokens_seen": 79925164, "step": 691 }, { "epoch": 3.71505376344086, "loss": 0.6951122283935547, "loss_ce": 4.3861531594302505e-05, "loss_iou": 0.306640625, "loss_num": 0.0162353515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 79925164, "step": 691 }, { "epoch": 3.720430107526882, "grad_norm": 23.580156326293945, "learning_rate": 5e-07, "loss": 0.911, "num_input_tokens_seen": 80042840, "step": 692 }, { "epoch": 3.720430107526882, "loss": 0.8086424469947815, "loss_ce": 4.865807932219468e-05, "loss_iou": 0.341796875, "loss_num": 0.0250244140625, "loss_xval": 0.80859375, "num_input_tokens_seen": 80042840, "step": 692 }, { "epoch": 3.725806451612903, "grad_norm": 21.888044357299805, "learning_rate": 5e-07, "loss": 0.9953, "num_input_tokens_seen": 80156104, "step": 693 }, { "epoch": 3.725806451612903, "loss": 1.1566252708435059, "loss_ce": 0.0001311137602897361, "loss_iou": 0.50390625, "loss_num": 0.029541015625, "loss_xval": 1.15625, "num_input_tokens_seen": 80156104, "step": 693 }, { "epoch": 3.731182795698925, "grad_norm": 16.928884506225586, "learning_rate": 5e-07, "loss": 0.9057, "num_input_tokens_seen": 80269456, "step": 694 }, { "epoch": 3.731182795698925, "loss": 0.6745443940162659, "loss_ce": 0.00010594442574074492, "loss_iou": 0.275390625, "loss_num": 0.0244140625, "loss_xval": 0.67578125, "num_input_tokens_seen": 80269456, "step": 694 }, { "epoch": 3.736559139784946, "grad_norm": 22.99903678894043, "learning_rate": 5e-07, "loss": 1.0695, "num_input_tokens_seen": 80384376, "step": 695 }, { "epoch": 3.736559139784946, "loss": 1.2324604988098145, "loss_ce": 3.872137676808052e-05, "loss_iou": 0.5390625, "loss_num": 0.031494140625, "loss_xval": 1.234375, "num_input_tokens_seen": 80384376, "step": 695 }, { "epoch": 3.741935483870968, "grad_norm": 19.477794647216797, "learning_rate": 5e-07, "loss": 0.9675, "num_input_tokens_seen": 80497224, "step": 696 }, { "epoch": 3.741935483870968, "loss": 1.035732388496399, "loss_ce": 8.782879740465432e-05, "loss_iou": 0.45703125, "loss_num": 0.0244140625, "loss_xval": 1.0390625, "num_input_tokens_seen": 80497224, "step": 696 }, { "epoch": 3.747311827956989, "grad_norm": 25.006256103515625, "learning_rate": 5e-07, "loss": 0.8687, "num_input_tokens_seen": 80613460, "step": 697 }, { "epoch": 3.747311827956989, "loss": 0.9906350374221802, "loss_ce": 3.4456283174222335e-05, "loss_iou": 0.40625, "loss_num": 0.03564453125, "loss_xval": 0.9921875, "num_input_tokens_seen": 80613460, "step": 697 }, { "epoch": 3.752688172043011, "grad_norm": 19.09154510498047, "learning_rate": 5e-07, "loss": 0.9241, "num_input_tokens_seen": 80726824, "step": 698 }, { "epoch": 3.752688172043011, "loss": 0.7354475259780884, "loss_ce": 9.596609743312001e-05, "loss_iou": 0.29296875, "loss_num": 0.030029296875, "loss_xval": 0.734375, "num_input_tokens_seen": 80726824, "step": 698 }, { "epoch": 3.758064516129032, "grad_norm": 25.10212516784668, "learning_rate": 5e-07, "loss": 0.8649, "num_input_tokens_seen": 80841856, "step": 699 }, { "epoch": 3.758064516129032, "loss": 0.8127799034118652, "loss_ce": 3.5757333535002545e-05, "loss_iou": 0.3203125, "loss_num": 0.034423828125, "loss_xval": 0.8125, "num_input_tokens_seen": 80841856, "step": 699 }, { "epoch": 3.763440860215054, "grad_norm": 22.02937126159668, "learning_rate": 5e-07, "loss": 0.7767, "num_input_tokens_seen": 80955304, "step": 700 }, { "epoch": 3.763440860215054, "loss": 1.106573224067688, "loss_ce": 0.00012795634393114597, "loss_iou": 0.48828125, "loss_num": 0.0260009765625, "loss_xval": 1.109375, "num_input_tokens_seen": 80955304, "step": 700 }, { "epoch": 3.768817204301075, "grad_norm": 25.812393188476562, "learning_rate": 5e-07, "loss": 1.1236, "num_input_tokens_seen": 81065260, "step": 701 }, { "epoch": 3.768817204301075, "loss": 1.01639986038208, "loss_ce": 4.2485065932851285e-05, "loss_iou": 0.41796875, "loss_num": 0.03564453125, "loss_xval": 1.015625, "num_input_tokens_seen": 81065260, "step": 701 }, { "epoch": 3.774193548387097, "grad_norm": 21.092561721801758, "learning_rate": 5e-07, "loss": 1.101, "num_input_tokens_seen": 81179832, "step": 702 }, { "epoch": 3.774193548387097, "loss": 1.1004459857940674, "loss_ce": 0.00010420649778097868, "loss_iou": 0.462890625, "loss_num": 0.034912109375, "loss_xval": 1.1015625, "num_input_tokens_seen": 81179832, "step": 702 }, { "epoch": 3.779569892473118, "grad_norm": 22.226951599121094, "learning_rate": 5e-07, "loss": 0.9771, "num_input_tokens_seen": 81300340, "step": 703 }, { "epoch": 3.779569892473118, "loss": 0.9432034492492676, "loss_ce": 8.81971136550419e-05, "loss_iou": 0.376953125, "loss_num": 0.03759765625, "loss_xval": 0.94140625, "num_input_tokens_seen": 81300340, "step": 703 }, { "epoch": 3.78494623655914, "grad_norm": 17.183320999145508, "learning_rate": 5e-07, "loss": 0.8538, "num_input_tokens_seen": 81415688, "step": 704 }, { "epoch": 3.78494623655914, "loss": 0.5200709104537964, "loss_ce": 5.137055995874107e-05, "loss_iou": 0.1982421875, "loss_num": 0.0245361328125, "loss_xval": 0.51953125, "num_input_tokens_seen": 81415688, "step": 704 }, { "epoch": 3.790322580645161, "grad_norm": 18.517547607421875, "learning_rate": 5e-07, "loss": 0.9701, "num_input_tokens_seen": 81532024, "step": 705 }, { "epoch": 3.790322580645161, "loss": 0.681352436542511, "loss_ce": 7.80379050411284e-05, "loss_iou": 0.283203125, "loss_num": 0.0230712890625, "loss_xval": 0.6796875, "num_input_tokens_seen": 81532024, "step": 705 }, { "epoch": 3.795698924731183, "grad_norm": 28.64894676208496, "learning_rate": 5e-07, "loss": 0.8982, "num_input_tokens_seen": 81646080, "step": 706 }, { "epoch": 3.795698924731183, "loss": 0.9927443265914917, "loss_ce": 6.85472332406789e-05, "loss_iou": 0.41015625, "loss_num": 0.034423828125, "loss_xval": 0.9921875, "num_input_tokens_seen": 81646080, "step": 706 }, { "epoch": 3.801075268817204, "grad_norm": 31.262210845947266, "learning_rate": 5e-07, "loss": 1.0626, "num_input_tokens_seen": 81762384, "step": 707 }, { "epoch": 3.801075268817204, "loss": 0.8712539076805115, "loss_ce": 3.807514440268278e-05, "loss_iou": 0.376953125, "loss_num": 0.0234375, "loss_xval": 0.87109375, "num_input_tokens_seen": 81762384, "step": 707 }, { "epoch": 3.806451612903226, "grad_norm": 45.37864685058594, "learning_rate": 5e-07, "loss": 0.8687, "num_input_tokens_seen": 81879152, "step": 708 }, { "epoch": 3.806451612903226, "loss": 0.8140531778335571, "loss_ce": 8.834023174131289e-05, "loss_iou": 0.33203125, "loss_num": 0.02978515625, "loss_xval": 0.8125, "num_input_tokens_seen": 81879152, "step": 708 }, { "epoch": 3.811827956989247, "grad_norm": 14.572982788085938, "learning_rate": 5e-07, "loss": 0.7596, "num_input_tokens_seen": 81998448, "step": 709 }, { "epoch": 3.811827956989247, "loss": 0.702477216720581, "loss_ce": 8.463216363452375e-05, "loss_iou": 0.294921875, "loss_num": 0.0225830078125, "loss_xval": 0.703125, "num_input_tokens_seen": 81998448, "step": 709 }, { "epoch": 3.817204301075269, "grad_norm": 20.56422996520996, "learning_rate": 5e-07, "loss": 0.7631, "num_input_tokens_seen": 82117612, "step": 710 }, { "epoch": 3.817204301075269, "loss": 0.6616871953010559, "loss_ce": 6.608142575714737e-05, "loss_iou": 0.287109375, "loss_num": 0.017578125, "loss_xval": 0.66015625, "num_input_tokens_seen": 82117612, "step": 710 }, { "epoch": 3.8225806451612905, "grad_norm": 22.511388778686523, "learning_rate": 5e-07, "loss": 1.0079, "num_input_tokens_seen": 82232500, "step": 711 }, { "epoch": 3.8225806451612905, "loss": 0.6952488422393799, "loss_ce": 0.00018048079800792038, "loss_iou": 0.275390625, "loss_num": 0.0286865234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 82232500, "step": 711 }, { "epoch": 3.827956989247312, "grad_norm": 28.26279640197754, "learning_rate": 5e-07, "loss": 0.8258, "num_input_tokens_seen": 82349272, "step": 712 }, { "epoch": 3.827956989247312, "loss": 0.790876567363739, "loss_ce": 0.00010511015716474503, "loss_iou": 0.3125, "loss_num": 0.033203125, "loss_xval": 0.7890625, "num_input_tokens_seen": 82349272, "step": 712 }, { "epoch": 3.8333333333333335, "grad_norm": 19.495712280273438, "learning_rate": 5e-07, "loss": 0.8096, "num_input_tokens_seen": 82463696, "step": 713 }, { "epoch": 3.8333333333333335, "loss": 0.8826963901519775, "loss_ce": 0.00012799359683413059, "loss_iou": 0.357421875, "loss_num": 0.03369140625, "loss_xval": 0.8828125, "num_input_tokens_seen": 82463696, "step": 713 }, { "epoch": 3.838709677419355, "grad_norm": 21.86402702331543, "learning_rate": 5e-07, "loss": 1.0913, "num_input_tokens_seen": 82576600, "step": 714 }, { "epoch": 3.838709677419355, "loss": 0.9293177127838135, "loss_ce": 0.00011849129805341363, "loss_iou": 0.408203125, "loss_num": 0.022705078125, "loss_xval": 0.9296875, "num_input_tokens_seen": 82576600, "step": 714 }, { "epoch": 3.8440860215053765, "grad_norm": 21.211706161499023, "learning_rate": 5e-07, "loss": 0.9786, "num_input_tokens_seen": 82691660, "step": 715 }, { "epoch": 3.8440860215053765, "loss": 0.9761725068092346, "loss_ce": 9.831730858422816e-05, "loss_iou": 0.419921875, "loss_num": 0.02685546875, "loss_xval": 0.9765625, "num_input_tokens_seen": 82691660, "step": 715 }, { "epoch": 3.849462365591398, "grad_norm": 22.683835983276367, "learning_rate": 5e-07, "loss": 0.8668, "num_input_tokens_seen": 82808976, "step": 716 }, { "epoch": 3.849462365591398, "loss": 0.7979419827461243, "loss_ce": 9.04688349692151e-05, "loss_iou": 0.33984375, "loss_num": 0.0238037109375, "loss_xval": 0.796875, "num_input_tokens_seen": 82808976, "step": 716 }, { "epoch": 3.8548387096774195, "grad_norm": 19.35234832763672, "learning_rate": 5e-07, "loss": 0.9369, "num_input_tokens_seen": 82926744, "step": 717 }, { "epoch": 3.8548387096774195, "loss": 0.8196360468864441, "loss_ce": 5.5980872275540605e-05, "loss_iou": 0.353515625, "loss_num": 0.0224609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 82926744, "step": 717 }, { "epoch": 3.860215053763441, "grad_norm": 42.38694381713867, "learning_rate": 5e-07, "loss": 0.8787, "num_input_tokens_seen": 83043524, "step": 718 }, { "epoch": 3.860215053763441, "loss": 0.7032163143157959, "loss_ce": 9.130118269240484e-05, "loss_iou": 0.28515625, "loss_num": 0.027099609375, "loss_xval": 0.703125, "num_input_tokens_seen": 83043524, "step": 718 }, { "epoch": 3.8655913978494625, "grad_norm": 27.891925811767578, "learning_rate": 5e-07, "loss": 0.7006, "num_input_tokens_seen": 83159220, "step": 719 }, { "epoch": 3.8655913978494625, "loss": 0.7273565530776978, "loss_ce": 6.165119702927768e-05, "loss_iou": 0.27734375, "loss_num": 0.034423828125, "loss_xval": 0.7265625, "num_input_tokens_seen": 83159220, "step": 719 }, { "epoch": 3.870967741935484, "grad_norm": 24.52166748046875, "learning_rate": 5e-07, "loss": 0.9255, "num_input_tokens_seen": 83277996, "step": 720 }, { "epoch": 3.870967741935484, "loss": 0.6673548817634583, "loss_ce": 0.00011856519995490089, "loss_iou": 0.287109375, "loss_num": 0.0186767578125, "loss_xval": 0.66796875, "num_input_tokens_seen": 83277996, "step": 720 }, { "epoch": 3.8763440860215055, "grad_norm": 20.248735427856445, "learning_rate": 5e-07, "loss": 0.7627, "num_input_tokens_seen": 83393080, "step": 721 }, { "epoch": 3.8763440860215055, "loss": 0.716408371925354, "loss_ce": 9.978600428439677e-05, "loss_iou": 0.30078125, "loss_num": 0.023193359375, "loss_xval": 0.71484375, "num_input_tokens_seen": 83393080, "step": 721 }, { "epoch": 3.881720430107527, "grad_norm": 21.63620376586914, "learning_rate": 5e-07, "loss": 0.8788, "num_input_tokens_seen": 83507416, "step": 722 }, { "epoch": 3.881720430107527, "loss": 1.1143572330474854, "loss_ce": 9.943418262992054e-05, "loss_iou": 0.47265625, "loss_num": 0.033447265625, "loss_xval": 1.1171875, "num_input_tokens_seen": 83507416, "step": 722 }, { "epoch": 3.8870967741935485, "grad_norm": 23.734516143798828, "learning_rate": 5e-07, "loss": 0.9021, "num_input_tokens_seen": 83626360, "step": 723 }, { "epoch": 3.8870967741935485, "loss": 1.011425495147705, "loss_ce": 0.0033688461408019066, "loss_iou": 0.423828125, "loss_num": 0.031982421875, "loss_xval": 1.0078125, "num_input_tokens_seen": 83626360, "step": 723 }, { "epoch": 3.89247311827957, "grad_norm": 23.70997428894043, "learning_rate": 5e-07, "loss": 0.9456, "num_input_tokens_seen": 83740704, "step": 724 }, { "epoch": 3.89247311827957, "loss": 1.179335594177246, "loss_ce": 0.00013634121569339186, "loss_iou": 0.4921875, "loss_num": 0.038818359375, "loss_xval": 1.1796875, "num_input_tokens_seen": 83740704, "step": 724 }, { "epoch": 3.8978494623655915, "grad_norm": 28.32930564880371, "learning_rate": 5e-07, "loss": 0.9032, "num_input_tokens_seen": 83856788, "step": 725 }, { "epoch": 3.8978494623655915, "loss": 0.9438728094100952, "loss_ce": 2.517439133953303e-05, "loss_iou": 0.392578125, "loss_num": 0.031494140625, "loss_xval": 0.9453125, "num_input_tokens_seen": 83856788, "step": 725 }, { "epoch": 3.903225806451613, "grad_norm": 23.02499771118164, "learning_rate": 5e-07, "loss": 0.7399, "num_input_tokens_seen": 83973172, "step": 726 }, { "epoch": 3.903225806451613, "loss": 0.7502688765525818, "loss_ce": 2.47067328018602e-05, "loss_iou": 0.318359375, "loss_num": 0.0225830078125, "loss_xval": 0.75, "num_input_tokens_seen": 83973172, "step": 726 }, { "epoch": 3.9086021505376345, "grad_norm": 22.39906120300293, "learning_rate": 5e-07, "loss": 0.8528, "num_input_tokens_seen": 84089248, "step": 727 }, { "epoch": 3.9086021505376345, "loss": 1.0825624465942383, "loss_ce": 4.2784857214428484e-05, "loss_iou": 0.451171875, "loss_num": 0.035888671875, "loss_xval": 1.0859375, "num_input_tokens_seen": 84089248, "step": 727 }, { "epoch": 3.913978494623656, "grad_norm": 19.300151824951172, "learning_rate": 5e-07, "loss": 0.9826, "num_input_tokens_seen": 84204532, "step": 728 }, { "epoch": 3.913978494623656, "loss": 0.8595088720321655, "loss_ce": 0.00013384546036832035, "loss_iou": 0.3671875, "loss_num": 0.0252685546875, "loss_xval": 0.859375, "num_input_tokens_seen": 84204532, "step": 728 }, { "epoch": 3.9193548387096775, "grad_norm": 59.14823913574219, "learning_rate": 5e-07, "loss": 1.0184, "num_input_tokens_seen": 84323344, "step": 729 }, { "epoch": 3.9193548387096775, "loss": 1.29863440990448, "loss_ce": 5.0419941544532776e-05, "loss_iou": 0.5390625, "loss_num": 0.043701171875, "loss_xval": 1.296875, "num_input_tokens_seen": 84323344, "step": 729 }, { "epoch": 3.924731182795699, "grad_norm": 19.535661697387695, "learning_rate": 5e-07, "loss": 0.8945, "num_input_tokens_seen": 84436792, "step": 730 }, { "epoch": 3.924731182795699, "loss": 1.1770182847976685, "loss_ce": 1.6273033907054923e-05, "loss_iou": 0.5078125, "loss_num": 0.032470703125, "loss_xval": 1.1796875, "num_input_tokens_seen": 84436792, "step": 730 }, { "epoch": 3.9301075268817205, "grad_norm": 21.679641723632812, "learning_rate": 5e-07, "loss": 1.0525, "num_input_tokens_seen": 84555684, "step": 731 }, { "epoch": 3.9301075268817205, "loss": 1.5869944095611572, "loss_ce": 8.030750905163586e-05, "loss_iou": 0.671875, "loss_num": 0.048583984375, "loss_xval": 1.5859375, "num_input_tokens_seen": 84555684, "step": 731 }, { "epoch": 3.935483870967742, "grad_norm": 18.779773712158203, "learning_rate": 5e-07, "loss": 0.8597, "num_input_tokens_seen": 84671456, "step": 732 }, { "epoch": 3.935483870967742, "loss": 0.7349478006362915, "loss_ce": 8.449405140709132e-05, "loss_iou": 0.296875, "loss_num": 0.0284423828125, "loss_xval": 0.734375, "num_input_tokens_seen": 84671456, "step": 732 }, { "epoch": 3.9408602150537635, "grad_norm": 21.376731872558594, "learning_rate": 5e-07, "loss": 0.9205, "num_input_tokens_seen": 84784800, "step": 733 }, { "epoch": 3.9408602150537635, "loss": 0.9778341054916382, "loss_ce": 5.0904040108434856e-05, "loss_iou": 0.427734375, "loss_num": 0.02392578125, "loss_xval": 0.9765625, "num_input_tokens_seen": 84784800, "step": 733 }, { "epoch": 3.946236559139785, "grad_norm": 27.427143096923828, "learning_rate": 5e-07, "loss": 0.9648, "num_input_tokens_seen": 84901904, "step": 734 }, { "epoch": 3.946236559139785, "loss": 0.9419596791267395, "loss_ce": 6.514722917927429e-05, "loss_iou": 0.396484375, "loss_num": 0.0302734375, "loss_xval": 0.94140625, "num_input_tokens_seen": 84901904, "step": 734 }, { "epoch": 3.9516129032258065, "grad_norm": 19.956859588623047, "learning_rate": 5e-07, "loss": 0.9814, "num_input_tokens_seen": 85017984, "step": 735 }, { "epoch": 3.9516129032258065, "loss": 0.7820315957069397, "loss_ce": 4.915174940833822e-05, "loss_iou": 0.341796875, "loss_num": 0.0194091796875, "loss_xval": 0.78125, "num_input_tokens_seen": 85017984, "step": 735 }, { "epoch": 3.956989247311828, "grad_norm": 18.517396926879883, "learning_rate": 5e-07, "loss": 0.6899, "num_input_tokens_seen": 85132800, "step": 736 }, { "epoch": 3.956989247311828, "loss": 0.8728870749473572, "loss_ce": 8.435287600150332e-05, "loss_iou": 0.376953125, "loss_num": 0.0240478515625, "loss_xval": 0.87109375, "num_input_tokens_seen": 85132800, "step": 736 }, { "epoch": 3.9623655913978495, "grad_norm": 27.628000259399414, "learning_rate": 5e-07, "loss": 0.8025, "num_input_tokens_seen": 85247652, "step": 737 }, { "epoch": 3.9623655913978495, "loss": 0.7161313891410828, "loss_ce": 6.689656584057957e-05, "loss_iou": 0.28515625, "loss_num": 0.0286865234375, "loss_xval": 0.71484375, "num_input_tokens_seen": 85247652, "step": 737 }, { "epoch": 3.967741935483871, "grad_norm": 22.83174705505371, "learning_rate": 5e-07, "loss": 0.8546, "num_input_tokens_seen": 85366300, "step": 738 }, { "epoch": 3.967741935483871, "loss": 1.0879998207092285, "loss_ce": 0.0001092323218472302, "loss_iou": 0.447265625, "loss_num": 0.038818359375, "loss_xval": 1.0859375, "num_input_tokens_seen": 85366300, "step": 738 }, { "epoch": 3.9731182795698925, "grad_norm": 17.548851013183594, "learning_rate": 5e-07, "loss": 0.7522, "num_input_tokens_seen": 85482180, "step": 739 }, { "epoch": 3.9731182795698925, "loss": 0.5891458988189697, "loss_ce": 3.4559234336484224e-05, "loss_iou": 0.251953125, "loss_num": 0.017333984375, "loss_xval": 0.58984375, "num_input_tokens_seen": 85482180, "step": 739 }, { "epoch": 3.978494623655914, "grad_norm": 20.842878341674805, "learning_rate": 5e-07, "loss": 0.782, "num_input_tokens_seen": 85599648, "step": 740 }, { "epoch": 3.978494623655914, "loss": 0.7567110061645508, "loss_ce": 0.00011920435645151883, "loss_iou": 0.30859375, "loss_num": 0.0277099609375, "loss_xval": 0.7578125, "num_input_tokens_seen": 85599648, "step": 740 }, { "epoch": 3.9838709677419355, "grad_norm": 18.977304458618164, "learning_rate": 5e-07, "loss": 0.7596, "num_input_tokens_seen": 85716148, "step": 741 }, { "epoch": 3.9838709677419355, "loss": 0.7700891494750977, "loss_ce": 6.965614011278376e-05, "loss_iou": 0.3203125, "loss_num": 0.02587890625, "loss_xval": 0.76953125, "num_input_tokens_seen": 85716148, "step": 741 }, { "epoch": 3.989247311827957, "grad_norm": 22.274415969848633, "learning_rate": 5e-07, "loss": 1.001, "num_input_tokens_seen": 85827600, "step": 742 }, { "epoch": 3.989247311827957, "loss": 1.1041176319122314, "loss_ce": 0.0018226479878649116, "loss_iou": 0.458984375, "loss_num": 0.03662109375, "loss_xval": 1.1015625, "num_input_tokens_seen": 85827600, "step": 742 }, { "epoch": 3.9946236559139785, "grad_norm": 22.274391174316406, "learning_rate": 5e-07, "loss": 0.9231, "num_input_tokens_seen": 85940472, "step": 743 }, { "epoch": 3.9946236559139785, "loss": 1.0436853170394897, "loss_ce": 0.00022823381004855037, "loss_iou": 0.46484375, "loss_num": 0.022705078125, "loss_xval": 1.046875, "num_input_tokens_seen": 85940472, "step": 743 }, { "epoch": 4.0, "grad_norm": 15.558987617492676, "learning_rate": 5e-07, "loss": 0.8367, "num_input_tokens_seen": 86055152, "step": 744 }, { "epoch": 4.0, "loss": 0.6243903636932373, "loss_ce": 0.00012281359522603452, "loss_iou": 0.2392578125, "loss_num": 0.029052734375, "loss_xval": 0.625, "num_input_tokens_seen": 86055152, "step": 744 }, { "epoch": 4.005376344086022, "grad_norm": 21.080116271972656, "learning_rate": 5e-07, "loss": 0.9317, "num_input_tokens_seen": 86171920, "step": 745 }, { "epoch": 4.005376344086022, "loss": 0.583550751209259, "loss_ce": 5.4688749514753e-05, "loss_iou": 0.23828125, "loss_num": 0.0213623046875, "loss_xval": 0.58203125, "num_input_tokens_seen": 86171920, "step": 745 }, { "epoch": 4.010752688172043, "grad_norm": 16.314958572387695, "learning_rate": 5e-07, "loss": 0.7593, "num_input_tokens_seen": 86286824, "step": 746 }, { "epoch": 4.010752688172043, "loss": 0.9771534204483032, "loss_ce": 0.00010260142880724743, "loss_iou": 0.421875, "loss_num": 0.026611328125, "loss_xval": 0.9765625, "num_input_tokens_seen": 86286824, "step": 746 }, { "epoch": 4.016129032258065, "grad_norm": 22.463558197021484, "learning_rate": 5e-07, "loss": 0.872, "num_input_tokens_seen": 86403052, "step": 747 }, { "epoch": 4.016129032258065, "loss": 0.7954956293106079, "loss_ce": 8.546833123546094e-05, "loss_iou": 0.34375, "loss_num": 0.021484375, "loss_xval": 0.796875, "num_input_tokens_seen": 86403052, "step": 747 }, { "epoch": 4.021505376344086, "grad_norm": 20.916366577148438, "learning_rate": 5e-07, "loss": 0.7733, "num_input_tokens_seen": 86519624, "step": 748 }, { "epoch": 4.021505376344086, "loss": 0.8523421287536621, "loss_ce": 4.724222526419908e-05, "loss_iou": 0.361328125, "loss_num": 0.02587890625, "loss_xval": 0.8515625, "num_input_tokens_seen": 86519624, "step": 748 }, { "epoch": 4.026881720430108, "grad_norm": 16.6671085357666, "learning_rate": 5e-07, "loss": 0.8126, "num_input_tokens_seen": 86636640, "step": 749 }, { "epoch": 4.026881720430108, "loss": 0.6484883427619934, "loss_ce": 5.086265446152538e-05, "loss_iou": 0.26953125, "loss_num": 0.02197265625, "loss_xval": 0.6484375, "num_input_tokens_seen": 86636640, "step": 749 }, { "epoch": 4.032258064516129, "grad_norm": 28.58279037475586, "learning_rate": 5e-07, "loss": 0.8878, "num_input_tokens_seen": 86752340, "step": 750 }, { "epoch": 4.032258064516129, "eval_icons_CIoU": 0.1066616103053093, "eval_icons_GIoU": 0.0757441446185112, "eval_icons_IoU": 0.26799970865249634, "eval_icons_MAE_all": 0.03326491825282574, "eval_icons_MAE_h": 0.03689845837652683, "eval_icons_MAE_w": 0.05851474404335022, "eval_icons_MAE_x_boxes": 0.05344069562852383, "eval_icons_MAE_y_boxes": 0.035274417139589787, "eval_icons_NUM_probability": 0.9987397193908691, "eval_icons_inside_bbox": 0.578125, "eval_icons_loss": 1.9946258068084717, "eval_icons_loss_ce": 0.00020569741536746733, "eval_icons_loss_iou": 0.90478515625, "eval_icons_loss_num": 0.03426361083984375, "eval_icons_loss_xval": 1.98193359375, "eval_icons_runtime": 40.5467, "eval_icons_samples_per_second": 1.233, "eval_icons_steps_per_second": 0.049, "num_input_tokens_seen": 86752340, "step": 750 }, { "epoch": 4.032258064516129, "eval_screenspot_CIoU": 0.2605700095494588, "eval_screenspot_GIoU": 0.24466626842816672, "eval_screenspot_IoU": 0.3707685669263204, "eval_screenspot_MAE_all": 0.07006563742955525, "eval_screenspot_MAE_h": 0.05450528487563133, "eval_screenspot_MAE_w": 0.09384973595539729, "eval_screenspot_MAE_x_boxes": 0.0987589160601298, "eval_screenspot_MAE_y_boxes": 0.04146050040920576, "eval_screenspot_NUM_probability": 0.999890923500061, "eval_screenspot_inside_bbox": 0.6804166634877523, "eval_screenspot_loss": 1.9012624025344849, "eval_screenspot_loss_ce": 9.838870270565774e-05, "eval_screenspot_loss_iou": 0.7876790364583334, "eval_screenspot_loss_num": 0.07931772867838542, "eval_screenspot_loss_xval": 1.9729817708333333, "eval_screenspot_runtime": 73.5892, "eval_screenspot_samples_per_second": 1.209, "eval_screenspot_steps_per_second": 0.041, "num_input_tokens_seen": 86752340, "step": 750 }, { "epoch": 4.032258064516129, "loss": 1.8935883045196533, "loss_ce": 3.3557247661519796e-05, "loss_iou": 0.77734375, "loss_num": 0.068359375, "loss_xval": 1.890625, "num_input_tokens_seen": 86752340, "step": 750 }, { "epoch": 4.037634408602151, "grad_norm": 56.5999641418457, "learning_rate": 5e-07, "loss": 0.8485, "num_input_tokens_seen": 86862776, "step": 751 }, { "epoch": 4.037634408602151, "loss": 0.7348114252090454, "loss_ce": 7.01810495229438e-05, "loss_iou": 0.267578125, "loss_num": 0.039794921875, "loss_xval": 0.734375, "num_input_tokens_seen": 86862776, "step": 751 }, { "epoch": 4.043010752688172, "grad_norm": 20.489551544189453, "learning_rate": 5e-07, "loss": 0.8121, "num_input_tokens_seen": 86979048, "step": 752 }, { "epoch": 4.043010752688172, "loss": 0.9448278546333313, "loss_ce": 0.000491881393827498, "loss_iou": 0.380859375, "loss_num": 0.036376953125, "loss_xval": 0.9453125, "num_input_tokens_seen": 86979048, "step": 752 }, { "epoch": 4.048387096774194, "grad_norm": 19.533292770385742, "learning_rate": 5e-07, "loss": 0.9586, "num_input_tokens_seen": 87087860, "step": 753 }, { "epoch": 4.048387096774194, "loss": 0.8201708793640137, "loss_ce": 0.00010254090011585504, "loss_iou": 0.318359375, "loss_num": 0.03662109375, "loss_xval": 0.8203125, "num_input_tokens_seen": 87087860, "step": 753 }, { "epoch": 4.053763440860215, "grad_norm": 20.52452850341797, "learning_rate": 5e-07, "loss": 1.2228, "num_input_tokens_seen": 87201552, "step": 754 }, { "epoch": 4.053763440860215, "loss": 1.299896478652954, "loss_ce": 9.186795796267688e-05, "loss_iou": 0.55859375, "loss_num": 0.0361328125, "loss_xval": 1.296875, "num_input_tokens_seen": 87201552, "step": 754 }, { "epoch": 4.059139784946237, "grad_norm": 26.220457077026367, "learning_rate": 5e-07, "loss": 0.7013, "num_input_tokens_seen": 87320844, "step": 755 }, { "epoch": 4.059139784946237, "loss": 0.6040670871734619, "loss_ce": 6.319187377812341e-05, "loss_iou": 0.2578125, "loss_num": 0.0174560546875, "loss_xval": 0.60546875, "num_input_tokens_seen": 87320844, "step": 755 }, { "epoch": 4.064516129032258, "grad_norm": 21.1065731048584, "learning_rate": 5e-07, "loss": 0.8297, "num_input_tokens_seen": 87436320, "step": 756 }, { "epoch": 4.064516129032258, "loss": 0.7695380449295044, "loss_ce": 0.0002509169280529022, "loss_iou": 0.330078125, "loss_num": 0.022216796875, "loss_xval": 0.76953125, "num_input_tokens_seen": 87436320, "step": 756 }, { "epoch": 4.06989247311828, "grad_norm": 18.465871810913086, "learning_rate": 5e-07, "loss": 0.9513, "num_input_tokens_seen": 87548848, "step": 757 }, { "epoch": 4.06989247311828, "loss": 1.01350736618042, "loss_ce": 7.974505570018664e-05, "loss_iou": 0.431640625, "loss_num": 0.030517578125, "loss_xval": 1.015625, "num_input_tokens_seen": 87548848, "step": 757 }, { "epoch": 4.075268817204301, "grad_norm": 20.90911293029785, "learning_rate": 5e-07, "loss": 0.7689, "num_input_tokens_seen": 87664112, "step": 758 }, { "epoch": 4.075268817204301, "loss": 1.1417843103408813, "loss_ce": 6.0704725910909474e-05, "loss_iou": 0.451171875, "loss_num": 0.048095703125, "loss_xval": 1.140625, "num_input_tokens_seen": 87664112, "step": 758 }, { "epoch": 4.080645161290323, "grad_norm": 25.44759750366211, "learning_rate": 5e-07, "loss": 1.1153, "num_input_tokens_seen": 87779308, "step": 759 }, { "epoch": 4.080645161290323, "loss": 1.3175148963928223, "loss_ce": 0.00013206075527705252, "loss_iou": 0.5625, "loss_num": 0.0390625, "loss_xval": 1.3203125, "num_input_tokens_seen": 87779308, "step": 759 }, { "epoch": 4.086021505376344, "grad_norm": 22.796079635620117, "learning_rate": 5e-07, "loss": 0.9582, "num_input_tokens_seen": 87895780, "step": 760 }, { "epoch": 4.086021505376344, "loss": 1.0279393196105957, "loss_ce": 0.00010723403829615563, "loss_iou": 0.4296875, "loss_num": 0.03369140625, "loss_xval": 1.03125, "num_input_tokens_seen": 87895780, "step": 760 }, { "epoch": 4.091397849462366, "grad_norm": 25.01409912109375, "learning_rate": 5e-07, "loss": 0.9341, "num_input_tokens_seen": 88012964, "step": 761 }, { "epoch": 4.091397849462366, "loss": 0.8213870525360107, "loss_ce": 9.798143582884222e-05, "loss_iou": 0.36328125, "loss_num": 0.0191650390625, "loss_xval": 0.8203125, "num_input_tokens_seen": 88012964, "step": 761 }, { "epoch": 4.096774193548387, "grad_norm": 30.334070205688477, "learning_rate": 5e-07, "loss": 0.8593, "num_input_tokens_seen": 88131596, "step": 762 }, { "epoch": 4.096774193548387, "loss": 1.0601065158843994, "loss_ce": 4.789894228451885e-05, "loss_iou": 0.44921875, "loss_num": 0.032470703125, "loss_xval": 1.0625, "num_input_tokens_seen": 88131596, "step": 762 }, { "epoch": 4.102150537634409, "grad_norm": 24.21741485595703, "learning_rate": 5e-07, "loss": 0.7164, "num_input_tokens_seen": 88249712, "step": 763 }, { "epoch": 4.102150537634409, "loss": 0.49291226267814636, "loss_ce": 0.00023649842478334904, "loss_iou": 0.2099609375, "loss_num": 0.01470947265625, "loss_xval": 0.4921875, "num_input_tokens_seen": 88249712, "step": 763 }, { "epoch": 4.10752688172043, "grad_norm": 21.334857940673828, "learning_rate": 5e-07, "loss": 0.9253, "num_input_tokens_seen": 88363040, "step": 764 }, { "epoch": 4.10752688172043, "loss": 0.8818705081939697, "loss_ce": 3.454305260675028e-05, "loss_iou": 0.34375, "loss_num": 0.038330078125, "loss_xval": 0.8828125, "num_input_tokens_seen": 88363040, "step": 764 }, { "epoch": 4.112903225806452, "grad_norm": 16.219614028930664, "learning_rate": 5e-07, "loss": 1.0299, "num_input_tokens_seen": 88478176, "step": 765 }, { "epoch": 4.112903225806452, "loss": 0.8092681169509888, "loss_ce": 6.401524296961725e-05, "loss_iou": 0.333984375, "loss_num": 0.028076171875, "loss_xval": 0.80859375, "num_input_tokens_seen": 88478176, "step": 765 }, { "epoch": 4.118279569892473, "grad_norm": 23.265199661254883, "learning_rate": 5e-07, "loss": 1.0308, "num_input_tokens_seen": 88591168, "step": 766 }, { "epoch": 4.118279569892473, "loss": 0.8636101484298706, "loss_ce": 8.476112270727754e-05, "loss_iou": 0.337890625, "loss_num": 0.037841796875, "loss_xval": 0.86328125, "num_input_tokens_seen": 88591168, "step": 766 }, { "epoch": 4.123655913978495, "grad_norm": 27.68526268005371, "learning_rate": 5e-07, "loss": 0.8985, "num_input_tokens_seen": 88709176, "step": 767 }, { "epoch": 4.123655913978495, "loss": 1.1580344438552856, "loss_ce": 7.545309199485928e-05, "loss_iou": 0.474609375, "loss_num": 0.041748046875, "loss_xval": 1.15625, "num_input_tokens_seen": 88709176, "step": 767 }, { "epoch": 4.129032258064516, "grad_norm": 33.99291229248047, "learning_rate": 5e-07, "loss": 0.7646, "num_input_tokens_seen": 88823592, "step": 768 }, { "epoch": 4.129032258064516, "loss": 0.8698927164077759, "loss_ce": 1.967634125321638e-05, "loss_iou": 0.388671875, "loss_num": 0.0189208984375, "loss_xval": 0.87109375, "num_input_tokens_seen": 88823592, "step": 768 }, { "epoch": 4.134408602150538, "grad_norm": 22.937381744384766, "learning_rate": 5e-07, "loss": 0.8696, "num_input_tokens_seen": 88940300, "step": 769 }, { "epoch": 4.134408602150538, "loss": 0.8965482711791992, "loss_ce": 6.386380846379325e-05, "loss_iou": 0.35546875, "loss_num": 0.037109375, "loss_xval": 0.8984375, "num_input_tokens_seen": 88940300, "step": 769 }, { "epoch": 4.139784946236559, "grad_norm": 22.578187942504883, "learning_rate": 5e-07, "loss": 0.8114, "num_input_tokens_seen": 89053928, "step": 770 }, { "epoch": 4.139784946236559, "loss": 0.8872901201248169, "loss_ce": 8.305854862555861e-05, "loss_iou": 0.35546875, "loss_num": 0.03515625, "loss_xval": 0.88671875, "num_input_tokens_seen": 89053928, "step": 770 }, { "epoch": 4.145161290322581, "grad_norm": 18.905529022216797, "learning_rate": 5e-07, "loss": 0.7723, "num_input_tokens_seen": 89171212, "step": 771 }, { "epoch": 4.145161290322581, "loss": 0.8902699947357178, "loss_ce": 0.00013330810179468244, "loss_iou": 0.3984375, "loss_num": 0.0186767578125, "loss_xval": 0.890625, "num_input_tokens_seen": 89171212, "step": 771 }, { "epoch": 4.150537634408602, "grad_norm": 21.448543548583984, "learning_rate": 5e-07, "loss": 0.9739, "num_input_tokens_seen": 89285680, "step": 772 }, { "epoch": 4.150537634408602, "loss": 0.7996124029159546, "loss_ce": 5.189283183426596e-05, "loss_iou": 0.3359375, "loss_num": 0.0252685546875, "loss_xval": 0.80078125, "num_input_tokens_seen": 89285680, "step": 772 }, { "epoch": 4.155913978494624, "grad_norm": 21.068571090698242, "learning_rate": 5e-07, "loss": 0.8096, "num_input_tokens_seen": 89402384, "step": 773 }, { "epoch": 4.155913978494624, "loss": 0.8597042560577393, "loss_ce": 8.513114880770445e-05, "loss_iou": 0.365234375, "loss_num": 0.02587890625, "loss_xval": 0.859375, "num_input_tokens_seen": 89402384, "step": 773 }, { "epoch": 4.161290322580645, "grad_norm": 26.404743194580078, "learning_rate": 5e-07, "loss": 0.7593, "num_input_tokens_seen": 89520620, "step": 774 }, { "epoch": 4.161290322580645, "loss": 0.7944980263710022, "loss_ce": 6.440423749154434e-05, "loss_iou": 0.330078125, "loss_num": 0.0272216796875, "loss_xval": 0.79296875, "num_input_tokens_seen": 89520620, "step": 774 }, { "epoch": 4.166666666666667, "grad_norm": 34.84348678588867, "learning_rate": 5e-07, "loss": 1.0282, "num_input_tokens_seen": 89636740, "step": 775 }, { "epoch": 4.166666666666667, "loss": 0.8606441020965576, "loss_ce": 4.842742782784626e-05, "loss_iou": 0.353515625, "loss_num": 0.03076171875, "loss_xval": 0.859375, "num_input_tokens_seen": 89636740, "step": 775 }, { "epoch": 4.172043010752688, "grad_norm": 40.33241271972656, "learning_rate": 5e-07, "loss": 0.9952, "num_input_tokens_seen": 89752884, "step": 776 }, { "epoch": 4.172043010752688, "loss": 1.0828371047973633, "loss_ce": 7.347247446887195e-05, "loss_iou": 0.462890625, "loss_num": 0.031494140625, "loss_xval": 1.0859375, "num_input_tokens_seen": 89752884, "step": 776 }, { "epoch": 4.17741935483871, "grad_norm": 25.02279281616211, "learning_rate": 5e-07, "loss": 0.7493, "num_input_tokens_seen": 89866032, "step": 777 }, { "epoch": 4.17741935483871, "loss": 0.7722636461257935, "loss_ce": 4.691860158345662e-05, "loss_iou": 0.28515625, "loss_num": 0.0400390625, "loss_xval": 0.7734375, "num_input_tokens_seen": 89866032, "step": 777 }, { "epoch": 4.182795698924731, "grad_norm": 22.353328704833984, "learning_rate": 5e-07, "loss": 0.8214, "num_input_tokens_seen": 89981868, "step": 778 }, { "epoch": 4.182795698924731, "loss": 0.9506738781929016, "loss_ce": 0.00023442100791726261, "loss_iou": 0.37890625, "loss_num": 0.038330078125, "loss_xval": 0.94921875, "num_input_tokens_seen": 89981868, "step": 778 }, { "epoch": 4.188172043010753, "grad_norm": 17.994104385375977, "learning_rate": 5e-07, "loss": 0.8536, "num_input_tokens_seen": 90099280, "step": 779 }, { "epoch": 4.188172043010753, "loss": 0.8010673522949219, "loss_ce": 4.197496673441492e-05, "loss_iou": 0.33984375, "loss_num": 0.024169921875, "loss_xval": 0.80078125, "num_input_tokens_seen": 90099280, "step": 779 }, { "epoch": 4.193548387096774, "grad_norm": 19.3334903717041, "learning_rate": 5e-07, "loss": 1.0069, "num_input_tokens_seen": 90217408, "step": 780 }, { "epoch": 4.193548387096774, "loss": 0.9043241739273071, "loss_ce": 0.0002714003494475037, "loss_iou": 0.380859375, "loss_num": 0.0286865234375, "loss_xval": 0.90234375, "num_input_tokens_seen": 90217408, "step": 780 }, { "epoch": 4.198924731182796, "grad_norm": 17.562435150146484, "learning_rate": 5e-07, "loss": 0.9249, "num_input_tokens_seen": 90333792, "step": 781 }, { "epoch": 4.198924731182796, "loss": 0.8153393864631653, "loss_ce": 3.1758703698869795e-05, "loss_iou": 0.3515625, "loss_num": 0.021728515625, "loss_xval": 0.81640625, "num_input_tokens_seen": 90333792, "step": 781 }, { "epoch": 4.204301075268817, "grad_norm": 23.617000579833984, "learning_rate": 5e-07, "loss": 0.7504, "num_input_tokens_seen": 90451868, "step": 782 }, { "epoch": 4.204301075268817, "loss": 0.9429503083229065, "loss_ce": 7.924405508674681e-05, "loss_iou": 0.390625, "loss_num": 0.032470703125, "loss_xval": 0.94140625, "num_input_tokens_seen": 90451868, "step": 782 }, { "epoch": 4.209677419354839, "grad_norm": 25.612808227539062, "learning_rate": 5e-07, "loss": 0.8685, "num_input_tokens_seen": 90566144, "step": 783 }, { "epoch": 4.209677419354839, "loss": 1.0850718021392822, "loss_ce": 0.0001108465512515977, "loss_iou": 0.4609375, "loss_num": 0.031982421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 90566144, "step": 783 }, { "epoch": 4.21505376344086, "grad_norm": 22.559947967529297, "learning_rate": 5e-07, "loss": 0.9012, "num_input_tokens_seen": 90683404, "step": 784 }, { "epoch": 4.21505376344086, "loss": 0.8925306797027588, "loss_ce": 7.465200906153768e-05, "loss_iou": 0.37109375, "loss_num": 0.030029296875, "loss_xval": 0.890625, "num_input_tokens_seen": 90683404, "step": 784 }, { "epoch": 4.220430107526882, "grad_norm": 22.789596557617188, "learning_rate": 5e-07, "loss": 0.8927, "num_input_tokens_seen": 90798636, "step": 785 }, { "epoch": 4.220430107526882, "loss": 0.8299336433410645, "loss_ce": 9.964022319763899e-05, "loss_iou": 0.34765625, "loss_num": 0.026611328125, "loss_xval": 0.828125, "num_input_tokens_seen": 90798636, "step": 785 }, { "epoch": 4.225806451612903, "grad_norm": 30.58551788330078, "learning_rate": 5e-07, "loss": 0.9546, "num_input_tokens_seen": 90916188, "step": 786 }, { "epoch": 4.225806451612903, "loss": 0.8451451659202576, "loss_ce": 0.00017446777201257646, "loss_iou": 0.34375, "loss_num": 0.031494140625, "loss_xval": 0.84375, "num_input_tokens_seen": 90916188, "step": 786 }, { "epoch": 4.231182795698925, "grad_norm": 26.839466094970703, "learning_rate": 5e-07, "loss": 0.9297, "num_input_tokens_seen": 91030488, "step": 787 }, { "epoch": 4.231182795698925, "loss": 1.0037477016448975, "loss_ce": 8.566137694288045e-05, "loss_iou": 0.4140625, "loss_num": 0.034912109375, "loss_xval": 1.0, "num_input_tokens_seen": 91030488, "step": 787 }, { "epoch": 4.236559139784946, "grad_norm": 27.642223358154297, "learning_rate": 5e-07, "loss": 0.939, "num_input_tokens_seen": 91144012, "step": 788 }, { "epoch": 4.236559139784946, "loss": 0.8245848417282104, "loss_ce": 0.0001219438636326231, "loss_iou": 0.34375, "loss_num": 0.02734375, "loss_xval": 0.82421875, "num_input_tokens_seen": 91144012, "step": 788 }, { "epoch": 4.241935483870968, "grad_norm": 28.25255584716797, "learning_rate": 5e-07, "loss": 0.9086, "num_input_tokens_seen": 91261868, "step": 789 }, { "epoch": 4.241935483870968, "loss": 0.6510105133056641, "loss_ce": 0.0001315999252256006, "loss_iou": 0.259765625, "loss_num": 0.026123046875, "loss_xval": 0.65234375, "num_input_tokens_seen": 91261868, "step": 789 }, { "epoch": 4.247311827956989, "grad_norm": 19.176639556884766, "learning_rate": 5e-07, "loss": 0.9521, "num_input_tokens_seen": 91378980, "step": 790 }, { "epoch": 4.247311827956989, "loss": 0.6647244691848755, "loss_ce": 0.00017366431711707264, "loss_iou": 0.2890625, "loss_num": 0.017578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 91378980, "step": 790 }, { "epoch": 4.252688172043011, "grad_norm": 22.159330368041992, "learning_rate": 5e-07, "loss": 0.8996, "num_input_tokens_seen": 91497196, "step": 791 }, { "epoch": 4.252688172043011, "loss": 1.0108418464660645, "loss_ce": 9.969208622351289e-05, "loss_iou": 0.431640625, "loss_num": 0.0294189453125, "loss_xval": 1.0078125, "num_input_tokens_seen": 91497196, "step": 791 }, { "epoch": 4.258064516129032, "grad_norm": 20.069799423217773, "learning_rate": 5e-07, "loss": 0.9241, "num_input_tokens_seen": 91617528, "step": 792 }, { "epoch": 4.258064516129032, "loss": 0.8755736351013184, "loss_ce": 8.534819062333554e-05, "loss_iou": 0.384765625, "loss_num": 0.02099609375, "loss_xval": 0.875, "num_input_tokens_seen": 91617528, "step": 792 }, { "epoch": 4.263440860215054, "grad_norm": 19.816478729248047, "learning_rate": 5e-07, "loss": 0.8706, "num_input_tokens_seen": 91734668, "step": 793 }, { "epoch": 4.263440860215054, "loss": 1.047907829284668, "loss_ce": 5.627570499200374e-05, "loss_iou": 0.447265625, "loss_num": 0.0303955078125, "loss_xval": 1.046875, "num_input_tokens_seen": 91734668, "step": 793 }, { "epoch": 4.268817204301075, "grad_norm": 29.928579330444336, "learning_rate": 5e-07, "loss": 0.9229, "num_input_tokens_seen": 91851908, "step": 794 }, { "epoch": 4.268817204301075, "loss": 0.6293858885765076, "loss_ce": 0.00023549923207610846, "loss_iou": 0.271484375, "loss_num": 0.0177001953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 91851908, "step": 794 }, { "epoch": 4.274193548387097, "grad_norm": 24.957727432250977, "learning_rate": 5e-07, "loss": 0.8693, "num_input_tokens_seen": 91969900, "step": 795 }, { "epoch": 4.274193548387097, "loss": 0.9523414373397827, "loss_ce": 7.09152445779182e-05, "loss_iou": 0.384765625, "loss_num": 0.036376953125, "loss_xval": 0.953125, "num_input_tokens_seen": 91969900, "step": 795 }, { "epoch": 4.279569892473118, "grad_norm": 21.14610481262207, "learning_rate": 5e-07, "loss": 1.0361, "num_input_tokens_seen": 92086336, "step": 796 }, { "epoch": 4.279569892473118, "loss": 1.0015523433685303, "loss_ce": 8.751425048103556e-05, "loss_iou": 0.41015625, "loss_num": 0.03662109375, "loss_xval": 1.0, "num_input_tokens_seen": 92086336, "step": 796 }, { "epoch": 4.28494623655914, "grad_norm": 20.460163116455078, "learning_rate": 5e-07, "loss": 0.9002, "num_input_tokens_seen": 92201436, "step": 797 }, { "epoch": 4.28494623655914, "loss": 0.6848917007446289, "loss_ce": 7.723178714513779e-05, "loss_iou": 0.310546875, "loss_num": 0.01287841796875, "loss_xval": 0.68359375, "num_input_tokens_seen": 92201436, "step": 797 }, { "epoch": 4.290322580645161, "grad_norm": 22.85779571533203, "learning_rate": 5e-07, "loss": 1.0414, "num_input_tokens_seen": 92317876, "step": 798 }, { "epoch": 4.290322580645161, "loss": 1.082176685333252, "loss_ce": 0.0001454684534110129, "loss_iou": 0.439453125, "loss_num": 0.04052734375, "loss_xval": 1.078125, "num_input_tokens_seen": 92317876, "step": 798 }, { "epoch": 4.295698924731183, "grad_norm": 20.622228622436523, "learning_rate": 5e-07, "loss": 1.0328, "num_input_tokens_seen": 92431060, "step": 799 }, { "epoch": 4.295698924731183, "loss": 0.9533141255378723, "loss_ce": 0.00018912830273620784, "loss_iou": 0.392578125, "loss_num": 0.03369140625, "loss_xval": 0.953125, "num_input_tokens_seen": 92431060, "step": 799 }, { "epoch": 4.301075268817204, "grad_norm": 22.59819221496582, "learning_rate": 5e-07, "loss": 0.8998, "num_input_tokens_seen": 92548844, "step": 800 }, { "epoch": 4.301075268817204, "loss": 0.9361464381217957, "loss_ce": 0.00011131450446555391, "loss_iou": 0.39453125, "loss_num": 0.0296630859375, "loss_xval": 0.9375, "num_input_tokens_seen": 92548844, "step": 800 }, { "epoch": 4.306451612903226, "grad_norm": 22.160017013549805, "learning_rate": 5e-07, "loss": 0.9588, "num_input_tokens_seen": 92661380, "step": 801 }, { "epoch": 4.306451612903226, "loss": 0.8504064679145813, "loss_ce": 6.467072671512142e-05, "loss_iou": 0.345703125, "loss_num": 0.03173828125, "loss_xval": 0.8515625, "num_input_tokens_seen": 92661380, "step": 801 }, { "epoch": 4.311827956989247, "grad_norm": 18.56253433227539, "learning_rate": 5e-07, "loss": 0.8509, "num_input_tokens_seen": 92772576, "step": 802 }, { "epoch": 4.311827956989247, "loss": 0.6060924530029297, "loss_ce": 1.335142496827757e-05, "loss_iou": 0.255859375, "loss_num": 0.018798828125, "loss_xval": 0.60546875, "num_input_tokens_seen": 92772576, "step": 802 }, { "epoch": 4.317204301075269, "grad_norm": 25.466169357299805, "learning_rate": 5e-07, "loss": 0.8777, "num_input_tokens_seen": 92887980, "step": 803 }, { "epoch": 4.317204301075269, "loss": 0.7356249094009399, "loss_ce": 2.920898077718448e-05, "loss_iou": 0.314453125, "loss_num": 0.021728515625, "loss_xval": 0.734375, "num_input_tokens_seen": 92887980, "step": 803 }, { "epoch": 4.32258064516129, "grad_norm": 18.271686553955078, "learning_rate": 5e-07, "loss": 0.8472, "num_input_tokens_seen": 93003408, "step": 804 }, { "epoch": 4.32258064516129, "loss": 0.8570713996887207, "loss_ce": 0.00013780791778117418, "loss_iou": 0.373046875, "loss_num": 0.0223388671875, "loss_xval": 0.85546875, "num_input_tokens_seen": 93003408, "step": 804 }, { "epoch": 4.327956989247312, "grad_norm": 28.881214141845703, "learning_rate": 5e-07, "loss": 0.9023, "num_input_tokens_seen": 93118092, "step": 805 }, { "epoch": 4.327956989247312, "loss": 0.7853369116783142, "loss_ce": 5.8585261285770684e-05, "loss_iou": 0.333984375, "loss_num": 0.0234375, "loss_xval": 0.78515625, "num_input_tokens_seen": 93118092, "step": 805 }, { "epoch": 4.333333333333333, "grad_norm": 18.391653060913086, "learning_rate": 5e-07, "loss": 0.9574, "num_input_tokens_seen": 93232844, "step": 806 }, { "epoch": 4.333333333333333, "loss": 1.249085783958435, "loss_ce": 6.229977589100599e-05, "loss_iou": 0.53125, "loss_num": 0.036376953125, "loss_xval": 1.25, "num_input_tokens_seen": 93232844, "step": 806 }, { "epoch": 4.338709677419355, "grad_norm": 21.74759292602539, "learning_rate": 5e-07, "loss": 0.9293, "num_input_tokens_seen": 93348588, "step": 807 }, { "epoch": 4.338709677419355, "loss": 0.7345851063728333, "loss_ce": 8.802571392152458e-05, "loss_iou": 0.28515625, "loss_num": 0.03271484375, "loss_xval": 0.734375, "num_input_tokens_seen": 93348588, "step": 807 }, { "epoch": 4.344086021505376, "grad_norm": 38.039608001708984, "learning_rate": 5e-07, "loss": 0.907, "num_input_tokens_seen": 93465780, "step": 808 }, { "epoch": 4.344086021505376, "loss": 0.7237485647201538, "loss_ce": 0.00011573563824640587, "loss_iou": 0.3203125, "loss_num": 0.016845703125, "loss_xval": 0.72265625, "num_input_tokens_seen": 93465780, "step": 808 }, { "epoch": 4.349462365591398, "grad_norm": 14.239039421081543, "learning_rate": 5e-07, "loss": 0.848, "num_input_tokens_seen": 93583000, "step": 809 }, { "epoch": 4.349462365591398, "loss": 0.8103687763214111, "loss_ce": 0.00018804447608999908, "loss_iou": 0.337890625, "loss_num": 0.027099609375, "loss_xval": 0.80859375, "num_input_tokens_seen": 93583000, "step": 809 }, { "epoch": 4.354838709677419, "grad_norm": 39.69384002685547, "learning_rate": 5e-07, "loss": 0.9055, "num_input_tokens_seen": 93696728, "step": 810 }, { "epoch": 4.354838709677419, "loss": 1.1548948287963867, "loss_ce": 0.0001096688283723779, "loss_iou": 0.4921875, "loss_num": 0.03369140625, "loss_xval": 1.15625, "num_input_tokens_seen": 93696728, "step": 810 }, { "epoch": 4.360215053763441, "grad_norm": 20.42586326599121, "learning_rate": 5e-07, "loss": 0.7807, "num_input_tokens_seen": 93808508, "step": 811 }, { "epoch": 4.360215053763441, "loss": 0.7433328032493591, "loss_ce": 0.00016872375272214413, "loss_iou": 0.291015625, "loss_num": 0.0322265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 93808508, "step": 811 }, { "epoch": 4.365591397849462, "grad_norm": 17.71512794494629, "learning_rate": 5e-07, "loss": 0.8725, "num_input_tokens_seen": 93925116, "step": 812 }, { "epoch": 4.365591397849462, "loss": 1.0018656253814697, "loss_ce": 0.00015669311687815934, "loss_iou": 0.423828125, "loss_num": 0.030517578125, "loss_xval": 1.0, "num_input_tokens_seen": 93925116, "step": 812 }, { "epoch": 4.370967741935484, "grad_norm": 23.98025894165039, "learning_rate": 5e-07, "loss": 0.8973, "num_input_tokens_seen": 94039104, "step": 813 }, { "epoch": 4.370967741935484, "loss": 0.42490440607070923, "loss_ce": 9.97377501334995e-05, "loss_iou": 0.1337890625, "loss_num": 0.03173828125, "loss_xval": 0.42578125, "num_input_tokens_seen": 94039104, "step": 813 }, { "epoch": 4.376344086021505, "grad_norm": 21.62874984741211, "learning_rate": 5e-07, "loss": 0.8306, "num_input_tokens_seen": 94157232, "step": 814 }, { "epoch": 4.376344086021505, "loss": 0.7844734191894531, "loss_ce": 4.955337863066234e-05, "loss_iou": 0.333984375, "loss_num": 0.0235595703125, "loss_xval": 0.78515625, "num_input_tokens_seen": 94157232, "step": 814 }, { "epoch": 4.381720430107527, "grad_norm": 22.504932403564453, "learning_rate": 5e-07, "loss": 0.8225, "num_input_tokens_seen": 94273964, "step": 815 }, { "epoch": 4.381720430107527, "loss": 0.5645406246185303, "loss_ce": 8.748137042857707e-05, "loss_iou": 0.2333984375, "loss_num": 0.01953125, "loss_xval": 0.5625, "num_input_tokens_seen": 94273964, "step": 815 }, { "epoch": 4.387096774193548, "grad_norm": 27.907451629638672, "learning_rate": 5e-07, "loss": 1.0656, "num_input_tokens_seen": 94387756, "step": 816 }, { "epoch": 4.387096774193548, "loss": 1.1490261554718018, "loss_ce": 0.000100436904176604, "loss_iou": 0.47265625, "loss_num": 0.040283203125, "loss_xval": 1.1484375, "num_input_tokens_seen": 94387756, "step": 816 }, { "epoch": 4.39247311827957, "grad_norm": 26.005929946899414, "learning_rate": 5e-07, "loss": 0.9339, "num_input_tokens_seen": 94503432, "step": 817 }, { "epoch": 4.39247311827957, "loss": 0.7803943157196045, "loss_ce": 0.00012082755711162463, "loss_iou": 0.3125, "loss_num": 0.031005859375, "loss_xval": 0.78125, "num_input_tokens_seen": 94503432, "step": 817 }, { "epoch": 4.397849462365591, "grad_norm": 25.76316261291504, "learning_rate": 5e-07, "loss": 0.8362, "num_input_tokens_seen": 94620764, "step": 818 }, { "epoch": 4.397849462365591, "loss": 0.8218269348144531, "loss_ce": 4.956166594638489e-05, "loss_iou": 0.36328125, "loss_num": 0.018798828125, "loss_xval": 0.8203125, "num_input_tokens_seen": 94620764, "step": 818 }, { "epoch": 4.403225806451613, "grad_norm": 18.4884033203125, "learning_rate": 5e-07, "loss": 0.8386, "num_input_tokens_seen": 94732944, "step": 819 }, { "epoch": 4.403225806451613, "loss": 0.9620329737663269, "loss_ce": 0.00011890282621607184, "loss_iou": 0.404296875, "loss_num": 0.030029296875, "loss_xval": 0.9609375, "num_input_tokens_seen": 94732944, "step": 819 }, { "epoch": 4.408602150537634, "grad_norm": 30.957252502441406, "learning_rate": 5e-07, "loss": 1.1661, "num_input_tokens_seen": 94852100, "step": 820 }, { "epoch": 4.408602150537634, "loss": 0.8213328123092651, "loss_ce": 4.372636976768263e-05, "loss_iou": 0.34375, "loss_num": 0.0264892578125, "loss_xval": 0.8203125, "num_input_tokens_seen": 94852100, "step": 820 }, { "epoch": 4.413978494623656, "grad_norm": 27.89453125, "learning_rate": 5e-07, "loss": 0.9328, "num_input_tokens_seen": 94969732, "step": 821 }, { "epoch": 4.413978494623656, "loss": 1.254193663597107, "loss_ce": 0.0007756588165648282, "loss_iou": 0.52734375, "loss_num": 0.03955078125, "loss_xval": 1.25, "num_input_tokens_seen": 94969732, "step": 821 }, { "epoch": 4.419354838709677, "grad_norm": 23.181827545166016, "learning_rate": 5e-07, "loss": 0.8929, "num_input_tokens_seen": 95082036, "step": 822 }, { "epoch": 4.419354838709677, "loss": 0.8650805950164795, "loss_ce": 9.040105942403898e-05, "loss_iou": 0.353515625, "loss_num": 0.03173828125, "loss_xval": 0.86328125, "num_input_tokens_seen": 95082036, "step": 822 }, { "epoch": 4.424731182795699, "grad_norm": 21.963451385498047, "learning_rate": 5e-07, "loss": 0.84, "num_input_tokens_seen": 95198224, "step": 823 }, { "epoch": 4.424731182795699, "loss": 0.7835482358932495, "loss_ce": 0.00010101188672706485, "loss_iou": 0.283203125, "loss_num": 0.04345703125, "loss_xval": 0.78515625, "num_input_tokens_seen": 95198224, "step": 823 }, { "epoch": 4.43010752688172, "grad_norm": 26.051422119140625, "learning_rate": 5e-07, "loss": 0.7791, "num_input_tokens_seen": 95310516, "step": 824 }, { "epoch": 4.43010752688172, "loss": 0.9354004859924316, "loss_ce": 9.772732300916687e-05, "loss_iou": 0.404296875, "loss_num": 0.0252685546875, "loss_xval": 0.93359375, "num_input_tokens_seen": 95310516, "step": 824 }, { "epoch": 4.435483870967742, "grad_norm": 25.94269371032715, "learning_rate": 5e-07, "loss": 0.937, "num_input_tokens_seen": 95427244, "step": 825 }, { "epoch": 4.435483870967742, "loss": 1.0111734867095947, "loss_ce": 0.00018720526713877916, "loss_iou": 0.404296875, "loss_num": 0.040771484375, "loss_xval": 1.0078125, "num_input_tokens_seen": 95427244, "step": 825 }, { "epoch": 4.440860215053763, "grad_norm": 22.927682876586914, "learning_rate": 5e-07, "loss": 0.7549, "num_input_tokens_seen": 95545636, "step": 826 }, { "epoch": 4.440860215053763, "loss": 0.4493176341056824, "loss_ce": 9.887206397252157e-05, "loss_iou": 0.1796875, "loss_num": 0.0179443359375, "loss_xval": 0.44921875, "num_input_tokens_seen": 95545636, "step": 826 }, { "epoch": 4.446236559139785, "grad_norm": 37.66203308105469, "learning_rate": 5e-07, "loss": 1.0394, "num_input_tokens_seen": 95661940, "step": 827 }, { "epoch": 4.446236559139785, "loss": 0.9466314911842346, "loss_ce": 9.826578025240451e-05, "loss_iou": 0.400390625, "loss_num": 0.029296875, "loss_xval": 0.9453125, "num_input_tokens_seen": 95661940, "step": 827 }, { "epoch": 4.451612903225806, "grad_norm": 38.642887115478516, "learning_rate": 5e-07, "loss": 0.7859, "num_input_tokens_seen": 95774648, "step": 828 }, { "epoch": 4.451612903225806, "loss": 0.5488751530647278, "loss_ce": 0.00019961863290518522, "loss_iou": 0.2041015625, "loss_num": 0.02783203125, "loss_xval": 0.546875, "num_input_tokens_seen": 95774648, "step": 828 }, { "epoch": 4.456989247311828, "grad_norm": 30.2302188873291, "learning_rate": 5e-07, "loss": 0.9783, "num_input_tokens_seen": 95890784, "step": 829 }, { "epoch": 4.456989247311828, "loss": 1.0213829278945923, "loss_ce": 0.0001426904636900872, "loss_iou": 0.453125, "loss_num": 0.023193359375, "loss_xval": 1.0234375, "num_input_tokens_seen": 95890784, "step": 829 }, { "epoch": 4.462365591397849, "grad_norm": 19.439367294311523, "learning_rate": 5e-07, "loss": 0.8796, "num_input_tokens_seen": 96008652, "step": 830 }, { "epoch": 4.462365591397849, "loss": 0.8359919786453247, "loss_ce": 5.4474723583552986e-05, "loss_iou": 0.375, "loss_num": 0.01708984375, "loss_xval": 0.8359375, "num_input_tokens_seen": 96008652, "step": 830 }, { "epoch": 4.467741935483871, "grad_norm": 21.225032806396484, "learning_rate": 5e-07, "loss": 0.82, "num_input_tokens_seen": 96127732, "step": 831 }, { "epoch": 4.467741935483871, "loss": 0.5585259199142456, "loss_ce": 5.426603456726298e-05, "loss_iou": 0.24609375, "loss_num": 0.01336669921875, "loss_xval": 0.55859375, "num_input_tokens_seen": 96127732, "step": 831 }, { "epoch": 4.473118279569892, "grad_norm": 32.692142486572266, "learning_rate": 5e-07, "loss": 0.8801, "num_input_tokens_seen": 96241992, "step": 832 }, { "epoch": 4.473118279569892, "loss": 1.304481863975525, "loss_ce": 3.844347520498559e-05, "loss_iou": 0.55078125, "loss_num": 0.039794921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 96241992, "step": 832 }, { "epoch": 4.478494623655914, "grad_norm": 17.31731605529785, "learning_rate": 5e-07, "loss": 0.899, "num_input_tokens_seen": 96356840, "step": 833 }, { "epoch": 4.478494623655914, "loss": 0.6885122060775757, "loss_ce": 3.565175939002074e-05, "loss_iou": 0.2890625, "loss_num": 0.022216796875, "loss_xval": 0.6875, "num_input_tokens_seen": 96356840, "step": 833 }, { "epoch": 4.483870967741936, "grad_norm": 22.161264419555664, "learning_rate": 5e-07, "loss": 0.8609, "num_input_tokens_seen": 96473492, "step": 834 }, { "epoch": 4.483870967741936, "loss": 1.058905839920044, "loss_ce": 6.798043614253402e-05, "loss_iou": 0.439453125, "loss_num": 0.0361328125, "loss_xval": 1.0625, "num_input_tokens_seen": 96473492, "step": 834 }, { "epoch": 4.489247311827957, "grad_norm": 27.325088500976562, "learning_rate": 5e-07, "loss": 1.042, "num_input_tokens_seen": 96588128, "step": 835 }, { "epoch": 4.489247311827957, "loss": 1.3462319374084473, "loss_ce": 4.065053508384153e-05, "loss_iou": 0.56640625, "loss_num": 0.04345703125, "loss_xval": 1.34375, "num_input_tokens_seen": 96588128, "step": 835 }, { "epoch": 4.494623655913978, "grad_norm": 18.941497802734375, "learning_rate": 5e-07, "loss": 0.8722, "num_input_tokens_seen": 96704692, "step": 836 }, { "epoch": 4.494623655913978, "loss": 0.9883644580841064, "loss_ce": 8.325908856932074e-05, "loss_iou": 0.41796875, "loss_num": 0.03076171875, "loss_xval": 0.98828125, "num_input_tokens_seen": 96704692, "step": 836 }, { "epoch": 4.5, "grad_norm": 18.552978515625, "learning_rate": 5e-07, "loss": 0.926, "num_input_tokens_seen": 96824676, "step": 837 }, { "epoch": 4.5, "loss": 0.7303255796432495, "loss_ce": 0.00010096129699377343, "loss_iou": 0.298828125, "loss_num": 0.026123046875, "loss_xval": 0.73046875, "num_input_tokens_seen": 96824676, "step": 837 }, { "epoch": 4.505376344086022, "grad_norm": 244.17044067382812, "learning_rate": 5e-07, "loss": 0.7564, "num_input_tokens_seen": 96937664, "step": 838 }, { "epoch": 4.505376344086022, "loss": 0.8799958229064941, "loss_ce": 0.00011303603241685778, "loss_iou": 0.376953125, "loss_num": 0.025634765625, "loss_xval": 0.87890625, "num_input_tokens_seen": 96937664, "step": 838 }, { "epoch": 4.510752688172043, "grad_norm": 23.092426300048828, "learning_rate": 5e-07, "loss": 0.9856, "num_input_tokens_seen": 97053148, "step": 839 }, { "epoch": 4.510752688172043, "loss": 0.7663551568984985, "loss_ce": 0.00024184159701690078, "loss_iou": 0.318359375, "loss_num": 0.025634765625, "loss_xval": 0.765625, "num_input_tokens_seen": 97053148, "step": 839 }, { "epoch": 4.516129032258064, "grad_norm": 20.099727630615234, "learning_rate": 5e-07, "loss": 0.9156, "num_input_tokens_seen": 97165916, "step": 840 }, { "epoch": 4.516129032258064, "loss": 1.130591630935669, "loss_ce": 0.00022047065431252122, "loss_iou": 0.494140625, "loss_num": 0.0281982421875, "loss_xval": 1.1328125, "num_input_tokens_seen": 97165916, "step": 840 }, { "epoch": 4.521505376344086, "grad_norm": 22.04678726196289, "learning_rate": 5e-07, "loss": 0.807, "num_input_tokens_seen": 97281404, "step": 841 }, { "epoch": 4.521505376344086, "loss": 0.9346936941146851, "loss_ce": 0.000123387566418387, "loss_iou": 0.37890625, "loss_num": 0.035400390625, "loss_xval": 0.93359375, "num_input_tokens_seen": 97281404, "step": 841 }, { "epoch": 4.526881720430108, "grad_norm": 16.89019203186035, "learning_rate": 5e-07, "loss": 0.7029, "num_input_tokens_seen": 97398636, "step": 842 }, { "epoch": 4.526881720430108, "loss": 0.8193720579147339, "loss_ce": 0.00028029229724779725, "loss_iou": 0.3359375, "loss_num": 0.0299072265625, "loss_xval": 0.8203125, "num_input_tokens_seen": 97398636, "step": 842 }, { "epoch": 4.532258064516129, "grad_norm": 23.585472106933594, "learning_rate": 5e-07, "loss": 0.8356, "num_input_tokens_seen": 97511952, "step": 843 }, { "epoch": 4.532258064516129, "loss": 0.6754105091094971, "loss_ce": 0.00011756567255361006, "loss_iou": 0.306640625, "loss_num": 0.0120849609375, "loss_xval": 0.67578125, "num_input_tokens_seen": 97511952, "step": 843 }, { "epoch": 4.53763440860215, "grad_norm": 18.46160125732422, "learning_rate": 5e-07, "loss": 0.8078, "num_input_tokens_seen": 97629060, "step": 844 }, { "epoch": 4.53763440860215, "loss": 0.5239468812942505, "loss_ce": 0.000143222336191684, "loss_iou": 0.220703125, "loss_num": 0.016357421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 97629060, "step": 844 }, { "epoch": 4.543010752688172, "grad_norm": 24.83058738708496, "learning_rate": 5e-07, "loss": 0.7333, "num_input_tokens_seen": 97743256, "step": 845 }, { "epoch": 4.543010752688172, "loss": 0.734628438949585, "loss_ce": 0.00025347055634483695, "loss_iou": 0.310546875, "loss_num": 0.0220947265625, "loss_xval": 0.734375, "num_input_tokens_seen": 97743256, "step": 845 }, { "epoch": 4.548387096774194, "grad_norm": 25.400922775268555, "learning_rate": 5e-07, "loss": 0.8829, "num_input_tokens_seen": 97860072, "step": 846 }, { "epoch": 4.548387096774194, "loss": 0.6401724815368652, "loss_ce": 3.5782690247287974e-05, "loss_iou": 0.2734375, "loss_num": 0.0189208984375, "loss_xval": 0.640625, "num_input_tokens_seen": 97860072, "step": 846 }, { "epoch": 4.553763440860215, "grad_norm": 30.59087371826172, "learning_rate": 5e-07, "loss": 1.0285, "num_input_tokens_seen": 97973092, "step": 847 }, { "epoch": 4.553763440860215, "loss": 0.7837563753128052, "loss_ce": 6.495056732092053e-05, "loss_iou": 0.322265625, "loss_num": 0.028076171875, "loss_xval": 0.78515625, "num_input_tokens_seen": 97973092, "step": 847 }, { "epoch": 4.559139784946236, "grad_norm": 36.17023468017578, "learning_rate": 5e-07, "loss": 0.7556, "num_input_tokens_seen": 98085740, "step": 848 }, { "epoch": 4.559139784946236, "loss": 0.7907640933990479, "loss_ce": 0.00011469742457848042, "loss_iou": 0.32421875, "loss_num": 0.0286865234375, "loss_xval": 0.7890625, "num_input_tokens_seen": 98085740, "step": 848 }, { "epoch": 4.564516129032258, "grad_norm": 30.776491165161133, "learning_rate": 5e-07, "loss": 0.9084, "num_input_tokens_seen": 98202308, "step": 849 }, { "epoch": 4.564516129032258, "loss": 0.8675141334533691, "loss_ce": 8.245072967838496e-05, "loss_iou": 0.376953125, "loss_num": 0.02294921875, "loss_xval": 0.8671875, "num_input_tokens_seen": 98202308, "step": 849 }, { "epoch": 4.56989247311828, "grad_norm": 24.15325355529785, "learning_rate": 5e-07, "loss": 0.8737, "num_input_tokens_seen": 98317328, "step": 850 }, { "epoch": 4.56989247311828, "loss": 0.7268706560134888, "loss_ce": 6.398833647836e-05, "loss_iou": 0.30859375, "loss_num": 0.0218505859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 98317328, "step": 850 }, { "epoch": 4.575268817204301, "grad_norm": 36.547298431396484, "learning_rate": 5e-07, "loss": 0.8243, "num_input_tokens_seen": 98431564, "step": 851 }, { "epoch": 4.575268817204301, "loss": 0.9827964305877686, "loss_ce": 0.00013045831292402, "loss_iou": 0.42578125, "loss_num": 0.0263671875, "loss_xval": 0.984375, "num_input_tokens_seen": 98431564, "step": 851 }, { "epoch": 4.580645161290323, "grad_norm": 34.24976348876953, "learning_rate": 5e-07, "loss": 0.7951, "num_input_tokens_seen": 98548036, "step": 852 }, { "epoch": 4.580645161290323, "loss": 0.7524808049201965, "loss_ce": 3.938098961953074e-05, "loss_iou": 0.3203125, "loss_num": 0.022216796875, "loss_xval": 0.75390625, "num_input_tokens_seen": 98548036, "step": 852 }, { "epoch": 4.586021505376344, "grad_norm": 24.947118759155273, "learning_rate": 5e-07, "loss": 0.939, "num_input_tokens_seen": 98663564, "step": 853 }, { "epoch": 4.586021505376344, "loss": 0.7588548064231873, "loss_ce": 6.576291343662888e-05, "loss_iou": 0.333984375, "loss_num": 0.0185546875, "loss_xval": 0.7578125, "num_input_tokens_seen": 98663564, "step": 853 }, { "epoch": 4.591397849462366, "grad_norm": 23.576370239257812, "learning_rate": 5e-07, "loss": 0.9045, "num_input_tokens_seen": 98778108, "step": 854 }, { "epoch": 4.591397849462366, "loss": 0.9415098428726196, "loss_ce": 0.00010364077024860308, "loss_iou": 0.38671875, "loss_num": 0.03369140625, "loss_xval": 0.94140625, "num_input_tokens_seen": 98778108, "step": 854 }, { "epoch": 4.596774193548387, "grad_norm": 19.7305965423584, "learning_rate": 5e-07, "loss": 0.7309, "num_input_tokens_seen": 98893908, "step": 855 }, { "epoch": 4.596774193548387, "loss": 0.8082001805305481, "loss_ce": 9.472787496633828e-05, "loss_iou": 0.328125, "loss_num": 0.0306396484375, "loss_xval": 0.80859375, "num_input_tokens_seen": 98893908, "step": 855 }, { "epoch": 4.602150537634409, "grad_norm": 32.18356704711914, "learning_rate": 5e-07, "loss": 0.7964, "num_input_tokens_seen": 99008360, "step": 856 }, { "epoch": 4.602150537634409, "loss": 0.9004464149475098, "loss_ce": 5.578630225500092e-05, "loss_iou": 0.390625, "loss_num": 0.0240478515625, "loss_xval": 0.8984375, "num_input_tokens_seen": 99008360, "step": 856 }, { "epoch": 4.60752688172043, "grad_norm": 52.06953811645508, "learning_rate": 5e-07, "loss": 1.0049, "num_input_tokens_seen": 99123940, "step": 857 }, { "epoch": 4.60752688172043, "loss": 0.7591762542724609, "loss_ce": 0.00014306927914731205, "loss_iou": 0.32421875, "loss_num": 0.022216796875, "loss_xval": 0.7578125, "num_input_tokens_seen": 99123940, "step": 857 }, { "epoch": 4.612903225806452, "grad_norm": 20.624357223510742, "learning_rate": 5e-07, "loss": 0.795, "num_input_tokens_seen": 99238840, "step": 858 }, { "epoch": 4.612903225806452, "loss": 0.8533859252929688, "loss_ce": 0.00011443646508269012, "loss_iou": 0.37890625, "loss_num": 0.018798828125, "loss_xval": 0.8515625, "num_input_tokens_seen": 99238840, "step": 858 }, { "epoch": 4.618279569892473, "grad_norm": 19.335777282714844, "learning_rate": 5e-07, "loss": 0.8054, "num_input_tokens_seen": 99354036, "step": 859 }, { "epoch": 4.618279569892473, "loss": 0.7822799682617188, "loss_ce": 5.343123848433606e-05, "loss_iou": 0.306640625, "loss_num": 0.03369140625, "loss_xval": 0.78125, "num_input_tokens_seen": 99354036, "step": 859 }, { "epoch": 4.623655913978495, "grad_norm": 33.58926010131836, "learning_rate": 5e-07, "loss": 0.7799, "num_input_tokens_seen": 99469204, "step": 860 }, { "epoch": 4.623655913978495, "loss": 0.7506564855575562, "loss_ce": 4.608414747053757e-05, "loss_iou": 0.30859375, "loss_num": 0.026611328125, "loss_xval": 0.75, "num_input_tokens_seen": 99469204, "step": 860 }, { "epoch": 4.629032258064516, "grad_norm": 22.542236328125, "learning_rate": 5e-07, "loss": 1.0112, "num_input_tokens_seen": 99584768, "step": 861 }, { "epoch": 4.629032258064516, "loss": 0.9309395551681519, "loss_ce": 3.132822166662663e-05, "loss_iou": 0.384765625, "loss_num": 0.0322265625, "loss_xval": 0.9296875, "num_input_tokens_seen": 99584768, "step": 861 }, { "epoch": 4.634408602150538, "grad_norm": 29.218549728393555, "learning_rate": 5e-07, "loss": 0.7634, "num_input_tokens_seen": 99698124, "step": 862 }, { "epoch": 4.634408602150538, "loss": 0.7767894864082336, "loss_ce": 0.0005443766131065786, "loss_iou": 0.32421875, "loss_num": 0.02587890625, "loss_xval": 0.77734375, "num_input_tokens_seen": 99698124, "step": 862 }, { "epoch": 4.639784946236559, "grad_norm": 23.270246505737305, "learning_rate": 5e-07, "loss": 0.9917, "num_input_tokens_seen": 99811348, "step": 863 }, { "epoch": 4.639784946236559, "loss": 1.3263227939605713, "loss_ce": 0.00015093102410901338, "loss_iou": 0.5546875, "loss_num": 0.04296875, "loss_xval": 1.328125, "num_input_tokens_seen": 99811348, "step": 863 }, { "epoch": 4.645161290322581, "grad_norm": 25.6848201751709, "learning_rate": 5e-07, "loss": 1.0103, "num_input_tokens_seen": 99926352, "step": 864 }, { "epoch": 4.645161290322581, "loss": 1.1084743738174438, "loss_ce": 7.59488029871136e-05, "loss_iou": 0.431640625, "loss_num": 0.04931640625, "loss_xval": 1.109375, "num_input_tokens_seen": 99926352, "step": 864 }, { "epoch": 4.650537634408602, "grad_norm": 17.407596588134766, "learning_rate": 5e-07, "loss": 0.8201, "num_input_tokens_seen": 100043968, "step": 865 }, { "epoch": 4.650537634408602, "loss": 0.8991645574569702, "loss_ce": 0.0001167072041425854, "loss_iou": 0.357421875, "loss_num": 0.036865234375, "loss_xval": 0.8984375, "num_input_tokens_seen": 100043968, "step": 865 }, { "epoch": 4.655913978494624, "grad_norm": 27.106395721435547, "learning_rate": 5e-07, "loss": 0.7632, "num_input_tokens_seen": 100158484, "step": 866 }, { "epoch": 4.655913978494624, "loss": 0.7607058882713318, "loss_ce": 8.5784777184017e-05, "loss_iou": 0.302734375, "loss_num": 0.031005859375, "loss_xval": 0.76171875, "num_input_tokens_seen": 100158484, "step": 866 }, { "epoch": 4.661290322580645, "grad_norm": 21.187780380249023, "learning_rate": 5e-07, "loss": 0.9016, "num_input_tokens_seen": 100276684, "step": 867 }, { "epoch": 4.661290322580645, "loss": 0.6107337474822998, "loss_ce": 0.001114618149586022, "loss_iou": 0.265625, "loss_num": 0.01556396484375, "loss_xval": 0.609375, "num_input_tokens_seen": 100276684, "step": 867 }, { "epoch": 4.666666666666667, "grad_norm": 40.14695739746094, "learning_rate": 5e-07, "loss": 0.8788, "num_input_tokens_seen": 100391892, "step": 868 }, { "epoch": 4.666666666666667, "loss": 0.660341739654541, "loss_ce": 6.337513332255185e-05, "loss_iou": 0.2734375, "loss_num": 0.0224609375, "loss_xval": 0.66015625, "num_input_tokens_seen": 100391892, "step": 868 }, { "epoch": 4.672043010752688, "grad_norm": 17.920164108276367, "learning_rate": 5e-07, "loss": 0.7999, "num_input_tokens_seen": 100510064, "step": 869 }, { "epoch": 4.672043010752688, "loss": 0.6189466118812561, "loss_ce": 5.011633038520813e-05, "loss_iou": 0.2255859375, "loss_num": 0.033203125, "loss_xval": 0.6171875, "num_input_tokens_seen": 100510064, "step": 869 }, { "epoch": 4.67741935483871, "grad_norm": 23.72347068786621, "learning_rate": 5e-07, "loss": 0.8343, "num_input_tokens_seen": 100627668, "step": 870 }, { "epoch": 4.67741935483871, "loss": 1.0238640308380127, "loss_ce": 0.00018239986093249172, "loss_iou": 0.4453125, "loss_num": 0.0260009765625, "loss_xval": 1.0234375, "num_input_tokens_seen": 100627668, "step": 870 }, { "epoch": 4.682795698924731, "grad_norm": 17.835386276245117, "learning_rate": 5e-07, "loss": 0.8294, "num_input_tokens_seen": 100740828, "step": 871 }, { "epoch": 4.682795698924731, "loss": 0.7578625679016113, "loss_ce": 5.005455750506371e-05, "loss_iou": 0.326171875, "loss_num": 0.0211181640625, "loss_xval": 0.7578125, "num_input_tokens_seen": 100740828, "step": 871 }, { "epoch": 4.688172043010753, "grad_norm": 21.097980499267578, "learning_rate": 5e-07, "loss": 0.9161, "num_input_tokens_seen": 100858984, "step": 872 }, { "epoch": 4.688172043010753, "loss": 0.9766516089439392, "loss_ce": 8.907858864404261e-05, "loss_iou": 0.396484375, "loss_num": 0.037353515625, "loss_xval": 0.9765625, "num_input_tokens_seen": 100858984, "step": 872 }, { "epoch": 4.693548387096774, "grad_norm": 23.29046630859375, "learning_rate": 5e-07, "loss": 0.9038, "num_input_tokens_seen": 100974640, "step": 873 }, { "epoch": 4.693548387096774, "loss": 0.898026704788208, "loss_ce": 7.747122435830534e-05, "loss_iou": 0.3828125, "loss_num": 0.02685546875, "loss_xval": 0.8984375, "num_input_tokens_seen": 100974640, "step": 873 }, { "epoch": 4.698924731182796, "grad_norm": 17.796424865722656, "learning_rate": 5e-07, "loss": 0.9395, "num_input_tokens_seen": 101089240, "step": 874 }, { "epoch": 4.698924731182796, "loss": 1.105658769607544, "loss_ce": 0.0001901288633234799, "loss_iou": 0.435546875, "loss_num": 0.046630859375, "loss_xval": 1.109375, "num_input_tokens_seen": 101089240, "step": 874 }, { "epoch": 4.704301075268817, "grad_norm": 16.309040069580078, "learning_rate": 5e-07, "loss": 1.0726, "num_input_tokens_seen": 101204144, "step": 875 }, { "epoch": 4.704301075268817, "loss": 1.1487462520599365, "loss_ce": 6.46105982013978e-05, "loss_iou": 0.474609375, "loss_num": 0.039794921875, "loss_xval": 1.1484375, "num_input_tokens_seen": 101204144, "step": 875 }, { "epoch": 4.709677419354839, "grad_norm": 17.216358184814453, "learning_rate": 5e-07, "loss": 0.8171, "num_input_tokens_seen": 101321144, "step": 876 }, { "epoch": 4.709677419354839, "loss": 0.512057900428772, "loss_ce": 9.497567953076214e-05, "loss_iou": 0.2236328125, "loss_num": 0.012939453125, "loss_xval": 0.51171875, "num_input_tokens_seen": 101321144, "step": 876 }, { "epoch": 4.71505376344086, "grad_norm": 38.55146026611328, "learning_rate": 5e-07, "loss": 0.9287, "num_input_tokens_seen": 101436896, "step": 877 }, { "epoch": 4.71505376344086, "loss": 0.6019599437713623, "loss_ce": 3.12678275804501e-05, "loss_iou": 0.251953125, "loss_num": 0.0196533203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 101436896, "step": 877 }, { "epoch": 4.720430107526882, "grad_norm": 30.31635856628418, "learning_rate": 5e-07, "loss": 0.7043, "num_input_tokens_seen": 101553944, "step": 878 }, { "epoch": 4.720430107526882, "loss": 0.7583848237991333, "loss_ce": 8.404873369727284e-05, "loss_iou": 0.30859375, "loss_num": 0.0279541015625, "loss_xval": 0.7578125, "num_input_tokens_seen": 101553944, "step": 878 }, { "epoch": 4.725806451612903, "grad_norm": 22.922231674194336, "learning_rate": 5e-07, "loss": 0.9664, "num_input_tokens_seen": 101669320, "step": 879 }, { "epoch": 4.725806451612903, "loss": 0.8389371633529663, "loss_ce": 6.999609468039125e-05, "loss_iou": 0.349609375, "loss_num": 0.028076171875, "loss_xval": 0.83984375, "num_input_tokens_seen": 101669320, "step": 879 }, { "epoch": 4.731182795698925, "grad_norm": 19.80655288696289, "learning_rate": 5e-07, "loss": 0.7141, "num_input_tokens_seen": 101783088, "step": 880 }, { "epoch": 4.731182795698925, "loss": 0.9321617484092712, "loss_ce": 3.287187428213656e-05, "loss_iou": 0.39453125, "loss_num": 0.028564453125, "loss_xval": 0.93359375, "num_input_tokens_seen": 101783088, "step": 880 }, { "epoch": 4.736559139784946, "grad_norm": 16.457509994506836, "learning_rate": 5e-07, "loss": 0.8823, "num_input_tokens_seen": 101901576, "step": 881 }, { "epoch": 4.736559139784946, "loss": 0.885084867477417, "loss_ce": 7.514090975746512e-05, "loss_iou": 0.375, "loss_num": 0.02685546875, "loss_xval": 0.88671875, "num_input_tokens_seen": 101901576, "step": 881 }, { "epoch": 4.741935483870968, "grad_norm": 24.645139694213867, "learning_rate": 5e-07, "loss": 0.8418, "num_input_tokens_seen": 102018016, "step": 882 }, { "epoch": 4.741935483870968, "loss": 0.605424165725708, "loss_ce": 7.7475153375417e-05, "loss_iou": 0.263671875, "loss_num": 0.015869140625, "loss_xval": 0.60546875, "num_input_tokens_seen": 102018016, "step": 882 }, { "epoch": 4.747311827956989, "grad_norm": 20.63474464416504, "learning_rate": 5e-07, "loss": 0.826, "num_input_tokens_seen": 102134612, "step": 883 }, { "epoch": 4.747311827956989, "loss": 0.9102281332015991, "loss_ce": 7.187073060777038e-05, "loss_iou": 0.384765625, "loss_num": 0.0284423828125, "loss_xval": 0.91015625, "num_input_tokens_seen": 102134612, "step": 883 }, { "epoch": 4.752688172043011, "grad_norm": 24.51767921447754, "learning_rate": 5e-07, "loss": 0.8111, "num_input_tokens_seen": 102251220, "step": 884 }, { "epoch": 4.752688172043011, "loss": 1.0930955410003662, "loss_ce": 7.803343760315329e-05, "loss_iou": 0.458984375, "loss_num": 0.03515625, "loss_xval": 1.09375, "num_input_tokens_seen": 102251220, "step": 884 }, { "epoch": 4.758064516129032, "grad_norm": 26.385847091674805, "learning_rate": 5e-07, "loss": 1.1125, "num_input_tokens_seen": 102365032, "step": 885 }, { "epoch": 4.758064516129032, "loss": 1.0437521934509277, "loss_ce": 5.0972616008948535e-05, "loss_iou": 0.435546875, "loss_num": 0.034423828125, "loss_xval": 1.046875, "num_input_tokens_seen": 102365032, "step": 885 }, { "epoch": 4.763440860215054, "grad_norm": 115.09992218017578, "learning_rate": 5e-07, "loss": 0.8794, "num_input_tokens_seen": 102478644, "step": 886 }, { "epoch": 4.763440860215054, "loss": 0.9292473196983337, "loss_ce": 4.816191358258948e-05, "loss_iou": 0.390625, "loss_num": 0.029296875, "loss_xval": 0.9296875, "num_input_tokens_seen": 102478644, "step": 886 }, { "epoch": 4.768817204301075, "grad_norm": 24.34832763671875, "learning_rate": 5e-07, "loss": 0.8998, "num_input_tokens_seen": 102595748, "step": 887 }, { "epoch": 4.768817204301075, "loss": 0.9224369525909424, "loss_ce": 7.367449870798737e-05, "loss_iou": 0.392578125, "loss_num": 0.0277099609375, "loss_xval": 0.921875, "num_input_tokens_seen": 102595748, "step": 887 }, { "epoch": 4.774193548387097, "grad_norm": 21.83662223815918, "learning_rate": 5e-07, "loss": 0.9415, "num_input_tokens_seen": 102706340, "step": 888 }, { "epoch": 4.774193548387097, "loss": 0.7410439252853394, "loss_ce": 0.000321238418109715, "loss_iou": 0.314453125, "loss_num": 0.0224609375, "loss_xval": 0.7421875, "num_input_tokens_seen": 102706340, "step": 888 }, { "epoch": 4.779569892473118, "grad_norm": 22.02582550048828, "learning_rate": 5e-07, "loss": 0.82, "num_input_tokens_seen": 102821328, "step": 889 }, { "epoch": 4.779569892473118, "loss": 0.5708651542663574, "loss_ce": 6.43835446680896e-05, "loss_iou": 0.220703125, "loss_num": 0.0260009765625, "loss_xval": 0.5703125, "num_input_tokens_seen": 102821328, "step": 889 }, { "epoch": 4.78494623655914, "grad_norm": 21.15550994873047, "learning_rate": 5e-07, "loss": 0.8555, "num_input_tokens_seen": 102937852, "step": 890 }, { "epoch": 4.78494623655914, "loss": 0.8050613403320312, "loss_ce": 0.0001296100817853585, "loss_iou": 0.345703125, "loss_num": 0.0224609375, "loss_xval": 0.8046875, "num_input_tokens_seen": 102937852, "step": 890 }, { "epoch": 4.790322580645161, "grad_norm": 24.120309829711914, "learning_rate": 5e-07, "loss": 0.799, "num_input_tokens_seen": 103052040, "step": 891 }, { "epoch": 4.790322580645161, "loss": 0.8369565010070801, "loss_ce": 4.2394363845232874e-05, "loss_iou": 0.36328125, "loss_num": 0.0220947265625, "loss_xval": 0.8359375, "num_input_tokens_seen": 103052040, "step": 891 }, { "epoch": 4.795698924731183, "grad_norm": 20.134424209594727, "learning_rate": 5e-07, "loss": 0.9093, "num_input_tokens_seen": 103167304, "step": 892 }, { "epoch": 4.795698924731183, "loss": 0.9034935235977173, "loss_ce": 0.0001731621305225417, "loss_iou": 0.35546875, "loss_num": 0.03857421875, "loss_xval": 0.90234375, "num_input_tokens_seen": 103167304, "step": 892 }, { "epoch": 4.801075268817204, "grad_norm": 99.9727783203125, "learning_rate": 5e-07, "loss": 0.877, "num_input_tokens_seen": 103281448, "step": 893 }, { "epoch": 4.801075268817204, "loss": 1.3513474464416504, "loss_ce": 2.908652459154837e-05, "loss_iou": 0.57421875, "loss_num": 0.040771484375, "loss_xval": 1.3515625, "num_input_tokens_seen": 103281448, "step": 893 }, { "epoch": 4.806451612903226, "grad_norm": 18.697296142578125, "learning_rate": 5e-07, "loss": 0.9829, "num_input_tokens_seen": 103397420, "step": 894 }, { "epoch": 4.806451612903226, "loss": 1.3892313241958618, "loss_ce": 7.118156645447016e-05, "loss_iou": 0.60546875, "loss_num": 0.0361328125, "loss_xval": 1.390625, "num_input_tokens_seen": 103397420, "step": 894 }, { "epoch": 4.811827956989247, "grad_norm": 25.034090042114258, "learning_rate": 5e-07, "loss": 0.8259, "num_input_tokens_seen": 103515560, "step": 895 }, { "epoch": 4.811827956989247, "loss": 1.1700897216796875, "loss_ce": 0.00016784282342996448, "loss_iou": 0.4921875, "loss_num": 0.03759765625, "loss_xval": 1.171875, "num_input_tokens_seen": 103515560, "step": 895 }, { "epoch": 4.817204301075269, "grad_norm": 18.211761474609375, "learning_rate": 5e-07, "loss": 0.8623, "num_input_tokens_seen": 103632292, "step": 896 }, { "epoch": 4.817204301075269, "loss": 0.7530097961425781, "loss_ce": 8.011562749743462e-05, "loss_iou": 0.322265625, "loss_num": 0.0218505859375, "loss_xval": 0.75390625, "num_input_tokens_seen": 103632292, "step": 896 }, { "epoch": 4.82258064516129, "grad_norm": 17.928674697875977, "learning_rate": 5e-07, "loss": 0.968, "num_input_tokens_seen": 103747676, "step": 897 }, { "epoch": 4.82258064516129, "loss": 1.3617396354675293, "loss_ce": 4.5281616621650755e-05, "loss_iou": 0.58203125, "loss_num": 0.0400390625, "loss_xval": 1.359375, "num_input_tokens_seen": 103747676, "step": 897 }, { "epoch": 4.827956989247312, "grad_norm": 25.16057014465332, "learning_rate": 5e-07, "loss": 0.8494, "num_input_tokens_seen": 103865732, "step": 898 }, { "epoch": 4.827956989247312, "loss": 0.8333253264427185, "loss_ce": 0.000317531666951254, "loss_iou": 0.365234375, "loss_num": 0.0201416015625, "loss_xval": 0.83203125, "num_input_tokens_seen": 103865732, "step": 898 }, { "epoch": 4.833333333333333, "grad_norm": 21.883756637573242, "learning_rate": 5e-07, "loss": 0.9684, "num_input_tokens_seen": 103979884, "step": 899 }, { "epoch": 4.833333333333333, "loss": 1.2818301916122437, "loss_ce": 9.191679419018328e-05, "loss_iou": 0.578125, "loss_num": 0.0250244140625, "loss_xval": 1.28125, "num_input_tokens_seen": 103979884, "step": 899 }, { "epoch": 4.838709677419355, "grad_norm": 25.469749450683594, "learning_rate": 5e-07, "loss": 0.9241, "num_input_tokens_seen": 104093964, "step": 900 }, { "epoch": 4.838709677419355, "loss": 0.5174036026000977, "loss_ce": 6.962695624679327e-05, "loss_iou": 0.2265625, "loss_num": 0.01287841796875, "loss_xval": 0.515625, "num_input_tokens_seen": 104093964, "step": 900 }, { "epoch": 4.844086021505376, "grad_norm": 18.193056106567383, "learning_rate": 5e-07, "loss": 0.9109, "num_input_tokens_seen": 104208080, "step": 901 }, { "epoch": 4.844086021505376, "loss": 0.918825626373291, "loss_ce": 0.00012448785128071904, "loss_iou": 0.400390625, "loss_num": 0.0238037109375, "loss_xval": 0.91796875, "num_input_tokens_seen": 104208080, "step": 901 }, { "epoch": 4.849462365591398, "grad_norm": 23.13768768310547, "learning_rate": 5e-07, "loss": 0.9005, "num_input_tokens_seen": 104324472, "step": 902 }, { "epoch": 4.849462365591398, "loss": 1.150599479675293, "loss_ce": 0.00020878709619864821, "loss_iou": 0.455078125, "loss_num": 0.048095703125, "loss_xval": 1.1484375, "num_input_tokens_seen": 104324472, "step": 902 }, { "epoch": 4.854838709677419, "grad_norm": 20.237459182739258, "learning_rate": 5e-07, "loss": 0.7245, "num_input_tokens_seen": 104441432, "step": 903 }, { "epoch": 4.854838709677419, "loss": 0.7847545146942139, "loss_ce": 8.652421820443124e-05, "loss_iou": 0.3359375, "loss_num": 0.0223388671875, "loss_xval": 0.78515625, "num_input_tokens_seen": 104441432, "step": 903 }, { "epoch": 4.860215053763441, "grad_norm": 22.01346206665039, "learning_rate": 5e-07, "loss": 0.8605, "num_input_tokens_seen": 104555256, "step": 904 }, { "epoch": 4.860215053763441, "loss": 0.9711517691612244, "loss_ce": 0.00020450318697839975, "loss_iou": 0.3984375, "loss_num": 0.034912109375, "loss_xval": 0.97265625, "num_input_tokens_seen": 104555256, "step": 904 }, { "epoch": 4.865591397849462, "grad_norm": 23.106290817260742, "learning_rate": 5e-07, "loss": 0.8293, "num_input_tokens_seen": 104670328, "step": 905 }, { "epoch": 4.865591397849462, "loss": 0.642428994178772, "loss_ce": 9.501111344434321e-05, "loss_iou": 0.25390625, "loss_num": 0.0269775390625, "loss_xval": 0.640625, "num_input_tokens_seen": 104670328, "step": 905 }, { "epoch": 4.870967741935484, "grad_norm": 23.720611572265625, "learning_rate": 5e-07, "loss": 0.8025, "num_input_tokens_seen": 104781272, "step": 906 }, { "epoch": 4.870967741935484, "loss": 0.8286669254302979, "loss_ce": 5.368741040001623e-05, "loss_iou": 0.337890625, "loss_num": 0.03076171875, "loss_xval": 0.828125, "num_input_tokens_seen": 104781272, "step": 906 }, { "epoch": 4.876344086021505, "grad_norm": 22.349905014038086, "learning_rate": 5e-07, "loss": 0.7983, "num_input_tokens_seen": 104892384, "step": 907 }, { "epoch": 4.876344086021505, "loss": 0.9341791868209839, "loss_ce": 9.717550710774958e-05, "loss_iou": 0.373046875, "loss_num": 0.0380859375, "loss_xval": 0.93359375, "num_input_tokens_seen": 104892384, "step": 907 }, { "epoch": 4.881720430107527, "grad_norm": 18.008113861083984, "learning_rate": 5e-07, "loss": 0.9168, "num_input_tokens_seen": 105009484, "step": 908 }, { "epoch": 4.881720430107527, "loss": 0.8120461702346802, "loss_ce": 3.441510853008367e-05, "loss_iou": 0.34375, "loss_num": 0.0245361328125, "loss_xval": 0.8125, "num_input_tokens_seen": 105009484, "step": 908 }, { "epoch": 4.887096774193548, "grad_norm": 22.97702407836914, "learning_rate": 5e-07, "loss": 1.0314, "num_input_tokens_seen": 105126272, "step": 909 }, { "epoch": 4.887096774193548, "loss": 0.7598257064819336, "loss_ce": 6.007315096212551e-05, "loss_iou": 0.318359375, "loss_num": 0.0250244140625, "loss_xval": 0.7578125, "num_input_tokens_seen": 105126272, "step": 909 }, { "epoch": 4.89247311827957, "grad_norm": 17.953811645507812, "learning_rate": 5e-07, "loss": 0.9479, "num_input_tokens_seen": 105238388, "step": 910 }, { "epoch": 4.89247311827957, "loss": 1.0645265579223633, "loss_ce": 7.341058517340571e-05, "loss_iou": 0.44140625, "loss_num": 0.036376953125, "loss_xval": 1.0625, "num_input_tokens_seen": 105238388, "step": 910 }, { "epoch": 4.897849462365591, "grad_norm": 23.289716720581055, "learning_rate": 5e-07, "loss": 0.8533, "num_input_tokens_seen": 105353272, "step": 911 }, { "epoch": 4.897849462365591, "loss": 0.5269389152526855, "loss_ce": 8.346361573785543e-05, "loss_iou": 0.1982421875, "loss_num": 0.026123046875, "loss_xval": 0.52734375, "num_input_tokens_seen": 105353272, "step": 911 }, { "epoch": 4.903225806451613, "grad_norm": 17.433666229248047, "learning_rate": 5e-07, "loss": 0.7693, "num_input_tokens_seen": 105467068, "step": 912 }, { "epoch": 4.903225806451613, "loss": 0.6135797500610352, "loss_ce": 5.434370177681558e-05, "loss_iou": 0.2578125, "loss_num": 0.02001953125, "loss_xval": 0.61328125, "num_input_tokens_seen": 105467068, "step": 912 }, { "epoch": 4.908602150537634, "grad_norm": 24.533790588378906, "learning_rate": 5e-07, "loss": 0.7973, "num_input_tokens_seen": 105585876, "step": 913 }, { "epoch": 4.908602150537634, "loss": 0.6992930173873901, "loss_ce": 7.422738417517394e-05, "loss_iou": 0.298828125, "loss_num": 0.0203857421875, "loss_xval": 0.69921875, "num_input_tokens_seen": 105585876, "step": 913 }, { "epoch": 4.913978494623656, "grad_norm": 21.35983657836914, "learning_rate": 5e-07, "loss": 0.8154, "num_input_tokens_seen": 105701392, "step": 914 }, { "epoch": 4.913978494623656, "loss": 0.8417348861694336, "loss_ce": 0.003111800644546747, "loss_iou": 0.337890625, "loss_num": 0.0322265625, "loss_xval": 0.83984375, "num_input_tokens_seen": 105701392, "step": 914 }, { "epoch": 4.919354838709677, "grad_norm": 24.76087188720703, "learning_rate": 5e-07, "loss": 1.0238, "num_input_tokens_seen": 105819736, "step": 915 }, { "epoch": 4.919354838709677, "loss": 1.1172436475753784, "loss_ce": 5.6166845752159134e-05, "loss_iou": 0.46484375, "loss_num": 0.03759765625, "loss_xval": 1.1171875, "num_input_tokens_seen": 105819736, "step": 915 }, { "epoch": 4.924731182795699, "grad_norm": 32.23694610595703, "learning_rate": 5e-07, "loss": 0.6764, "num_input_tokens_seen": 105935500, "step": 916 }, { "epoch": 4.924731182795699, "loss": 0.5906476974487305, "loss_ce": 7.153616024879739e-05, "loss_iou": 0.224609375, "loss_num": 0.0284423828125, "loss_xval": 0.58984375, "num_input_tokens_seen": 105935500, "step": 916 }, { "epoch": 4.93010752688172, "grad_norm": 36.00068664550781, "learning_rate": 5e-07, "loss": 1.2206, "num_input_tokens_seen": 106050900, "step": 917 }, { "epoch": 4.93010752688172, "loss": 1.260066270828247, "loss_ce": 5.6420074542984366e-05, "loss_iou": 0.5546875, "loss_num": 0.029541015625, "loss_xval": 1.2578125, "num_input_tokens_seen": 106050900, "step": 917 }, { "epoch": 4.935483870967742, "grad_norm": 25.1900577545166, "learning_rate": 5e-07, "loss": 0.9418, "num_input_tokens_seen": 106167704, "step": 918 }, { "epoch": 4.935483870967742, "loss": 1.0832955837249756, "loss_ce": 4.356484714662656e-05, "loss_iou": 0.48828125, "loss_num": 0.0218505859375, "loss_xval": 1.0859375, "num_input_tokens_seen": 106167704, "step": 918 }, { "epoch": 4.940860215053764, "grad_norm": 17.48443603515625, "learning_rate": 5e-07, "loss": 0.9863, "num_input_tokens_seen": 106281208, "step": 919 }, { "epoch": 4.940860215053764, "loss": 0.7099953889846802, "loss_ce": 3.446359187364578e-05, "loss_iou": 0.2734375, "loss_num": 0.032958984375, "loss_xval": 0.7109375, "num_input_tokens_seen": 106281208, "step": 919 }, { "epoch": 4.946236559139785, "grad_norm": 19.136173248291016, "learning_rate": 5e-07, "loss": 0.612, "num_input_tokens_seen": 106398816, "step": 920 }, { "epoch": 4.946236559139785, "loss": 0.49625295400619507, "loss_ce": 0.0001592242915648967, "loss_iou": 0.2041015625, "loss_num": 0.017578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 106398816, "step": 920 }, { "epoch": 4.951612903225806, "grad_norm": 18.06033706665039, "learning_rate": 5e-07, "loss": 0.8818, "num_input_tokens_seen": 106514284, "step": 921 }, { "epoch": 4.951612903225806, "loss": 1.102339506149292, "loss_ce": 4.455634916666895e-05, "loss_iou": 0.470703125, "loss_num": 0.031982421875, "loss_xval": 1.1015625, "num_input_tokens_seen": 106514284, "step": 921 }, { "epoch": 4.956989247311828, "grad_norm": 25.500642776489258, "learning_rate": 5e-07, "loss": 0.955, "num_input_tokens_seen": 106631604, "step": 922 }, { "epoch": 4.956989247311828, "loss": 0.9461056590080261, "loss_ce": 6.071461393730715e-05, "loss_iou": 0.396484375, "loss_num": 0.03076171875, "loss_xval": 0.9453125, "num_input_tokens_seen": 106631604, "step": 922 }, { "epoch": 4.96236559139785, "grad_norm": 17.955968856811523, "learning_rate": 5e-07, "loss": 1.0994, "num_input_tokens_seen": 106746824, "step": 923 }, { "epoch": 4.96236559139785, "loss": 1.194125771522522, "loss_ce": 3.39831349265296e-05, "loss_iou": 0.515625, "loss_num": 0.031982421875, "loss_xval": 1.1953125, "num_input_tokens_seen": 106746824, "step": 923 }, { "epoch": 4.967741935483871, "grad_norm": 32.39043045043945, "learning_rate": 5e-07, "loss": 0.907, "num_input_tokens_seen": 106866832, "step": 924 }, { "epoch": 4.967741935483871, "loss": 0.7395632863044739, "loss_ce": 6.131105328677222e-05, "loss_iou": 0.28125, "loss_num": 0.03564453125, "loss_xval": 0.73828125, "num_input_tokens_seen": 106866832, "step": 924 }, { "epoch": 4.973118279569892, "grad_norm": 42.70771408081055, "learning_rate": 5e-07, "loss": 0.9154, "num_input_tokens_seen": 106983852, "step": 925 }, { "epoch": 4.973118279569892, "loss": 0.9224252104759216, "loss_ce": 6.194744491949677e-05, "loss_iou": 0.37109375, "loss_num": 0.03564453125, "loss_xval": 0.921875, "num_input_tokens_seen": 106983852, "step": 925 }, { "epoch": 4.978494623655914, "grad_norm": 17.482044219970703, "learning_rate": 5e-07, "loss": 0.6956, "num_input_tokens_seen": 107099184, "step": 926 }, { "epoch": 4.978494623655914, "loss": 0.6425273418426514, "loss_ce": 7.131574238883331e-05, "loss_iou": 0.267578125, "loss_num": 0.021484375, "loss_xval": 0.640625, "num_input_tokens_seen": 107099184, "step": 926 }, { "epoch": 4.983870967741936, "grad_norm": 17.56452751159668, "learning_rate": 5e-07, "loss": 0.662, "num_input_tokens_seen": 107217388, "step": 927 }, { "epoch": 4.983870967741936, "loss": 0.6947378516197205, "loss_ce": 0.00015777218504808843, "loss_iou": 0.291015625, "loss_num": 0.022705078125, "loss_xval": 0.6953125, "num_input_tokens_seen": 107217388, "step": 927 }, { "epoch": 4.989247311827957, "grad_norm": 22.969135284423828, "learning_rate": 5e-07, "loss": 0.7852, "num_input_tokens_seen": 107336240, "step": 928 }, { "epoch": 4.989247311827957, "loss": 0.6380927562713623, "loss_ce": 0.00015326243010349572, "loss_iou": 0.24609375, "loss_num": 0.0291748046875, "loss_xval": 0.63671875, "num_input_tokens_seen": 107336240, "step": 928 }, { "epoch": 4.994623655913978, "grad_norm": 21.039339065551758, "learning_rate": 5e-07, "loss": 0.8104, "num_input_tokens_seen": 107452644, "step": 929 }, { "epoch": 4.994623655913978, "loss": 0.9791451692581177, "loss_ce": 0.00014129295595921576, "loss_iou": 0.396484375, "loss_num": 0.037353515625, "loss_xval": 0.98046875, "num_input_tokens_seen": 107452644, "step": 929 }, { "epoch": 5.0, "grad_norm": 27.182146072387695, "learning_rate": 5e-07, "loss": 0.9205, "num_input_tokens_seen": 107567876, "step": 930 }, { "epoch": 5.0, "loss": 1.0117580890655518, "loss_ce": 3.9354861655738205e-05, "loss_iou": 0.44140625, "loss_num": 0.0257568359375, "loss_xval": 1.015625, "num_input_tokens_seen": 107567876, "step": 930 }, { "epoch": 5.005376344086022, "grad_norm": 30.223493576049805, "learning_rate": 5e-07, "loss": 0.9539, "num_input_tokens_seen": 107685448, "step": 931 }, { "epoch": 5.005376344086022, "loss": 0.712992250919342, "loss_ce": 0.00010163188562728465, "loss_iou": 0.287109375, "loss_num": 0.02783203125, "loss_xval": 0.7109375, "num_input_tokens_seen": 107685448, "step": 931 }, { "epoch": 5.010752688172043, "grad_norm": 31.322492599487305, "learning_rate": 5e-07, "loss": 0.7419, "num_input_tokens_seen": 107798604, "step": 932 }, { "epoch": 5.010752688172043, "loss": 0.5204803943634033, "loss_ce": 0.00021671407739631832, "loss_iou": 0.2265625, "loss_num": 0.013427734375, "loss_xval": 0.51953125, "num_input_tokens_seen": 107798604, "step": 932 }, { "epoch": 5.016129032258065, "grad_norm": 39.34040832519531, "learning_rate": 5e-07, "loss": 0.8351, "num_input_tokens_seen": 107911712, "step": 933 }, { "epoch": 5.016129032258065, "loss": 0.834511935710907, "loss_ce": 3.9279682823689654e-05, "loss_iou": 0.333984375, "loss_num": 0.033447265625, "loss_xval": 0.8359375, "num_input_tokens_seen": 107911712, "step": 933 }, { "epoch": 5.021505376344086, "grad_norm": 22.65049171447754, "learning_rate": 5e-07, "loss": 0.6826, "num_input_tokens_seen": 108028124, "step": 934 }, { "epoch": 5.021505376344086, "loss": 0.6382594704627991, "loss_ce": 7.589998131152242e-05, "loss_iou": 0.279296875, "loss_num": 0.0159912109375, "loss_xval": 0.63671875, "num_input_tokens_seen": 108028124, "step": 934 }, { "epoch": 5.026881720430108, "grad_norm": 17.840129852294922, "learning_rate": 5e-07, "loss": 0.8455, "num_input_tokens_seen": 108143572, "step": 935 }, { "epoch": 5.026881720430108, "loss": 0.9394749402999878, "loss_ce": 2.1793362975586206e-05, "loss_iou": 0.400390625, "loss_num": 0.0274658203125, "loss_xval": 0.9375, "num_input_tokens_seen": 108143572, "step": 935 }, { "epoch": 5.032258064516129, "grad_norm": 27.635976791381836, "learning_rate": 5e-07, "loss": 0.9265, "num_input_tokens_seen": 108261640, "step": 936 }, { "epoch": 5.032258064516129, "loss": 0.9818801879882812, "loss_ce": 0.00019072381837759167, "loss_iou": 0.421875, "loss_num": 0.027099609375, "loss_xval": 0.98046875, "num_input_tokens_seen": 108261640, "step": 936 }, { "epoch": 5.037634408602151, "grad_norm": 21.007953643798828, "learning_rate": 5e-07, "loss": 0.7671, "num_input_tokens_seen": 108379368, "step": 937 }, { "epoch": 5.037634408602151, "loss": 0.6056091785430908, "loss_ce": 0.000140461212140508, "loss_iou": 0.2490234375, "loss_num": 0.021484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 108379368, "step": 937 }, { "epoch": 5.043010752688172, "grad_norm": 18.5598087310791, "learning_rate": 5e-07, "loss": 1.0033, "num_input_tokens_seen": 108496564, "step": 938 }, { "epoch": 5.043010752688172, "loss": 0.7588214874267578, "loss_ce": 3.241165541112423e-05, "loss_iou": 0.333984375, "loss_num": 0.018310546875, "loss_xval": 0.7578125, "num_input_tokens_seen": 108496564, "step": 938 }, { "epoch": 5.048387096774194, "grad_norm": 24.02916717529297, "learning_rate": 5e-07, "loss": 0.8227, "num_input_tokens_seen": 108613752, "step": 939 }, { "epoch": 5.048387096774194, "loss": 0.6163343191146851, "loss_ce": 0.00012339996465016156, "loss_iou": 0.265625, "loss_num": 0.0167236328125, "loss_xval": 0.6171875, "num_input_tokens_seen": 108613752, "step": 939 }, { "epoch": 5.053763440860215, "grad_norm": 15.627666473388672, "learning_rate": 5e-07, "loss": 0.6634, "num_input_tokens_seen": 108733040, "step": 940 }, { "epoch": 5.053763440860215, "loss": 0.5962462425231934, "loss_ce": 5.483471613842994e-05, "loss_iou": 0.244140625, "loss_num": 0.021728515625, "loss_xval": 0.59765625, "num_input_tokens_seen": 108733040, "step": 940 }, { "epoch": 5.059139784946237, "grad_norm": 23.6945743560791, "learning_rate": 5e-07, "loss": 0.8045, "num_input_tokens_seen": 108851628, "step": 941 }, { "epoch": 5.059139784946237, "loss": 0.933387041091919, "loss_ce": 3.7411398807307705e-05, "loss_iou": 0.421875, "loss_num": 0.0179443359375, "loss_xval": 0.93359375, "num_input_tokens_seen": 108851628, "step": 941 }, { "epoch": 5.064516129032258, "grad_norm": 17.33426856994629, "learning_rate": 5e-07, "loss": 0.7213, "num_input_tokens_seen": 108965828, "step": 942 }, { "epoch": 5.064516129032258, "loss": 0.6566569805145264, "loss_ce": 0.0001628709287615493, "loss_iou": 0.291015625, "loss_num": 0.014892578125, "loss_xval": 0.65625, "num_input_tokens_seen": 108965828, "step": 942 }, { "epoch": 5.06989247311828, "grad_norm": 22.255510330200195, "learning_rate": 5e-07, "loss": 0.846, "num_input_tokens_seen": 109078164, "step": 943 }, { "epoch": 5.06989247311828, "loss": 1.1442047357559204, "loss_ce": 0.0001617821108084172, "loss_iou": 0.486328125, "loss_num": 0.03466796875, "loss_xval": 1.140625, "num_input_tokens_seen": 109078164, "step": 943 }, { "epoch": 5.075268817204301, "grad_norm": 23.468029022216797, "learning_rate": 5e-07, "loss": 0.7316, "num_input_tokens_seen": 109195560, "step": 944 }, { "epoch": 5.075268817204301, "loss": 0.5147402286529541, "loss_ce": 9.177043102681637e-05, "loss_iou": 0.2265625, "loss_num": 0.01226806640625, "loss_xval": 0.515625, "num_input_tokens_seen": 109195560, "step": 944 }, { "epoch": 5.080645161290323, "grad_norm": 22.951297760009766, "learning_rate": 5e-07, "loss": 0.7061, "num_input_tokens_seen": 109311768, "step": 945 }, { "epoch": 5.080645161290323, "loss": 0.654183030128479, "loss_ce": 0.000130333355627954, "loss_iou": 0.2890625, "loss_num": 0.0155029296875, "loss_xval": 0.65234375, "num_input_tokens_seen": 109311768, "step": 945 }, { "epoch": 5.086021505376344, "grad_norm": 18.990924835205078, "learning_rate": 5e-07, "loss": 0.67, "num_input_tokens_seen": 109429992, "step": 946 }, { "epoch": 5.086021505376344, "loss": 0.7920880317687988, "loss_ce": 9.588393004378304e-05, "loss_iou": 0.330078125, "loss_num": 0.0262451171875, "loss_xval": 0.79296875, "num_input_tokens_seen": 109429992, "step": 946 }, { "epoch": 5.091397849462366, "grad_norm": 25.533931732177734, "learning_rate": 5e-07, "loss": 0.8877, "num_input_tokens_seen": 109546664, "step": 947 }, { "epoch": 5.091397849462366, "loss": 0.7213703393936157, "loss_ce": 5.689826139132492e-05, "loss_iou": 0.3125, "loss_num": 0.0191650390625, "loss_xval": 0.72265625, "num_input_tokens_seen": 109546664, "step": 947 }, { "epoch": 5.096774193548387, "grad_norm": 451.4793701171875, "learning_rate": 5e-07, "loss": 0.9211, "num_input_tokens_seen": 109659908, "step": 948 }, { "epoch": 5.096774193548387, "loss": 0.9294153451919556, "loss_ce": 0.00021609431132674217, "loss_iou": 0.40234375, "loss_num": 0.0247802734375, "loss_xval": 0.9296875, "num_input_tokens_seen": 109659908, "step": 948 }, { "epoch": 5.102150537634409, "grad_norm": 16.831863403320312, "learning_rate": 5e-07, "loss": 0.825, "num_input_tokens_seen": 109774920, "step": 949 }, { "epoch": 5.102150537634409, "loss": 0.7052018642425537, "loss_ce": 0.00012372466153465211, "loss_iou": 0.279296875, "loss_num": 0.0289306640625, "loss_xval": 0.703125, "num_input_tokens_seen": 109774920, "step": 949 }, { "epoch": 5.10752688172043, "grad_norm": 29.807411193847656, "learning_rate": 5e-07, "loss": 0.9129, "num_input_tokens_seen": 109889672, "step": 950 }, { "epoch": 5.10752688172043, "loss": 0.6691285371780396, "loss_ce": 0.0001832441339502111, "loss_iou": 0.279296875, "loss_num": 0.021728515625, "loss_xval": 0.66796875, "num_input_tokens_seen": 109889672, "step": 950 }, { "epoch": 5.112903225806452, "grad_norm": 20.006757736206055, "learning_rate": 5e-07, "loss": 0.8159, "num_input_tokens_seen": 110003128, "step": 951 }, { "epoch": 5.112903225806452, "loss": 0.7347996234893799, "loss_ce": 0.0004245828022249043, "loss_iou": 0.294921875, "loss_num": 0.0283203125, "loss_xval": 0.734375, "num_input_tokens_seen": 110003128, "step": 951 }, { "epoch": 5.118279569892473, "grad_norm": 24.68810272216797, "learning_rate": 5e-07, "loss": 0.8178, "num_input_tokens_seen": 110117488, "step": 952 }, { "epoch": 5.118279569892473, "loss": 0.7791240215301514, "loss_ce": 7.124908006517217e-05, "loss_iou": 0.3203125, "loss_num": 0.0274658203125, "loss_xval": 0.77734375, "num_input_tokens_seen": 110117488, "step": 952 }, { "epoch": 5.123655913978495, "grad_norm": 26.336580276489258, "learning_rate": 5e-07, "loss": 1.0895, "num_input_tokens_seen": 110233132, "step": 953 }, { "epoch": 5.123655913978495, "loss": 1.4908766746520996, "loss_ce": 0.00015404039004351944, "loss_iou": 0.5703125, "loss_num": 0.07080078125, "loss_xval": 1.4921875, "num_input_tokens_seen": 110233132, "step": 953 }, { "epoch": 5.129032258064516, "grad_norm": 23.885072708129883, "learning_rate": 5e-07, "loss": 0.7822, "num_input_tokens_seen": 110349400, "step": 954 }, { "epoch": 5.129032258064516, "loss": 0.7961430549621582, "loss_ce": 0.0009770472534000874, "loss_iou": 0.337890625, "loss_num": 0.023681640625, "loss_xval": 0.796875, "num_input_tokens_seen": 110349400, "step": 954 }, { "epoch": 5.134408602150538, "grad_norm": 18.483932495117188, "learning_rate": 5e-07, "loss": 0.8685, "num_input_tokens_seen": 110466140, "step": 955 }, { "epoch": 5.134408602150538, "loss": 0.8064749240875244, "loss_ce": 0.0004446861566975713, "loss_iou": 0.337890625, "loss_num": 0.0260009765625, "loss_xval": 0.8046875, "num_input_tokens_seen": 110466140, "step": 955 }, { "epoch": 5.139784946236559, "grad_norm": 21.899871826171875, "learning_rate": 5e-07, "loss": 1.0622, "num_input_tokens_seen": 110579600, "step": 956 }, { "epoch": 5.139784946236559, "loss": 1.0740374326705933, "loss_ce": 6.284849951043725e-05, "loss_iou": 0.46484375, "loss_num": 0.029296875, "loss_xval": 1.0703125, "num_input_tokens_seen": 110579600, "step": 956 }, { "epoch": 5.145161290322581, "grad_norm": 27.406951904296875, "learning_rate": 5e-07, "loss": 0.798, "num_input_tokens_seen": 110695804, "step": 957 }, { "epoch": 5.145161290322581, "loss": 0.8356641530990601, "loss_ce": 0.00021495662804227322, "loss_iou": 0.341796875, "loss_num": 0.030517578125, "loss_xval": 0.8359375, "num_input_tokens_seen": 110695804, "step": 957 }, { "epoch": 5.150537634408602, "grad_norm": 24.581663131713867, "learning_rate": 5e-07, "loss": 0.8812, "num_input_tokens_seen": 110811104, "step": 958 }, { "epoch": 5.150537634408602, "loss": 0.6047899723052979, "loss_ce": 5.365232573240064e-05, "loss_iou": 0.2734375, "loss_num": 0.01153564453125, "loss_xval": 0.60546875, "num_input_tokens_seen": 110811104, "step": 958 }, { "epoch": 5.155913978494624, "grad_norm": 19.51398468017578, "learning_rate": 5e-07, "loss": 0.7782, "num_input_tokens_seen": 110926664, "step": 959 }, { "epoch": 5.155913978494624, "loss": 0.7234416604042053, "loss_ce": 5.299368058331311e-05, "loss_iou": 0.3046875, "loss_num": 0.0225830078125, "loss_xval": 0.72265625, "num_input_tokens_seen": 110926664, "step": 959 }, { "epoch": 5.161290322580645, "grad_norm": 21.622446060180664, "learning_rate": 5e-07, "loss": 0.7096, "num_input_tokens_seen": 111042964, "step": 960 }, { "epoch": 5.161290322580645, "loss": 0.667110800743103, "loss_ce": 0.00011864669795613736, "loss_iou": 0.28515625, "loss_num": 0.02001953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 111042964, "step": 960 }, { "epoch": 5.166666666666667, "grad_norm": 19.518892288208008, "learning_rate": 5e-07, "loss": 0.8675, "num_input_tokens_seen": 111159128, "step": 961 }, { "epoch": 5.166666666666667, "loss": 0.8183494210243225, "loss_ce": 0.0001121341047110036, "loss_iou": 0.345703125, "loss_num": 0.0252685546875, "loss_xval": 0.81640625, "num_input_tokens_seen": 111159128, "step": 961 }, { "epoch": 5.172043010752688, "grad_norm": 19.28098487854004, "learning_rate": 5e-07, "loss": 0.9384, "num_input_tokens_seen": 111275884, "step": 962 }, { "epoch": 5.172043010752688, "loss": 1.0084519386291504, "loss_ce": 0.000151147716678679, "loss_iou": 0.4296875, "loss_num": 0.0299072265625, "loss_xval": 1.0078125, "num_input_tokens_seen": 111275884, "step": 962 }, { "epoch": 5.17741935483871, "grad_norm": 29.74858856201172, "learning_rate": 5e-07, "loss": 0.9204, "num_input_tokens_seen": 111390064, "step": 963 }, { "epoch": 5.17741935483871, "loss": 0.5437759160995483, "loss_ce": 7.475628808606416e-05, "loss_iou": 0.2138671875, "loss_num": 0.0235595703125, "loss_xval": 0.54296875, "num_input_tokens_seen": 111390064, "step": 963 }, { "epoch": 5.182795698924731, "grad_norm": 26.007421493530273, "learning_rate": 5e-07, "loss": 0.8461, "num_input_tokens_seen": 111505244, "step": 964 }, { "epoch": 5.182795698924731, "loss": 0.7576566338539124, "loss_ce": 8.828088903101161e-05, "loss_iou": 0.322265625, "loss_num": 0.0228271484375, "loss_xval": 0.7578125, "num_input_tokens_seen": 111505244, "step": 964 }, { "epoch": 5.188172043010753, "grad_norm": 41.37009811401367, "learning_rate": 5e-07, "loss": 0.8476, "num_input_tokens_seen": 111620116, "step": 965 }, { "epoch": 5.188172043010753, "loss": 0.8733950257301331, "loss_ce": 0.00010405569628346711, "loss_iou": 0.357421875, "loss_num": 0.03125, "loss_xval": 0.875, "num_input_tokens_seen": 111620116, "step": 965 }, { "epoch": 5.193548387096774, "grad_norm": 33.44457244873047, "learning_rate": 5e-07, "loss": 0.8906, "num_input_tokens_seen": 111736680, "step": 966 }, { "epoch": 5.193548387096774, "loss": 1.268667221069336, "loss_ce": 0.0001125705530284904, "loss_iou": 0.5390625, "loss_num": 0.03759765625, "loss_xval": 1.265625, "num_input_tokens_seen": 111736680, "step": 966 }, { "epoch": 5.198924731182796, "grad_norm": 31.14161491394043, "learning_rate": 5e-07, "loss": 0.9705, "num_input_tokens_seen": 111850540, "step": 967 }, { "epoch": 5.198924731182796, "loss": 0.6226041913032532, "loss_ce": 4.559689114103094e-05, "loss_iou": 0.259765625, "loss_num": 0.0208740234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 111850540, "step": 967 }, { "epoch": 5.204301075268817, "grad_norm": 19.85148811340332, "learning_rate": 5e-07, "loss": 0.9074, "num_input_tokens_seen": 111965792, "step": 968 }, { "epoch": 5.204301075268817, "loss": 0.8223303556442261, "loss_ce": 6.47775232209824e-05, "loss_iou": 0.37109375, "loss_num": 0.0162353515625, "loss_xval": 0.8203125, "num_input_tokens_seen": 111965792, "step": 968 }, { "epoch": 5.209677419354839, "grad_norm": 16.931440353393555, "learning_rate": 5e-07, "loss": 0.7083, "num_input_tokens_seen": 112080120, "step": 969 }, { "epoch": 5.209677419354839, "loss": 0.3956436812877655, "loss_ce": 0.00013587225112132728, "loss_iou": 0.1591796875, "loss_num": 0.01556396484375, "loss_xval": 0.39453125, "num_input_tokens_seen": 112080120, "step": 969 }, { "epoch": 5.21505376344086, "grad_norm": 45.84040069580078, "learning_rate": 5e-07, "loss": 0.907, "num_input_tokens_seen": 112196040, "step": 970 }, { "epoch": 5.21505376344086, "loss": 1.0074305534362793, "loss_ce": 0.00010634810314513743, "loss_iou": 0.435546875, "loss_num": 0.02734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 112196040, "step": 970 }, { "epoch": 5.220430107526882, "grad_norm": 14.754634857177734, "learning_rate": 5e-07, "loss": 0.898, "num_input_tokens_seen": 112310264, "step": 971 }, { "epoch": 5.220430107526882, "loss": 1.0479291677474976, "loss_ce": 7.755213300697505e-05, "loss_iou": 0.443359375, "loss_num": 0.032470703125, "loss_xval": 1.046875, "num_input_tokens_seen": 112310264, "step": 971 }, { "epoch": 5.225806451612903, "grad_norm": 20.005626678466797, "learning_rate": 5e-07, "loss": 0.9375, "num_input_tokens_seen": 112427940, "step": 972 }, { "epoch": 5.225806451612903, "loss": 0.8758045434951782, "loss_ce": 7.213355274870992e-05, "loss_iou": 0.388671875, "loss_num": 0.019775390625, "loss_xval": 0.875, "num_input_tokens_seen": 112427940, "step": 972 }, { "epoch": 5.231182795698925, "grad_norm": 28.49736785888672, "learning_rate": 5e-07, "loss": 0.7845, "num_input_tokens_seen": 112543680, "step": 973 }, { "epoch": 5.231182795698925, "loss": 0.6501961946487427, "loss_ce": 4.967572385794483e-05, "loss_iou": 0.28515625, "loss_num": 0.0157470703125, "loss_xval": 0.6484375, "num_input_tokens_seen": 112543680, "step": 973 }, { "epoch": 5.236559139784946, "grad_norm": 19.749181747436523, "learning_rate": 5e-07, "loss": 0.9379, "num_input_tokens_seen": 112661724, "step": 974 }, { "epoch": 5.236559139784946, "loss": 0.7291199564933777, "loss_ce": 0.00011605054896790534, "loss_iou": 0.302734375, "loss_num": 0.02490234375, "loss_xval": 0.73046875, "num_input_tokens_seen": 112661724, "step": 974 }, { "epoch": 5.241935483870968, "grad_norm": 35.5351676940918, "learning_rate": 5e-07, "loss": 0.9697, "num_input_tokens_seen": 112777552, "step": 975 }, { "epoch": 5.241935483870968, "loss": 0.9566102027893066, "loss_ce": 6.723313708789647e-05, "loss_iou": 0.423828125, "loss_num": 0.021484375, "loss_xval": 0.95703125, "num_input_tokens_seen": 112777552, "step": 975 }, { "epoch": 5.247311827956989, "grad_norm": 20.717309951782227, "learning_rate": 5e-07, "loss": 0.7715, "num_input_tokens_seen": 112890192, "step": 976 }, { "epoch": 5.247311827956989, "loss": 0.680234968662262, "loss_ce": 5.922700802329928e-05, "loss_iou": 0.228515625, "loss_num": 0.044677734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 112890192, "step": 976 }, { "epoch": 5.252688172043011, "grad_norm": 19.791107177734375, "learning_rate": 5e-07, "loss": 0.9127, "num_input_tokens_seen": 113005360, "step": 977 }, { "epoch": 5.252688172043011, "loss": 1.1172900199890137, "loss_ce": 0.00010263813601341099, "loss_iou": 0.48046875, "loss_num": 0.031005859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 113005360, "step": 977 }, { "epoch": 5.258064516129032, "grad_norm": 22.616127014160156, "learning_rate": 5e-07, "loss": 0.8365, "num_input_tokens_seen": 113119720, "step": 978 }, { "epoch": 5.258064516129032, "loss": 0.8443759083747864, "loss_ce": 0.00013762840535491705, "loss_iou": 0.373046875, "loss_num": 0.019775390625, "loss_xval": 0.84375, "num_input_tokens_seen": 113119720, "step": 978 }, { "epoch": 5.263440860215054, "grad_norm": 22.014680862426758, "learning_rate": 5e-07, "loss": 1.0424, "num_input_tokens_seen": 113232972, "step": 979 }, { "epoch": 5.263440860215054, "loss": 1.1677894592285156, "loss_ce": 0.0003089120436925441, "loss_iou": 0.470703125, "loss_num": 0.045166015625, "loss_xval": 1.1640625, "num_input_tokens_seen": 113232972, "step": 979 }, { "epoch": 5.268817204301075, "grad_norm": 24.203428268432617, "learning_rate": 5e-07, "loss": 0.8739, "num_input_tokens_seen": 113346608, "step": 980 }, { "epoch": 5.268817204301075, "loss": 0.7201173305511475, "loss_ce": 2.453786692058202e-05, "loss_iou": 0.3046875, "loss_num": 0.0220947265625, "loss_xval": 0.71875, "num_input_tokens_seen": 113346608, "step": 980 }, { "epoch": 5.274193548387097, "grad_norm": 41.4599723815918, "learning_rate": 5e-07, "loss": 0.9417, "num_input_tokens_seen": 113459924, "step": 981 }, { "epoch": 5.274193548387097, "loss": 1.1257874965667725, "loss_ce": 5.5048963986337185e-05, "loss_iou": 0.478515625, "loss_num": 0.033935546875, "loss_xval": 1.125, "num_input_tokens_seen": 113459924, "step": 981 }, { "epoch": 5.279569892473118, "grad_norm": 29.91318702697754, "learning_rate": 5e-07, "loss": 0.8424, "num_input_tokens_seen": 113577796, "step": 982 }, { "epoch": 5.279569892473118, "loss": 0.6635273694992065, "loss_ce": 0.00019731577776838094, "loss_iou": 0.275390625, "loss_num": 0.0223388671875, "loss_xval": 0.6640625, "num_input_tokens_seen": 113577796, "step": 982 }, { "epoch": 5.28494623655914, "grad_norm": 21.32967185974121, "learning_rate": 5e-07, "loss": 0.6676, "num_input_tokens_seen": 113696988, "step": 983 }, { "epoch": 5.28494623655914, "loss": 0.5713807344436646, "loss_ce": 9.163974755210802e-05, "loss_iou": 0.23046875, "loss_num": 0.022216796875, "loss_xval": 0.5703125, "num_input_tokens_seen": 113696988, "step": 983 }, { "epoch": 5.290322580645161, "grad_norm": 24.32184410095215, "learning_rate": 5e-07, "loss": 0.8025, "num_input_tokens_seen": 113813964, "step": 984 }, { "epoch": 5.290322580645161, "loss": 1.0932440757751465, "loss_ce": 0.00022650620667263865, "loss_iou": 0.451171875, "loss_num": 0.037841796875, "loss_xval": 1.09375, "num_input_tokens_seen": 113813964, "step": 984 }, { "epoch": 5.295698924731183, "grad_norm": 17.606706619262695, "learning_rate": 5e-07, "loss": 0.7452, "num_input_tokens_seen": 113929856, "step": 985 }, { "epoch": 5.295698924731183, "loss": 0.885560154914856, "loss_ce": 6.2120336224325e-05, "loss_iou": 0.373046875, "loss_num": 0.027587890625, "loss_xval": 0.88671875, "num_input_tokens_seen": 113929856, "step": 985 }, { "epoch": 5.301075268817204, "grad_norm": 24.43839454650879, "learning_rate": 5e-07, "loss": 0.8784, "num_input_tokens_seen": 114042588, "step": 986 }, { "epoch": 5.301075268817204, "loss": 0.7742716073989868, "loss_ce": 0.00010168163862545043, "loss_iou": 0.3359375, "loss_num": 0.0208740234375, "loss_xval": 0.7734375, "num_input_tokens_seen": 114042588, "step": 986 }, { "epoch": 5.306451612903226, "grad_norm": 15.608203887939453, "learning_rate": 5e-07, "loss": 0.8711, "num_input_tokens_seen": 114155620, "step": 987 }, { "epoch": 5.306451612903226, "loss": 0.9046227931976318, "loss_ce": 8.175938273780048e-05, "loss_iou": 0.37109375, "loss_num": 0.032470703125, "loss_xval": 0.90625, "num_input_tokens_seen": 114155620, "step": 987 }, { "epoch": 5.311827956989247, "grad_norm": 20.724849700927734, "learning_rate": 5e-07, "loss": 0.7115, "num_input_tokens_seen": 114273004, "step": 988 }, { "epoch": 5.311827956989247, "loss": 0.5960096120834351, "loss_ce": 6.236406625248492e-05, "loss_iou": 0.25390625, "loss_num": 0.017578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 114273004, "step": 988 }, { "epoch": 5.317204301075269, "grad_norm": 29.125120162963867, "learning_rate": 5e-07, "loss": 0.9703, "num_input_tokens_seen": 114387128, "step": 989 }, { "epoch": 5.317204301075269, "loss": 1.1873359680175781, "loss_ce": 8.016960055101663e-05, "loss_iou": 0.5, "loss_num": 0.037353515625, "loss_xval": 1.1875, "num_input_tokens_seen": 114387128, "step": 989 }, { "epoch": 5.32258064516129, "grad_norm": 22.320629119873047, "learning_rate": 5e-07, "loss": 0.8723, "num_input_tokens_seen": 114501684, "step": 990 }, { "epoch": 5.32258064516129, "loss": 1.0698007345199585, "loss_ce": 9.852195216808468e-05, "loss_iou": 0.43359375, "loss_num": 0.040283203125, "loss_xval": 1.0703125, "num_input_tokens_seen": 114501684, "step": 990 }, { "epoch": 5.327956989247312, "grad_norm": 19.993806838989258, "learning_rate": 5e-07, "loss": 0.809, "num_input_tokens_seen": 114620192, "step": 991 }, { "epoch": 5.327956989247312, "loss": 0.631579577922821, "loss_ce": 0.00010983629908878356, "loss_iou": 0.28125, "loss_num": 0.01348876953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 114620192, "step": 991 }, { "epoch": 5.333333333333333, "grad_norm": 39.94511413574219, "learning_rate": 5e-07, "loss": 0.8758, "num_input_tokens_seen": 114737780, "step": 992 }, { "epoch": 5.333333333333333, "loss": 0.804857075214386, "loss_ce": 4.752490349346772e-05, "loss_iou": 0.341796875, "loss_num": 0.024658203125, "loss_xval": 0.8046875, "num_input_tokens_seen": 114737780, "step": 992 }, { "epoch": 5.338709677419355, "grad_norm": 31.233739852905273, "learning_rate": 5e-07, "loss": 0.772, "num_input_tokens_seen": 114853628, "step": 993 }, { "epoch": 5.338709677419355, "loss": 0.7433304786682129, "loss_ce": 0.00016641248657833785, "loss_iou": 0.271484375, "loss_num": 0.039794921875, "loss_xval": 0.7421875, "num_input_tokens_seen": 114853628, "step": 993 }, { "epoch": 5.344086021505376, "grad_norm": 40.74252700805664, "learning_rate": 5e-07, "loss": 0.7098, "num_input_tokens_seen": 114971232, "step": 994 }, { "epoch": 5.344086021505376, "loss": 0.8043434619903564, "loss_ce": 0.00014426870620809495, "loss_iou": 0.337890625, "loss_num": 0.0255126953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 114971232, "step": 994 }, { "epoch": 5.349462365591398, "grad_norm": 19.484222412109375, "learning_rate": 5e-07, "loss": 0.8103, "num_input_tokens_seen": 115087368, "step": 995 }, { "epoch": 5.349462365591398, "loss": 0.8147953152656555, "loss_ce": 9.804253932088614e-05, "loss_iou": 0.353515625, "loss_num": 0.0218505859375, "loss_xval": 0.81640625, "num_input_tokens_seen": 115087368, "step": 995 }, { "epoch": 5.354838709677419, "grad_norm": 23.970918655395508, "learning_rate": 5e-07, "loss": 0.8829, "num_input_tokens_seen": 115205400, "step": 996 }, { "epoch": 5.354838709677419, "loss": 1.0835533142089844, "loss_ce": 5.731018609367311e-05, "loss_iou": 0.458984375, "loss_num": 0.032958984375, "loss_xval": 1.0859375, "num_input_tokens_seen": 115205400, "step": 996 }, { "epoch": 5.360215053763441, "grad_norm": 23.655349731445312, "learning_rate": 5e-07, "loss": 0.8119, "num_input_tokens_seen": 115318716, "step": 997 }, { "epoch": 5.360215053763441, "loss": 0.7061066627502441, "loss_ce": 5.19650275236927e-05, "loss_iou": 0.306640625, "loss_num": 0.018310546875, "loss_xval": 0.70703125, "num_input_tokens_seen": 115318716, "step": 997 }, { "epoch": 5.365591397849462, "grad_norm": 22.852298736572266, "learning_rate": 5e-07, "loss": 0.9225, "num_input_tokens_seen": 115430696, "step": 998 }, { "epoch": 5.365591397849462, "loss": 0.6081452369689941, "loss_ce": 0.0001130382006522268, "loss_iou": 0.2578125, "loss_num": 0.0184326171875, "loss_xval": 0.609375, "num_input_tokens_seen": 115430696, "step": 998 }, { "epoch": 5.370967741935484, "grad_norm": 34.289955139160156, "learning_rate": 5e-07, "loss": 0.6236, "num_input_tokens_seen": 115546732, "step": 999 }, { "epoch": 5.370967741935484, "loss": 0.46182912588119507, "loss_ce": 3.71288406313397e-05, "loss_iou": 0.1923828125, "loss_num": 0.01531982421875, "loss_xval": 0.4609375, "num_input_tokens_seen": 115546732, "step": 999 }, { "epoch": 5.376344086021505, "grad_norm": 17.00465965270996, "learning_rate": 5e-07, "loss": 0.8604, "num_input_tokens_seen": 115662512, "step": 1000 }, { "epoch": 5.376344086021505, "eval_icons_CIoU": 0.1431984044611454, "eval_icons_GIoU": 0.1119944155216217, "eval_icons_IoU": 0.3019924908876419, "eval_icons_MAE_all": 0.03257213905453682, "eval_icons_MAE_h": 0.03575834073126316, "eval_icons_MAE_w": 0.05724859982728958, "eval_icons_MAE_x_boxes": 0.0540449395775795, "eval_icons_MAE_y_boxes": 0.034935121424496174, "eval_icons_NUM_probability": 0.9981670081615448, "eval_icons_inside_bbox": 0.578125, "eval_icons_loss": 1.9265587329864502, "eval_icons_loss_ce": 0.00025651217583799735, "eval_icons_loss_iou": 0.87109375, "eval_icons_loss_num": 0.033550262451171875, "eval_icons_loss_xval": 1.9091796875, "eval_icons_runtime": 41.2582, "eval_icons_samples_per_second": 1.212, "eval_icons_steps_per_second": 0.048, "num_input_tokens_seen": 115662512, "step": 1000 }, { "epoch": 5.376344086021505, "eval_screenspot_CIoU": 0.2763674358526866, "eval_screenspot_GIoU": 0.2601564625898997, "eval_screenspot_IoU": 0.3782426317532857, "eval_screenspot_MAE_all": 0.0681186355650425, "eval_screenspot_MAE_h": 0.050001259272297226, "eval_screenspot_MAE_w": 0.0916728029648463, "eval_screenspot_MAE_x_boxes": 0.0983146404226621, "eval_screenspot_MAE_y_boxes": 0.0408933088183403, "eval_screenspot_NUM_probability": 0.9997990131378174, "eval_screenspot_inside_bbox": 0.6804166634877523, "eval_screenspot_loss": 1.870013952255249, "eval_screenspot_loss_ce": 9.900589308623846e-05, "eval_screenspot_loss_iou": 0.7779947916666666, "eval_screenspot_loss_num": 0.07763926188151042, "eval_screenspot_loss_xval": 1.9441731770833333, "eval_screenspot_runtime": 66.96, "eval_screenspot_samples_per_second": 1.329, "eval_screenspot_steps_per_second": 0.045, "num_input_tokens_seen": 115662512, "step": 1000 }, { "epoch": 5.376344086021505, "loss": 1.8569754362106323, "loss_ce": 4.1764185880310833e-05, "loss_iou": 0.76171875, "loss_num": 0.06591796875, "loss_xval": 1.859375, "num_input_tokens_seen": 115662512, "step": 1000 }, { "epoch": 5.381720430107527, "grad_norm": 26.46554946899414, "learning_rate": 5e-07, "loss": 0.8564, "num_input_tokens_seen": 115778864, "step": 1001 }, { "epoch": 5.381720430107527, "loss": 0.803631067276001, "loss_ce": 0.00028633768670260906, "loss_iou": 0.33984375, "loss_num": 0.024658203125, "loss_xval": 0.8046875, "num_input_tokens_seen": 115778864, "step": 1001 }, { "epoch": 5.387096774193548, "grad_norm": 19.943986892700195, "learning_rate": 5e-07, "loss": 0.8006, "num_input_tokens_seen": 115892680, "step": 1002 }, { "epoch": 5.387096774193548, "loss": 0.8816574811935425, "loss_ce": 6.569357356056571e-05, "loss_iou": 0.375, "loss_num": 0.026611328125, "loss_xval": 0.8828125, "num_input_tokens_seen": 115892680, "step": 1002 }, { "epoch": 5.39247311827957, "grad_norm": 15.120465278625488, "learning_rate": 5e-07, "loss": 0.7566, "num_input_tokens_seen": 116009772, "step": 1003 }, { "epoch": 5.39247311827957, "loss": 0.9390265941619873, "loss_ce": 6.177990871947259e-05, "loss_iou": 0.40234375, "loss_num": 0.026611328125, "loss_xval": 0.9375, "num_input_tokens_seen": 116009772, "step": 1003 }, { "epoch": 5.397849462365591, "grad_norm": 20.098363876342773, "learning_rate": 5e-07, "loss": 0.7118, "num_input_tokens_seen": 116125908, "step": 1004 }, { "epoch": 5.397849462365591, "loss": 0.44638848304748535, "loss_ce": 9.9440265330486e-05, "loss_iou": 0.1884765625, "loss_num": 0.01385498046875, "loss_xval": 0.4453125, "num_input_tokens_seen": 116125908, "step": 1004 }, { "epoch": 5.403225806451613, "grad_norm": 24.70789337158203, "learning_rate": 5e-07, "loss": 0.6952, "num_input_tokens_seen": 116241220, "step": 1005 }, { "epoch": 5.403225806451613, "loss": 0.6777696013450623, "loss_ce": 3.524331987136975e-05, "loss_iou": 0.279296875, "loss_num": 0.0238037109375, "loss_xval": 0.6796875, "num_input_tokens_seen": 116241220, "step": 1005 }, { "epoch": 5.408602150537634, "grad_norm": 22.774497985839844, "learning_rate": 5e-07, "loss": 0.7798, "num_input_tokens_seen": 116357012, "step": 1006 }, { "epoch": 5.408602150537634, "loss": 0.7890336513519287, "loss_ce": 0.0002152999659301713, "loss_iou": 0.328125, "loss_num": 0.026123046875, "loss_xval": 0.7890625, "num_input_tokens_seen": 116357012, "step": 1006 }, { "epoch": 5.413978494623656, "grad_norm": 24.898347854614258, "learning_rate": 5e-07, "loss": 0.8919, "num_input_tokens_seen": 116472996, "step": 1007 }, { "epoch": 5.413978494623656, "loss": 0.7019522190093994, "loss_ce": 4.792587424162775e-05, "loss_iou": 0.30078125, "loss_num": 0.02001953125, "loss_xval": 0.703125, "num_input_tokens_seen": 116472996, "step": 1007 }, { "epoch": 5.419354838709677, "grad_norm": 20.34859275817871, "learning_rate": 5e-07, "loss": 0.8327, "num_input_tokens_seen": 116587084, "step": 1008 }, { "epoch": 5.419354838709677, "loss": 0.8022640943527222, "loss_ce": 0.0002621495514176786, "loss_iou": 0.3359375, "loss_num": 0.026123046875, "loss_xval": 0.80078125, "num_input_tokens_seen": 116587084, "step": 1008 }, { "epoch": 5.424731182795699, "grad_norm": 23.58875846862793, "learning_rate": 5e-07, "loss": 0.9461, "num_input_tokens_seen": 116701308, "step": 1009 }, { "epoch": 5.424731182795699, "loss": 0.5937784910202026, "loss_ce": 2.8486461815191433e-05, "loss_iou": 0.228515625, "loss_num": 0.0272216796875, "loss_xval": 0.59375, "num_input_tokens_seen": 116701308, "step": 1009 }, { "epoch": 5.43010752688172, "grad_norm": 19.04088020324707, "learning_rate": 5e-07, "loss": 0.8925, "num_input_tokens_seen": 116817836, "step": 1010 }, { "epoch": 5.43010752688172, "loss": 0.758860170841217, "loss_ce": 7.111189916031435e-05, "loss_iou": 0.302734375, "loss_num": 0.03076171875, "loss_xval": 0.7578125, "num_input_tokens_seen": 116817836, "step": 1010 }, { "epoch": 5.435483870967742, "grad_norm": 22.957624435424805, "learning_rate": 5e-07, "loss": 0.8149, "num_input_tokens_seen": 116932500, "step": 1011 }, { "epoch": 5.435483870967742, "loss": 0.7815693616867065, "loss_ce": 7.522432133555412e-05, "loss_iou": 0.3125, "loss_num": 0.031494140625, "loss_xval": 0.78125, "num_input_tokens_seen": 116932500, "step": 1011 }, { "epoch": 5.440860215053763, "grad_norm": 21.0329647064209, "learning_rate": 5e-07, "loss": 0.7629, "num_input_tokens_seen": 117050388, "step": 1012 }, { "epoch": 5.440860215053763, "loss": 0.7374191284179688, "loss_ce": 0.0006027383496984839, "loss_iou": 0.31640625, "loss_num": 0.02099609375, "loss_xval": 0.73828125, "num_input_tokens_seen": 117050388, "step": 1012 }, { "epoch": 5.446236559139785, "grad_norm": 16.793659210205078, "learning_rate": 5e-07, "loss": 0.8723, "num_input_tokens_seen": 117162348, "step": 1013 }, { "epoch": 5.446236559139785, "loss": 0.7153887748718262, "loss_ce": 5.674107524100691e-05, "loss_iou": 0.28515625, "loss_num": 0.0291748046875, "loss_xval": 0.71484375, "num_input_tokens_seen": 117162348, "step": 1013 }, { "epoch": 5.451612903225806, "grad_norm": 20.05118179321289, "learning_rate": 5e-07, "loss": 0.7877, "num_input_tokens_seen": 117276136, "step": 1014 }, { "epoch": 5.451612903225806, "loss": 0.45569682121276855, "loss_ce": 0.00025247770827263594, "loss_iou": 0.193359375, "loss_num": 0.013671875, "loss_xval": 0.455078125, "num_input_tokens_seen": 117276136, "step": 1014 }, { "epoch": 5.456989247311828, "grad_norm": 17.107030868530273, "learning_rate": 5e-07, "loss": 0.725, "num_input_tokens_seen": 117394292, "step": 1015 }, { "epoch": 5.456989247311828, "loss": 0.5877854824066162, "loss_ce": 0.00013903755461797118, "loss_iou": 0.2412109375, "loss_num": 0.0211181640625, "loss_xval": 0.5859375, "num_input_tokens_seen": 117394292, "step": 1015 }, { "epoch": 5.462365591397849, "grad_norm": 19.895980834960938, "learning_rate": 5e-07, "loss": 0.9124, "num_input_tokens_seen": 117510636, "step": 1016 }, { "epoch": 5.462365591397849, "loss": 0.9750365018844604, "loss_ce": 6.089838279876858e-05, "loss_iou": 0.390625, "loss_num": 0.038818359375, "loss_xval": 0.9765625, "num_input_tokens_seen": 117510636, "step": 1016 }, { "epoch": 5.467741935483871, "grad_norm": 16.95079231262207, "learning_rate": 5e-07, "loss": 0.8908, "num_input_tokens_seen": 117621136, "step": 1017 }, { "epoch": 5.467741935483871, "loss": 1.0133438110351562, "loss_ce": 3.8238606066443026e-05, "loss_iou": 0.453125, "loss_num": 0.021728515625, "loss_xval": 1.015625, "num_input_tokens_seen": 117621136, "step": 1017 }, { "epoch": 5.473118279569892, "grad_norm": 23.77897834777832, "learning_rate": 5e-07, "loss": 0.935, "num_input_tokens_seen": 117735948, "step": 1018 }, { "epoch": 5.473118279569892, "loss": 0.6700220108032227, "loss_ce": 0.0001001116179395467, "loss_iou": 0.2890625, "loss_num": 0.0186767578125, "loss_xval": 0.671875, "num_input_tokens_seen": 117735948, "step": 1018 }, { "epoch": 5.478494623655914, "grad_norm": 21.70502471923828, "learning_rate": 5e-07, "loss": 0.7963, "num_input_tokens_seen": 117847512, "step": 1019 }, { "epoch": 5.478494623655914, "loss": 0.611150860786438, "loss_ce": 6.688533176202327e-05, "loss_iou": 0.26171875, "loss_num": 0.017333984375, "loss_xval": 0.609375, "num_input_tokens_seen": 117847512, "step": 1019 }, { "epoch": 5.483870967741936, "grad_norm": 25.720298767089844, "learning_rate": 5e-07, "loss": 0.8846, "num_input_tokens_seen": 117965692, "step": 1020 }, { "epoch": 5.483870967741936, "loss": 1.0232864618301392, "loss_ce": 9.307469008490443e-05, "loss_iou": 0.44921875, "loss_num": 0.0247802734375, "loss_xval": 1.0234375, "num_input_tokens_seen": 117965692, "step": 1020 }, { "epoch": 5.489247311827957, "grad_norm": 26.078044891357422, "learning_rate": 5e-07, "loss": 0.7786, "num_input_tokens_seen": 118077724, "step": 1021 }, { "epoch": 5.489247311827957, "loss": 0.8584893345832825, "loss_ce": 0.0008232677355408669, "loss_iou": 0.36328125, "loss_num": 0.02587890625, "loss_xval": 0.859375, "num_input_tokens_seen": 118077724, "step": 1021 }, { "epoch": 5.494623655913978, "grad_norm": 21.55709457397461, "learning_rate": 5e-07, "loss": 0.8687, "num_input_tokens_seen": 118194568, "step": 1022 }, { "epoch": 5.494623655913978, "loss": 0.8727207779884338, "loss_ce": 0.00040638542850501835, "loss_iou": 0.3515625, "loss_num": 0.0341796875, "loss_xval": 0.87109375, "num_input_tokens_seen": 118194568, "step": 1022 }, { "epoch": 5.5, "grad_norm": 17.198841094970703, "learning_rate": 5e-07, "loss": 0.8186, "num_input_tokens_seen": 118313568, "step": 1023 }, { "epoch": 5.5, "loss": 0.7341853380203247, "loss_ce": 5.446621071314439e-05, "loss_iou": 0.32421875, "loss_num": 0.01708984375, "loss_xval": 0.734375, "num_input_tokens_seen": 118313568, "step": 1023 }, { "epoch": 5.505376344086022, "grad_norm": 20.01660919189453, "learning_rate": 5e-07, "loss": 1.0034, "num_input_tokens_seen": 118430436, "step": 1024 }, { "epoch": 5.505376344086022, "loss": 1.048698902130127, "loss_ce": 0.00011490018368931487, "loss_iou": 0.447265625, "loss_num": 0.03076171875, "loss_xval": 1.046875, "num_input_tokens_seen": 118430436, "step": 1024 }, { "epoch": 5.510752688172043, "grad_norm": 25.626317977905273, "learning_rate": 5e-07, "loss": 0.7393, "num_input_tokens_seen": 118547836, "step": 1025 }, { "epoch": 5.510752688172043, "loss": 0.7297875881195068, "loss_ce": 5.126850010128692e-05, "loss_iou": 0.306640625, "loss_num": 0.0234375, "loss_xval": 0.73046875, "num_input_tokens_seen": 118547836, "step": 1025 }, { "epoch": 5.516129032258064, "grad_norm": 17.82383155822754, "learning_rate": 5e-07, "loss": 0.878, "num_input_tokens_seen": 118664840, "step": 1026 }, { "epoch": 5.516129032258064, "loss": 0.9370638132095337, "loss_ce": 5.213436088524759e-05, "loss_iou": 0.408203125, "loss_num": 0.0245361328125, "loss_xval": 0.9375, "num_input_tokens_seen": 118664840, "step": 1026 }, { "epoch": 5.521505376344086, "grad_norm": 18.949901580810547, "learning_rate": 5e-07, "loss": 0.7931, "num_input_tokens_seen": 118782272, "step": 1027 }, { "epoch": 5.521505376344086, "loss": 0.6934572458267212, "loss_ce": 9.782991401152685e-05, "loss_iou": 0.27734375, "loss_num": 0.02783203125, "loss_xval": 0.6953125, "num_input_tokens_seen": 118782272, "step": 1027 }, { "epoch": 5.526881720430108, "grad_norm": 19.030418395996094, "learning_rate": 5e-07, "loss": 0.809, "num_input_tokens_seen": 118901136, "step": 1028 }, { "epoch": 5.526881720430108, "loss": 0.7654236555099487, "loss_ce": 4.275661558494903e-05, "loss_iou": 0.337890625, "loss_num": 0.0179443359375, "loss_xval": 0.765625, "num_input_tokens_seen": 118901136, "step": 1028 }, { "epoch": 5.532258064516129, "grad_norm": 19.931856155395508, "learning_rate": 5e-07, "loss": 0.9438, "num_input_tokens_seen": 119013340, "step": 1029 }, { "epoch": 5.532258064516129, "loss": 0.6655006408691406, "loss_ce": 9.538985614199191e-05, "loss_iou": 0.28125, "loss_num": 0.0208740234375, "loss_xval": 0.6640625, "num_input_tokens_seen": 119013340, "step": 1029 }, { "epoch": 5.53763440860215, "grad_norm": 18.061710357666016, "learning_rate": 5e-07, "loss": 0.7786, "num_input_tokens_seen": 119125940, "step": 1030 }, { "epoch": 5.53763440860215, "loss": 0.8103165626525879, "loss_ce": 0.000990381115116179, "loss_iou": 0.3203125, "loss_num": 0.0341796875, "loss_xval": 0.80859375, "num_input_tokens_seen": 119125940, "step": 1030 }, { "epoch": 5.543010752688172, "grad_norm": 16.349830627441406, "learning_rate": 5e-07, "loss": 0.6793, "num_input_tokens_seen": 119244672, "step": 1031 }, { "epoch": 5.543010752688172, "loss": 0.5165116786956787, "loss_ce": 3.2172050850931555e-05, "loss_iou": 0.2294921875, "loss_num": 0.01153564453125, "loss_xval": 0.515625, "num_input_tokens_seen": 119244672, "step": 1031 }, { "epoch": 5.548387096774194, "grad_norm": 22.326555252075195, "learning_rate": 5e-07, "loss": 0.698, "num_input_tokens_seen": 119359140, "step": 1032 }, { "epoch": 5.548387096774194, "loss": 0.9558979272842407, "loss_ce": 8.733435242902488e-05, "loss_iou": 0.404296875, "loss_num": 0.0294189453125, "loss_xval": 0.95703125, "num_input_tokens_seen": 119359140, "step": 1032 }, { "epoch": 5.553763440860215, "grad_norm": 22.768014907836914, "learning_rate": 5e-07, "loss": 0.6821, "num_input_tokens_seen": 119474920, "step": 1033 }, { "epoch": 5.553763440860215, "loss": 0.6824921369552612, "loss_ce": 0.00011908027227036655, "loss_iou": 0.291015625, "loss_num": 0.0205078125, "loss_xval": 0.68359375, "num_input_tokens_seen": 119474920, "step": 1033 }, { "epoch": 5.559139784946236, "grad_norm": 24.467926025390625, "learning_rate": 5e-07, "loss": 0.888, "num_input_tokens_seen": 119588092, "step": 1034 }, { "epoch": 5.559139784946236, "loss": 0.9601010084152222, "loss_ce": 0.0008724901708774269, "loss_iou": 0.396484375, "loss_num": 0.032958984375, "loss_xval": 0.9609375, "num_input_tokens_seen": 119588092, "step": 1034 }, { "epoch": 5.564516129032258, "grad_norm": 19.513015747070312, "learning_rate": 5e-07, "loss": 0.6928, "num_input_tokens_seen": 119705132, "step": 1035 }, { "epoch": 5.564516129032258, "loss": 0.8777458667755127, "loss_ce": 6.0327365645207465e-05, "loss_iou": 0.341796875, "loss_num": 0.038818359375, "loss_xval": 0.87890625, "num_input_tokens_seen": 119705132, "step": 1035 }, { "epoch": 5.56989247311828, "grad_norm": 27.232027053833008, "learning_rate": 5e-07, "loss": 0.8327, "num_input_tokens_seen": 119820272, "step": 1036 }, { "epoch": 5.56989247311828, "loss": 1.0052123069763184, "loss_ce": 8.535086817573756e-05, "loss_iou": 0.427734375, "loss_num": 0.0301513671875, "loss_xval": 1.0078125, "num_input_tokens_seen": 119820272, "step": 1036 }, { "epoch": 5.575268817204301, "grad_norm": 17.792022705078125, "learning_rate": 5e-07, "loss": 0.8742, "num_input_tokens_seen": 119937040, "step": 1037 }, { "epoch": 5.575268817204301, "loss": 0.8625937700271606, "loss_ce": 4.4952994358027354e-05, "loss_iou": 0.36328125, "loss_num": 0.027099609375, "loss_xval": 0.86328125, "num_input_tokens_seen": 119937040, "step": 1037 }, { "epoch": 5.580645161290323, "grad_norm": 18.07575225830078, "learning_rate": 5e-07, "loss": 0.8811, "num_input_tokens_seen": 120050432, "step": 1038 }, { "epoch": 5.580645161290323, "loss": 0.7014530897140503, "loss_ce": 3.710873716045171e-05, "loss_iou": 0.30859375, "loss_num": 0.01708984375, "loss_xval": 0.703125, "num_input_tokens_seen": 120050432, "step": 1038 }, { "epoch": 5.586021505376344, "grad_norm": 21.08131980895996, "learning_rate": 5e-07, "loss": 0.734, "num_input_tokens_seen": 120166332, "step": 1039 }, { "epoch": 5.586021505376344, "loss": 0.8394078016281128, "loss_ce": 5.230515307630412e-05, "loss_iou": 0.375, "loss_num": 0.0177001953125, "loss_xval": 0.83984375, "num_input_tokens_seen": 120166332, "step": 1039 }, { "epoch": 5.591397849462366, "grad_norm": 20.74188995361328, "learning_rate": 5e-07, "loss": 0.9704, "num_input_tokens_seen": 120282232, "step": 1040 }, { "epoch": 5.591397849462366, "loss": 1.0970479249954224, "loss_ce": 0.0001241082645719871, "loss_iou": 0.478515625, "loss_num": 0.027587890625, "loss_xval": 1.09375, "num_input_tokens_seen": 120282232, "step": 1040 }, { "epoch": 5.596774193548387, "grad_norm": 23.10636329650879, "learning_rate": 5e-07, "loss": 0.778, "num_input_tokens_seen": 120396624, "step": 1041 }, { "epoch": 5.596774193548387, "loss": 0.6028925180435181, "loss_ce": 0.00010933751764241606, "loss_iou": 0.2578125, "loss_num": 0.0177001953125, "loss_xval": 0.6015625, "num_input_tokens_seen": 120396624, "step": 1041 }, { "epoch": 5.602150537634409, "grad_norm": 18.333646774291992, "learning_rate": 5e-07, "loss": 0.8612, "num_input_tokens_seen": 120509336, "step": 1042 }, { "epoch": 5.602150537634409, "loss": 1.0221068859100342, "loss_ce": 0.00013428770762402564, "loss_iou": 0.44140625, "loss_num": 0.028076171875, "loss_xval": 1.0234375, "num_input_tokens_seen": 120509336, "step": 1042 }, { "epoch": 5.60752688172043, "grad_norm": 16.163583755493164, "learning_rate": 5e-07, "loss": 0.8616, "num_input_tokens_seen": 120622828, "step": 1043 }, { "epoch": 5.60752688172043, "loss": 0.7031925916671753, "loss_ce": 6.757497612852603e-05, "loss_iou": 0.30078125, "loss_num": 0.0205078125, "loss_xval": 0.703125, "num_input_tokens_seen": 120622828, "step": 1043 }, { "epoch": 5.612903225806452, "grad_norm": 25.339433670043945, "learning_rate": 5e-07, "loss": 0.9404, "num_input_tokens_seen": 120739796, "step": 1044 }, { "epoch": 5.612903225806452, "loss": 1.0435466766357422, "loss_ce": 8.959023398347199e-05, "loss_iou": 0.435546875, "loss_num": 0.03466796875, "loss_xval": 1.046875, "num_input_tokens_seen": 120739796, "step": 1044 }, { "epoch": 5.618279569892473, "grad_norm": 17.174299240112305, "learning_rate": 5e-07, "loss": 0.7364, "num_input_tokens_seen": 120859328, "step": 1045 }, { "epoch": 5.618279569892473, "loss": 0.6946326494216919, "loss_ce": 5.254134885035455e-05, "loss_iou": 0.296875, "loss_num": 0.020263671875, "loss_xval": 0.6953125, "num_input_tokens_seen": 120859328, "step": 1045 }, { "epoch": 5.623655913978495, "grad_norm": 20.15995979309082, "learning_rate": 5e-07, "loss": 0.748, "num_input_tokens_seen": 120976024, "step": 1046 }, { "epoch": 5.623655913978495, "loss": 0.7835087776184082, "loss_ce": 6.15046446910128e-05, "loss_iou": 0.3203125, "loss_num": 0.0283203125, "loss_xval": 0.78515625, "num_input_tokens_seen": 120976024, "step": 1046 }, { "epoch": 5.629032258064516, "grad_norm": 24.234050750732422, "learning_rate": 5e-07, "loss": 0.9747, "num_input_tokens_seen": 121085400, "step": 1047 }, { "epoch": 5.629032258064516, "loss": 1.0562951564788818, "loss_ce": 0.00014276437286753207, "loss_iou": 0.4453125, "loss_num": 0.033447265625, "loss_xval": 1.0546875, "num_input_tokens_seen": 121085400, "step": 1047 }, { "epoch": 5.634408602150538, "grad_norm": 20.243955612182617, "learning_rate": 5e-07, "loss": 0.7717, "num_input_tokens_seen": 121201880, "step": 1048 }, { "epoch": 5.634408602150538, "loss": 0.6814554929733276, "loss_ce": 5.8965670177713037e-05, "loss_iou": 0.287109375, "loss_num": 0.0213623046875, "loss_xval": 0.6796875, "num_input_tokens_seen": 121201880, "step": 1048 }, { "epoch": 5.639784946236559, "grad_norm": 24.200008392333984, "learning_rate": 5e-07, "loss": 0.8475, "num_input_tokens_seen": 121316992, "step": 1049 }, { "epoch": 5.639784946236559, "loss": 0.8340150713920593, "loss_ce": 0.0010072228033095598, "loss_iou": 0.353515625, "loss_num": 0.025390625, "loss_xval": 0.83203125, "num_input_tokens_seen": 121316992, "step": 1049 }, { "epoch": 5.645161290322581, "grad_norm": 27.10001564025879, "learning_rate": 5e-07, "loss": 0.8812, "num_input_tokens_seen": 121429884, "step": 1050 }, { "epoch": 5.645161290322581, "loss": 1.045004963874817, "loss_ce": 8.309136319439858e-05, "loss_iou": 0.4609375, "loss_num": 0.0244140625, "loss_xval": 1.046875, "num_input_tokens_seen": 121429884, "step": 1050 }, { "epoch": 5.650537634408602, "grad_norm": 26.15041160583496, "learning_rate": 5e-07, "loss": 0.896, "num_input_tokens_seen": 121545896, "step": 1051 }, { "epoch": 5.650537634408602, "loss": 1.5503642559051514, "loss_ce": 7.126527634682134e-05, "loss_iou": 0.65625, "loss_num": 0.04736328125, "loss_xval": 1.546875, "num_input_tokens_seen": 121545896, "step": 1051 }, { "epoch": 5.655913978494624, "grad_norm": 22.59949493408203, "learning_rate": 5e-07, "loss": 0.8165, "num_input_tokens_seen": 121661264, "step": 1052 }, { "epoch": 5.655913978494624, "loss": 0.5970180034637451, "loss_ce": 9.419136040378362e-05, "loss_iou": 0.2255859375, "loss_num": 0.0291748046875, "loss_xval": 0.59765625, "num_input_tokens_seen": 121661264, "step": 1052 }, { "epoch": 5.661290322580645, "grad_norm": 31.401227951049805, "learning_rate": 5e-07, "loss": 0.9403, "num_input_tokens_seen": 121779912, "step": 1053 }, { "epoch": 5.661290322580645, "loss": 0.6676520109176636, "loss_ce": 0.00017156210378743708, "loss_iou": 0.26953125, "loss_num": 0.0255126953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 121779912, "step": 1053 }, { "epoch": 5.666666666666667, "grad_norm": 19.341934204101562, "learning_rate": 5e-07, "loss": 0.7405, "num_input_tokens_seen": 121898664, "step": 1054 }, { "epoch": 5.666666666666667, "loss": 0.7449256181716919, "loss_ce": 5.264514766167849e-05, "loss_iou": 0.306640625, "loss_num": 0.026123046875, "loss_xval": 0.74609375, "num_input_tokens_seen": 121898664, "step": 1054 }, { "epoch": 5.672043010752688, "grad_norm": 22.455463409423828, "learning_rate": 5e-07, "loss": 0.7494, "num_input_tokens_seen": 122016392, "step": 1055 }, { "epoch": 5.672043010752688, "loss": 0.8438119888305664, "loss_ce": 6.200297502800822e-05, "loss_iou": 0.341796875, "loss_num": 0.031982421875, "loss_xval": 0.84375, "num_input_tokens_seen": 122016392, "step": 1055 }, { "epoch": 5.67741935483871, "grad_norm": 77.96773529052734, "learning_rate": 5e-07, "loss": 0.7533, "num_input_tokens_seen": 122132968, "step": 1056 }, { "epoch": 5.67741935483871, "loss": 0.9022048711776733, "loss_ce": 0.00034942181082442403, "loss_iou": 0.384765625, "loss_num": 0.0263671875, "loss_xval": 0.90234375, "num_input_tokens_seen": 122132968, "step": 1056 }, { "epoch": 5.682795698924731, "grad_norm": 26.93109893798828, "learning_rate": 5e-07, "loss": 0.8085, "num_input_tokens_seen": 122247148, "step": 1057 }, { "epoch": 5.682795698924731, "loss": 0.7601363658905029, "loss_ce": 0.00012660368520300835, "loss_iou": 0.310546875, "loss_num": 0.0281982421875, "loss_xval": 0.76171875, "num_input_tokens_seen": 122247148, "step": 1057 }, { "epoch": 5.688172043010753, "grad_norm": 15.606910705566406, "learning_rate": 5e-07, "loss": 0.7266, "num_input_tokens_seen": 122360388, "step": 1058 }, { "epoch": 5.688172043010753, "loss": 0.5525384545326233, "loss_ce": 4.8198966396739706e-05, "loss_iou": 0.208984375, "loss_num": 0.0267333984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 122360388, "step": 1058 }, { "epoch": 5.693548387096774, "grad_norm": 20.28322982788086, "learning_rate": 5e-07, "loss": 0.7968, "num_input_tokens_seen": 122475764, "step": 1059 }, { "epoch": 5.693548387096774, "loss": 0.6353539228439331, "loss_ce": 9.999576286645606e-05, "loss_iou": 0.2470703125, "loss_num": 0.0281982421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 122475764, "step": 1059 }, { "epoch": 5.698924731182796, "grad_norm": 20.049179077148438, "learning_rate": 5e-07, "loss": 0.7733, "num_input_tokens_seen": 122592424, "step": 1060 }, { "epoch": 5.698924731182796, "loss": 0.9764806032180786, "loss_ce": 4.0141254430636764e-05, "loss_iou": 0.4296875, "loss_num": 0.0233154296875, "loss_xval": 0.9765625, "num_input_tokens_seen": 122592424, "step": 1060 }, { "epoch": 5.704301075268817, "grad_norm": 23.70825958251953, "learning_rate": 5e-07, "loss": 0.7397, "num_input_tokens_seen": 122708876, "step": 1061 }, { "epoch": 5.704301075268817, "loss": 0.7784574031829834, "loss_ce": 0.00013706387835554779, "loss_iou": 0.322265625, "loss_num": 0.0267333984375, "loss_xval": 0.77734375, "num_input_tokens_seen": 122708876, "step": 1061 }, { "epoch": 5.709677419354839, "grad_norm": 22.52688980102539, "learning_rate": 5e-07, "loss": 1.1613, "num_input_tokens_seen": 122823172, "step": 1062 }, { "epoch": 5.709677419354839, "loss": 1.5064077377319336, "loss_ce": 6.0132762882858515e-05, "loss_iou": 0.62890625, "loss_num": 0.049560546875, "loss_xval": 1.5078125, "num_input_tokens_seen": 122823172, "step": 1062 }, { "epoch": 5.71505376344086, "grad_norm": 19.444799423217773, "learning_rate": 5e-07, "loss": 0.9448, "num_input_tokens_seen": 122937008, "step": 1063 }, { "epoch": 5.71505376344086, "loss": 0.9543938636779785, "loss_ce": 4.814189378521405e-05, "loss_iou": 0.400390625, "loss_num": 0.030517578125, "loss_xval": 0.953125, "num_input_tokens_seen": 122937008, "step": 1063 }, { "epoch": 5.720430107526882, "grad_norm": 14.881423950195312, "learning_rate": 5e-07, "loss": 0.7728, "num_input_tokens_seen": 123051716, "step": 1064 }, { "epoch": 5.720430107526882, "loss": 0.5230733752250671, "loss_ce": 0.00012413354124873877, "loss_iou": 0.21484375, "loss_num": 0.0189208984375, "loss_xval": 0.5234375, "num_input_tokens_seen": 123051716, "step": 1064 }, { "epoch": 5.725806451612903, "grad_norm": 22.104825973510742, "learning_rate": 5e-07, "loss": 0.6467, "num_input_tokens_seen": 123168220, "step": 1065 }, { "epoch": 5.725806451612903, "loss": 0.6242930293083191, "loss_ce": 2.548130214563571e-05, "loss_iou": 0.2578125, "loss_num": 0.0218505859375, "loss_xval": 0.625, "num_input_tokens_seen": 123168220, "step": 1065 }, { "epoch": 5.731182795698925, "grad_norm": 20.892688751220703, "learning_rate": 5e-07, "loss": 0.8593, "num_input_tokens_seen": 123285388, "step": 1066 }, { "epoch": 5.731182795698925, "loss": 0.9156888723373413, "loss_ce": 3.951518010580912e-05, "loss_iou": 0.373046875, "loss_num": 0.033935546875, "loss_xval": 0.9140625, "num_input_tokens_seen": 123285388, "step": 1066 }, { "epoch": 5.736559139784946, "grad_norm": 20.95519256591797, "learning_rate": 5e-07, "loss": 0.8112, "num_input_tokens_seen": 123399228, "step": 1067 }, { "epoch": 5.736559139784946, "loss": 0.6523944735527039, "loss_ce": 5.0701659347396344e-05, "loss_iou": 0.267578125, "loss_num": 0.0235595703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 123399228, "step": 1067 }, { "epoch": 5.741935483870968, "grad_norm": 22.371471405029297, "learning_rate": 5e-07, "loss": 1.0567, "num_input_tokens_seen": 123512436, "step": 1068 }, { "epoch": 5.741935483870968, "loss": 0.8799211978912354, "loss_ce": 3.838208067463711e-05, "loss_iou": 0.3828125, "loss_num": 0.0233154296875, "loss_xval": 0.87890625, "num_input_tokens_seen": 123512436, "step": 1068 }, { "epoch": 5.747311827956989, "grad_norm": 27.349485397338867, "learning_rate": 5e-07, "loss": 0.8242, "num_input_tokens_seen": 123625688, "step": 1069 }, { "epoch": 5.747311827956989, "loss": 0.6536229848861694, "loss_ce": 0.00018064262985717505, "loss_iou": 0.255859375, "loss_num": 0.028564453125, "loss_xval": 0.65234375, "num_input_tokens_seen": 123625688, "step": 1069 }, { "epoch": 5.752688172043011, "grad_norm": 27.108972549438477, "learning_rate": 5e-07, "loss": 0.7976, "num_input_tokens_seen": 123740872, "step": 1070 }, { "epoch": 5.752688172043011, "loss": 0.8886064291000366, "loss_ce": 0.00017868142458610237, "loss_iou": 0.369140625, "loss_num": 0.0301513671875, "loss_xval": 0.88671875, "num_input_tokens_seen": 123740872, "step": 1070 }, { "epoch": 5.758064516129032, "grad_norm": 14.410316467285156, "learning_rate": 5e-07, "loss": 0.778, "num_input_tokens_seen": 123851796, "step": 1071 }, { "epoch": 5.758064516129032, "loss": 0.5657221078872681, "loss_ce": 4.827819793717936e-05, "loss_iou": 0.2294921875, "loss_num": 0.0213623046875, "loss_xval": 0.56640625, "num_input_tokens_seen": 123851796, "step": 1071 }, { "epoch": 5.763440860215054, "grad_norm": 32.44384765625, "learning_rate": 5e-07, "loss": 0.7982, "num_input_tokens_seen": 123964200, "step": 1072 }, { "epoch": 5.763440860215054, "loss": 0.6651029586791992, "loss_ce": 6.39116478851065e-05, "loss_iou": 0.28515625, "loss_num": 0.0186767578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 123964200, "step": 1072 }, { "epoch": 5.768817204301075, "grad_norm": 20.31708335876465, "learning_rate": 5e-07, "loss": 0.925, "num_input_tokens_seen": 124078304, "step": 1073 }, { "epoch": 5.768817204301075, "loss": 1.068314790725708, "loss_ce": 7.740064029349014e-05, "loss_iou": 0.439453125, "loss_num": 0.037841796875, "loss_xval": 1.0703125, "num_input_tokens_seen": 124078304, "step": 1073 }, { "epoch": 5.774193548387097, "grad_norm": 30.928993225097656, "learning_rate": 5e-07, "loss": 0.7821, "num_input_tokens_seen": 124194152, "step": 1074 }, { "epoch": 5.774193548387097, "loss": 0.7837443351745605, "loss_ce": 5.294477887218818e-05, "loss_iou": 0.345703125, "loss_num": 0.0181884765625, "loss_xval": 0.78515625, "num_input_tokens_seen": 124194152, "step": 1074 }, { "epoch": 5.779569892473118, "grad_norm": 43.5381965637207, "learning_rate": 5e-07, "loss": 0.8642, "num_input_tokens_seen": 124311952, "step": 1075 }, { "epoch": 5.779569892473118, "loss": 0.866825520992279, "loss_ce": 0.00012634461745619774, "loss_iou": 0.376953125, "loss_num": 0.0223388671875, "loss_xval": 0.8671875, "num_input_tokens_seen": 124311952, "step": 1075 }, { "epoch": 5.78494623655914, "grad_norm": 21.173978805541992, "learning_rate": 5e-07, "loss": 0.7496, "num_input_tokens_seen": 124427148, "step": 1076 }, { "epoch": 5.78494623655914, "loss": 0.4217569828033447, "loss_ce": 0.0001261321740457788, "loss_iou": 0.16796875, "loss_num": 0.0172119140625, "loss_xval": 0.421875, "num_input_tokens_seen": 124427148, "step": 1076 }, { "epoch": 5.790322580645161, "grad_norm": 33.769718170166016, "learning_rate": 5e-07, "loss": 0.8081, "num_input_tokens_seen": 124539720, "step": 1077 }, { "epoch": 5.790322580645161, "loss": 0.7664108276367188, "loss_ce": 0.00017546152230352163, "loss_iou": 0.33203125, "loss_num": 0.0203857421875, "loss_xval": 0.765625, "num_input_tokens_seen": 124539720, "step": 1077 }, { "epoch": 5.795698924731183, "grad_norm": 24.897180557250977, "learning_rate": 5e-07, "loss": 0.8661, "num_input_tokens_seen": 124654360, "step": 1078 }, { "epoch": 5.795698924731183, "loss": 0.6350704431533813, "loss_ce": 6.068852235330269e-05, "loss_iou": 0.2734375, "loss_num": 0.01806640625, "loss_xval": 0.63671875, "num_input_tokens_seen": 124654360, "step": 1078 }, { "epoch": 5.801075268817204, "grad_norm": 24.18175506591797, "learning_rate": 5e-07, "loss": 0.9442, "num_input_tokens_seen": 124767748, "step": 1079 }, { "epoch": 5.801075268817204, "loss": 0.8057577610015869, "loss_ce": 9.367359598400071e-05, "loss_iou": 0.333984375, "loss_num": 0.0274658203125, "loss_xval": 0.8046875, "num_input_tokens_seen": 124767748, "step": 1079 }, { "epoch": 5.806451612903226, "grad_norm": 1103.7911376953125, "learning_rate": 5e-07, "loss": 0.847, "num_input_tokens_seen": 124881992, "step": 1080 }, { "epoch": 5.806451612903226, "loss": 0.9495391845703125, "loss_ce": 7.631105108885095e-05, "loss_iou": 0.388671875, "loss_num": 0.03466796875, "loss_xval": 0.94921875, "num_input_tokens_seen": 124881992, "step": 1080 }, { "epoch": 5.811827956989247, "grad_norm": 40.01178741455078, "learning_rate": 5e-07, "loss": 0.8769, "num_input_tokens_seen": 124997892, "step": 1081 }, { "epoch": 5.811827956989247, "loss": 0.7188394069671631, "loss_ce": 8.940316911321133e-05, "loss_iou": 0.30078125, "loss_num": 0.023681640625, "loss_xval": 0.71875, "num_input_tokens_seen": 124997892, "step": 1081 }, { "epoch": 5.817204301075269, "grad_norm": 44.9107780456543, "learning_rate": 5e-07, "loss": 0.9818, "num_input_tokens_seen": 125116080, "step": 1082 }, { "epoch": 5.817204301075269, "loss": 1.242928385734558, "loss_ce": 0.0002525626914575696, "loss_iou": 0.5390625, "loss_num": 0.033203125, "loss_xval": 1.2421875, "num_input_tokens_seen": 125116080, "step": 1082 }, { "epoch": 5.82258064516129, "grad_norm": 46.961326599121094, "learning_rate": 5e-07, "loss": 0.899, "num_input_tokens_seen": 125231804, "step": 1083 }, { "epoch": 5.82258064516129, "loss": 1.140189290046692, "loss_ce": 5.256460644886829e-05, "loss_iou": 0.484375, "loss_num": 0.0341796875, "loss_xval": 1.140625, "num_input_tokens_seen": 125231804, "step": 1083 }, { "epoch": 5.827956989247312, "grad_norm": 42.430233001708984, "learning_rate": 5e-07, "loss": 0.8463, "num_input_tokens_seen": 125345656, "step": 1084 }, { "epoch": 5.827956989247312, "loss": 0.8910994529724121, "loss_ce": 0.0002302926150150597, "loss_iou": 0.36328125, "loss_num": 0.032470703125, "loss_xval": 0.890625, "num_input_tokens_seen": 125345656, "step": 1084 }, { "epoch": 5.833333333333333, "grad_norm": 25.67405128479004, "learning_rate": 5e-07, "loss": 0.8716, "num_input_tokens_seen": 125457240, "step": 1085 }, { "epoch": 5.833333333333333, "loss": 0.7836522459983826, "loss_ce": 0.00020496711658779532, "loss_iou": 0.328125, "loss_num": 0.02587890625, "loss_xval": 0.78515625, "num_input_tokens_seen": 125457240, "step": 1085 }, { "epoch": 5.838709677419355, "grad_norm": 29.510648727416992, "learning_rate": 5e-07, "loss": 0.8411, "num_input_tokens_seen": 125569028, "step": 1086 }, { "epoch": 5.838709677419355, "loss": 0.6465678215026855, "loss_ce": 8.343778608832508e-05, "loss_iou": 0.251953125, "loss_num": 0.02880859375, "loss_xval": 0.6484375, "num_input_tokens_seen": 125569028, "step": 1086 }, { "epoch": 5.844086021505376, "grad_norm": 30.275793075561523, "learning_rate": 5e-07, "loss": 0.8869, "num_input_tokens_seen": 125682980, "step": 1087 }, { "epoch": 5.844086021505376, "loss": 0.5535332560539246, "loss_ce": 6.6460132075008e-05, "loss_iou": 0.2392578125, "loss_num": 0.01519775390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 125682980, "step": 1087 }, { "epoch": 5.849462365591398, "grad_norm": 28.39118766784668, "learning_rate": 5e-07, "loss": 0.9726, "num_input_tokens_seen": 125796908, "step": 1088 }, { "epoch": 5.849462365591398, "loss": 1.3100411891937256, "loss_ce": 0.0002268555253976956, "loss_iou": 0.58984375, "loss_num": 0.0262451171875, "loss_xval": 1.3125, "num_input_tokens_seen": 125796908, "step": 1088 }, { "epoch": 5.854838709677419, "grad_norm": 29.759197235107422, "learning_rate": 5e-07, "loss": 0.9167, "num_input_tokens_seen": 125913840, "step": 1089 }, { "epoch": 5.854838709677419, "loss": 1.3604927062988281, "loss_ce": 0.00014117598766461015, "loss_iou": 0.5625, "loss_num": 0.046630859375, "loss_xval": 1.359375, "num_input_tokens_seen": 125913840, "step": 1089 }, { "epoch": 5.860215053763441, "grad_norm": 32.62646484375, "learning_rate": 5e-07, "loss": 0.9695, "num_input_tokens_seen": 126029416, "step": 1090 }, { "epoch": 5.860215053763441, "loss": 1.1245923042297363, "loss_ce": 8.054467616602778e-05, "loss_iou": 0.498046875, "loss_num": 0.0257568359375, "loss_xval": 1.125, "num_input_tokens_seen": 126029416, "step": 1090 }, { "epoch": 5.865591397849462, "grad_norm": 28.788291931152344, "learning_rate": 5e-07, "loss": 0.9412, "num_input_tokens_seen": 126145084, "step": 1091 }, { "epoch": 5.865591397849462, "loss": 1.0237672328948975, "loss_ce": 8.564641757402569e-05, "loss_iou": 0.45703125, "loss_num": 0.0218505859375, "loss_xval": 1.0234375, "num_input_tokens_seen": 126145084, "step": 1091 }, { "epoch": 5.870967741935484, "grad_norm": 28.302461624145508, "learning_rate": 5e-07, "loss": 0.8507, "num_input_tokens_seen": 126262636, "step": 1092 }, { "epoch": 5.870967741935484, "loss": 0.9251198172569275, "loss_ce": 7.096686749719083e-05, "loss_iou": 0.376953125, "loss_num": 0.034423828125, "loss_xval": 0.92578125, "num_input_tokens_seen": 126262636, "step": 1092 }, { "epoch": 5.876344086021505, "grad_norm": 33.94977569580078, "learning_rate": 5e-07, "loss": 0.7761, "num_input_tokens_seen": 126377632, "step": 1093 }, { "epoch": 5.876344086021505, "loss": 0.6845033764839172, "loss_ce": 5.5115888244472444e-05, "loss_iou": 0.27734375, "loss_num": 0.025634765625, "loss_xval": 0.68359375, "num_input_tokens_seen": 126377632, "step": 1093 }, { "epoch": 5.881720430107527, "grad_norm": 21.466793060302734, "learning_rate": 5e-07, "loss": 0.8911, "num_input_tokens_seen": 126492192, "step": 1094 }, { "epoch": 5.881720430107527, "loss": 0.9366035461425781, "loss_ce": 8.013640035642311e-05, "loss_iou": 0.408203125, "loss_num": 0.0242919921875, "loss_xval": 0.9375, "num_input_tokens_seen": 126492192, "step": 1094 }, { "epoch": 5.887096774193548, "grad_norm": 36.41246795654297, "learning_rate": 5e-07, "loss": 0.7303, "num_input_tokens_seen": 126609080, "step": 1095 }, { "epoch": 5.887096774193548, "loss": 0.7329756021499634, "loss_ce": 6.545825453940779e-05, "loss_iou": 0.32421875, "loss_num": 0.0172119140625, "loss_xval": 0.734375, "num_input_tokens_seen": 126609080, "step": 1095 }, { "epoch": 5.89247311827957, "grad_norm": 31.081642150878906, "learning_rate": 5e-07, "loss": 0.8669, "num_input_tokens_seen": 126727132, "step": 1096 }, { "epoch": 5.89247311827957, "loss": 0.7832926511764526, "loss_ce": 8.949971379479393e-05, "loss_iou": 0.33203125, "loss_num": 0.0233154296875, "loss_xval": 0.78125, "num_input_tokens_seen": 126727132, "step": 1096 }, { "epoch": 5.897849462365591, "grad_norm": 23.914306640625, "learning_rate": 5e-07, "loss": 1.004, "num_input_tokens_seen": 126839412, "step": 1097 }, { "epoch": 5.897849462365591, "loss": 0.9461119174957275, "loss_ce": 6.698520883219317e-05, "loss_iou": 0.42578125, "loss_num": 0.01904296875, "loss_xval": 0.9453125, "num_input_tokens_seen": 126839412, "step": 1097 }, { "epoch": 5.903225806451613, "grad_norm": 30.068506240844727, "learning_rate": 5e-07, "loss": 0.8432, "num_input_tokens_seen": 126959704, "step": 1098 }, { "epoch": 5.903225806451613, "loss": 0.41519561409950256, "loss_ce": 3.445789479883388e-05, "loss_iou": 0.1689453125, "loss_num": 0.0155029296875, "loss_xval": 0.416015625, "num_input_tokens_seen": 126959704, "step": 1098 }, { "epoch": 5.908602150537634, "grad_norm": 30.682294845581055, "learning_rate": 5e-07, "loss": 0.8352, "num_input_tokens_seen": 127076568, "step": 1099 }, { "epoch": 5.908602150537634, "loss": 0.7945137023925781, "loss_ce": 8.012547914404422e-05, "loss_iou": 0.33203125, "loss_num": 0.026123046875, "loss_xval": 0.79296875, "num_input_tokens_seen": 127076568, "step": 1099 }, { "epoch": 5.913978494623656, "grad_norm": 25.9898681640625, "learning_rate": 5e-07, "loss": 0.8127, "num_input_tokens_seen": 127193600, "step": 1100 }, { "epoch": 5.913978494623656, "loss": 0.9534818530082703, "loss_ce": 0.00011274621647316962, "loss_iou": 0.392578125, "loss_num": 0.033447265625, "loss_xval": 0.953125, "num_input_tokens_seen": 127193600, "step": 1100 }, { "epoch": 5.919354838709677, "grad_norm": 22.654760360717773, "learning_rate": 5e-07, "loss": 0.8762, "num_input_tokens_seen": 127311756, "step": 1101 }, { "epoch": 5.919354838709677, "loss": 0.7623111009597778, "loss_ce": 0.00010406791989225894, "loss_iou": 0.30859375, "loss_num": 0.029052734375, "loss_xval": 0.76171875, "num_input_tokens_seen": 127311756, "step": 1101 }, { "epoch": 5.924731182795699, "grad_norm": 32.06362533569336, "learning_rate": 5e-07, "loss": 0.9508, "num_input_tokens_seen": 127430784, "step": 1102 }, { "epoch": 5.924731182795699, "loss": 1.0310574769973755, "loss_ce": 0.0020047530997544527, "loss_iou": 0.447265625, "loss_num": 0.02685546875, "loss_xval": 1.03125, "num_input_tokens_seen": 127430784, "step": 1102 }, { "epoch": 5.93010752688172, "grad_norm": 22.218748092651367, "learning_rate": 5e-07, "loss": 0.7115, "num_input_tokens_seen": 127545824, "step": 1103 }, { "epoch": 5.93010752688172, "loss": 0.6250759959220886, "loss_ce": 7.598576485179365e-05, "loss_iou": 0.27734375, "loss_num": 0.01434326171875, "loss_xval": 0.625, "num_input_tokens_seen": 127545824, "step": 1103 }, { "epoch": 5.935483870967742, "grad_norm": 27.93819236755371, "learning_rate": 5e-07, "loss": 0.9482, "num_input_tokens_seen": 127664916, "step": 1104 }, { "epoch": 5.935483870967742, "loss": 1.0337445735931396, "loss_ce": 5.318627518136054e-05, "loss_iou": 0.443359375, "loss_num": 0.0294189453125, "loss_xval": 1.03125, "num_input_tokens_seen": 127664916, "step": 1104 }, { "epoch": 5.940860215053764, "grad_norm": 22.40281867980957, "learning_rate": 5e-07, "loss": 0.8092, "num_input_tokens_seen": 127780272, "step": 1105 }, { "epoch": 5.940860215053764, "loss": 0.7652357816696167, "loss_ce": 9.906831110129133e-05, "loss_iou": 0.314453125, "loss_num": 0.02685546875, "loss_xval": 0.765625, "num_input_tokens_seen": 127780272, "step": 1105 }, { "epoch": 5.946236559139785, "grad_norm": 25.339675903320312, "learning_rate": 5e-07, "loss": 0.7911, "num_input_tokens_seen": 127899516, "step": 1106 }, { "epoch": 5.946236559139785, "loss": 0.8341277241706848, "loss_ce": 0.00014332507271319628, "loss_iou": 0.37890625, "loss_num": 0.01531982421875, "loss_xval": 0.8359375, "num_input_tokens_seen": 127899516, "step": 1106 }, { "epoch": 5.951612903225806, "grad_norm": 19.08136558532715, "learning_rate": 5e-07, "loss": 0.9015, "num_input_tokens_seen": 128011964, "step": 1107 }, { "epoch": 5.951612903225806, "loss": 1.00189208984375, "loss_ce": 6.099601887399331e-05, "loss_iou": 0.40625, "loss_num": 0.0380859375, "loss_xval": 1.0, "num_input_tokens_seen": 128011964, "step": 1107 }, { "epoch": 5.956989247311828, "grad_norm": 22.27056884765625, "learning_rate": 5e-07, "loss": 0.9197, "num_input_tokens_seen": 128125316, "step": 1108 }, { "epoch": 5.956989247311828, "loss": 1.4663881063461304, "loss_ce": 7.947521226014942e-05, "loss_iou": 0.65234375, "loss_num": 0.031982421875, "loss_xval": 1.46875, "num_input_tokens_seen": 128125316, "step": 1108 }, { "epoch": 5.96236559139785, "grad_norm": 20.811325073242188, "learning_rate": 5e-07, "loss": 0.9968, "num_input_tokens_seen": 128239212, "step": 1109 }, { "epoch": 5.96236559139785, "loss": 0.9634491801261902, "loss_ce": 7.02859106240794e-05, "loss_iou": 0.408203125, "loss_num": 0.0294189453125, "loss_xval": 0.96484375, "num_input_tokens_seen": 128239212, "step": 1109 }, { "epoch": 5.967741935483871, "grad_norm": 43.38816452026367, "learning_rate": 5e-07, "loss": 0.8846, "num_input_tokens_seen": 128352992, "step": 1110 }, { "epoch": 5.967741935483871, "loss": 0.8660691976547241, "loss_ce": 0.00010244839359074831, "loss_iou": 0.3828125, "loss_num": 0.019775390625, "loss_xval": 0.8671875, "num_input_tokens_seen": 128352992, "step": 1110 }, { "epoch": 5.973118279569892, "grad_norm": 20.9058895111084, "learning_rate": 5e-07, "loss": 0.7962, "num_input_tokens_seen": 128469184, "step": 1111 }, { "epoch": 5.973118279569892, "loss": 1.1656749248504639, "loss_ce": 0.00014758000907022506, "loss_iou": 0.4765625, "loss_num": 0.042724609375, "loss_xval": 1.1640625, "num_input_tokens_seen": 128469184, "step": 1111 }, { "epoch": 5.978494623655914, "grad_norm": 26.214998245239258, "learning_rate": 5e-07, "loss": 1.038, "num_input_tokens_seen": 128584328, "step": 1112 }, { "epoch": 5.978494623655914, "loss": 1.5259824991226196, "loss_ce": 0.00010367634968133643, "loss_iou": 0.671875, "loss_num": 0.037353515625, "loss_xval": 1.5234375, "num_input_tokens_seen": 128584328, "step": 1112 }, { "epoch": 5.983870967741936, "grad_norm": 62.123199462890625, "learning_rate": 5e-07, "loss": 0.9087, "num_input_tokens_seen": 128699480, "step": 1113 }, { "epoch": 5.983870967741936, "loss": 0.8706742525100708, "loss_ce": 6.8813213147223e-05, "loss_iou": 0.36328125, "loss_num": 0.029052734375, "loss_xval": 0.87109375, "num_input_tokens_seen": 128699480, "step": 1113 }, { "epoch": 5.989247311827957, "grad_norm": 25.094236373901367, "learning_rate": 5e-07, "loss": 0.8763, "num_input_tokens_seen": 128812460, "step": 1114 }, { "epoch": 5.989247311827957, "loss": 1.031348466873169, "loss_ce": 9.845093882177025e-05, "loss_iou": 0.451171875, "loss_num": 0.02587890625, "loss_xval": 1.03125, "num_input_tokens_seen": 128812460, "step": 1114 }, { "epoch": 5.994623655913978, "grad_norm": 22.223344802856445, "learning_rate": 5e-07, "loss": 0.8669, "num_input_tokens_seen": 128924156, "step": 1115 }, { "epoch": 5.994623655913978, "loss": 0.7234697341918945, "loss_ce": 8.106414315989241e-05, "loss_iou": 0.294921875, "loss_num": 0.026611328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 128924156, "step": 1115 }, { "epoch": 6.0, "grad_norm": 22.429126739501953, "learning_rate": 5e-07, "loss": 0.8069, "num_input_tokens_seen": 129038432, "step": 1116 }, { "epoch": 6.0, "loss": 1.0693137645721436, "loss_ce": 0.0011985736200585961, "loss_iou": 0.46484375, "loss_num": 0.0277099609375, "loss_xval": 1.0703125, "num_input_tokens_seen": 129038432, "step": 1116 }, { "epoch": 6.005376344086022, "grad_norm": 20.966218948364258, "learning_rate": 5e-07, "loss": 0.8635, "num_input_tokens_seen": 129157952, "step": 1117 }, { "epoch": 6.005376344086022, "loss": 0.8899843692779541, "loss_ce": 0.00033593596890568733, "loss_iou": 0.40234375, "loss_num": 0.0167236328125, "loss_xval": 0.890625, "num_input_tokens_seen": 129157952, "step": 1117 }, { "epoch": 6.010752688172043, "grad_norm": 19.158658981323242, "learning_rate": 5e-07, "loss": 0.7817, "num_input_tokens_seen": 129272556, "step": 1118 }, { "epoch": 6.010752688172043, "loss": 0.8364973068237305, "loss_ce": 7.152376201702282e-05, "loss_iou": 0.349609375, "loss_num": 0.027099609375, "loss_xval": 0.8359375, "num_input_tokens_seen": 129272556, "step": 1118 }, { "epoch": 6.016129032258065, "grad_norm": 18.268779754638672, "learning_rate": 5e-07, "loss": 0.8874, "num_input_tokens_seen": 129389048, "step": 1119 }, { "epoch": 6.016129032258065, "loss": 0.809725284576416, "loss_ce": 0.000154987457790412, "loss_iou": 0.3515625, "loss_num": 0.0208740234375, "loss_xval": 0.80859375, "num_input_tokens_seen": 129389048, "step": 1119 }, { "epoch": 6.021505376344086, "grad_norm": 19.37051010131836, "learning_rate": 5e-07, "loss": 0.9576, "num_input_tokens_seen": 129503080, "step": 1120 }, { "epoch": 6.021505376344086, "loss": 1.2545477151870728, "loss_ce": 0.00015320879174396396, "loss_iou": 0.51171875, "loss_num": 0.04638671875, "loss_xval": 1.2578125, "num_input_tokens_seen": 129503080, "step": 1120 }, { "epoch": 6.026881720430108, "grad_norm": 18.56949806213379, "learning_rate": 5e-07, "loss": 0.7742, "num_input_tokens_seen": 129617568, "step": 1121 }, { "epoch": 6.026881720430108, "loss": 1.0748013257980347, "loss_ce": 9.433002560399473e-05, "loss_iou": 0.45703125, "loss_num": 0.0322265625, "loss_xval": 1.078125, "num_input_tokens_seen": 129617568, "step": 1121 }, { "epoch": 6.032258064516129, "grad_norm": 19.77068328857422, "learning_rate": 5e-07, "loss": 0.7591, "num_input_tokens_seen": 129733172, "step": 1122 }, { "epoch": 6.032258064516129, "loss": 0.6787631511688232, "loss_ce": 5.2224553655833006e-05, "loss_iou": 0.306640625, "loss_num": 0.0128173828125, "loss_xval": 0.6796875, "num_input_tokens_seen": 129733172, "step": 1122 }, { "epoch": 6.037634408602151, "grad_norm": 25.86099624633789, "learning_rate": 5e-07, "loss": 0.7982, "num_input_tokens_seen": 129852256, "step": 1123 }, { "epoch": 6.037634408602151, "loss": 0.625551700592041, "loss_ce": 6.341230619000271e-05, "loss_iou": 0.283203125, "loss_num": 0.0120849609375, "loss_xval": 0.625, "num_input_tokens_seen": 129852256, "step": 1123 }, { "epoch": 6.043010752688172, "grad_norm": 22.95869255065918, "learning_rate": 5e-07, "loss": 0.7626, "num_input_tokens_seen": 129968192, "step": 1124 }, { "epoch": 6.043010752688172, "loss": 0.659966230392456, "loss_ce": 5.407763092080131e-05, "loss_iou": 0.2890625, "loss_num": 0.0164794921875, "loss_xval": 0.66015625, "num_input_tokens_seen": 129968192, "step": 1124 }, { "epoch": 6.048387096774194, "grad_norm": 23.24308967590332, "learning_rate": 5e-07, "loss": 1.0481, "num_input_tokens_seen": 130082556, "step": 1125 }, { "epoch": 6.048387096774194, "loss": 0.9222029447555542, "loss_ce": 8.380951476283371e-05, "loss_iou": 0.3828125, "loss_num": 0.03125, "loss_xval": 0.921875, "num_input_tokens_seen": 130082556, "step": 1125 }, { "epoch": 6.053763440860215, "grad_norm": 24.05714988708496, "learning_rate": 5e-07, "loss": 1.1913, "num_input_tokens_seen": 130199720, "step": 1126 }, { "epoch": 6.053763440860215, "loss": 0.8291791081428528, "loss_ce": 7.750806980766356e-05, "loss_iou": 0.361328125, "loss_num": 0.0213623046875, "loss_xval": 0.828125, "num_input_tokens_seen": 130199720, "step": 1126 }, { "epoch": 6.059139784946237, "grad_norm": 18.272462844848633, "learning_rate": 5e-07, "loss": 0.9341, "num_input_tokens_seen": 130316480, "step": 1127 }, { "epoch": 6.059139784946237, "loss": 1.247628927230835, "loss_ce": 7.027322135400027e-05, "loss_iou": 0.54296875, "loss_num": 0.0322265625, "loss_xval": 1.25, "num_input_tokens_seen": 130316480, "step": 1127 }, { "epoch": 6.064516129032258, "grad_norm": 19.158815383911133, "learning_rate": 5e-07, "loss": 0.7348, "num_input_tokens_seen": 130430096, "step": 1128 }, { "epoch": 6.064516129032258, "loss": 0.8097297549247742, "loss_ce": 0.00015943407197482884, "loss_iou": 0.349609375, "loss_num": 0.0220947265625, "loss_xval": 0.80859375, "num_input_tokens_seen": 130430096, "step": 1128 }, { "epoch": 6.06989247311828, "grad_norm": 20.2532901763916, "learning_rate": 5e-07, "loss": 0.8923, "num_input_tokens_seen": 130545548, "step": 1129 }, { "epoch": 6.06989247311828, "loss": 0.8112398386001587, "loss_ce": 0.00020465167472139, "loss_iou": 0.34765625, "loss_num": 0.0235595703125, "loss_xval": 0.8125, "num_input_tokens_seen": 130545548, "step": 1129 }, { "epoch": 6.075268817204301, "grad_norm": 16.847942352294922, "learning_rate": 5e-07, "loss": 0.8711, "num_input_tokens_seen": 130664512, "step": 1130 }, { "epoch": 6.075268817204301, "loss": 0.6583141088485718, "loss_ce": 0.00011091561464127153, "loss_iou": 0.279296875, "loss_num": 0.020263671875, "loss_xval": 0.65625, "num_input_tokens_seen": 130664512, "step": 1130 }, { "epoch": 6.080645161290323, "grad_norm": 17.741012573242188, "learning_rate": 5e-07, "loss": 0.624, "num_input_tokens_seen": 130780712, "step": 1131 }, { "epoch": 6.080645161290323, "loss": 0.5725723505020142, "loss_ce": 6.256789492908865e-05, "loss_iou": 0.26171875, "loss_num": 0.0096435546875, "loss_xval": 0.57421875, "num_input_tokens_seen": 130780712, "step": 1131 }, { "epoch": 6.086021505376344, "grad_norm": 29.98691749572754, "learning_rate": 5e-07, "loss": 0.765, "num_input_tokens_seen": 130897348, "step": 1132 }, { "epoch": 6.086021505376344, "loss": 0.8745492696762085, "loss_ce": 3.758948150789365e-05, "loss_iou": 0.359375, "loss_num": 0.0306396484375, "loss_xval": 0.875, "num_input_tokens_seen": 130897348, "step": 1132 }, { "epoch": 6.091397849462366, "grad_norm": 21.548429489135742, "learning_rate": 5e-07, "loss": 0.7895, "num_input_tokens_seen": 131012304, "step": 1133 }, { "epoch": 6.091397849462366, "loss": 0.7621341943740845, "loss_ce": 0.00017127746832557023, "loss_iou": 0.328125, "loss_num": 0.02099609375, "loss_xval": 0.76171875, "num_input_tokens_seen": 131012304, "step": 1133 }, { "epoch": 6.096774193548387, "grad_norm": 18.70204734802246, "learning_rate": 5e-07, "loss": 0.7881, "num_input_tokens_seen": 131126840, "step": 1134 }, { "epoch": 6.096774193548387, "loss": 0.7734919786453247, "loss_ce": 5.445496572065167e-05, "loss_iou": 0.330078125, "loss_num": 0.0224609375, "loss_xval": 0.7734375, "num_input_tokens_seen": 131126840, "step": 1134 }, { "epoch": 6.102150537634409, "grad_norm": 18.507568359375, "learning_rate": 5e-07, "loss": 1.0856, "num_input_tokens_seen": 131241028, "step": 1135 }, { "epoch": 6.102150537634409, "loss": 1.035710096359253, "loss_ce": 6.55422336421907e-05, "loss_iou": 0.44140625, "loss_num": 0.030517578125, "loss_xval": 1.0390625, "num_input_tokens_seen": 131241028, "step": 1135 }, { "epoch": 6.10752688172043, "grad_norm": 19.814510345458984, "learning_rate": 5e-07, "loss": 0.7931, "num_input_tokens_seen": 131353836, "step": 1136 }, { "epoch": 6.10752688172043, "loss": 0.8154735565185547, "loss_ce": 4.382722181617282e-05, "loss_iou": 0.33203125, "loss_num": 0.0299072265625, "loss_xval": 0.81640625, "num_input_tokens_seen": 131353836, "step": 1136 }, { "epoch": 6.112903225806452, "grad_norm": 18.639266967773438, "learning_rate": 5e-07, "loss": 0.8905, "num_input_tokens_seen": 131472884, "step": 1137 }, { "epoch": 6.112903225806452, "loss": 1.1338372230529785, "loss_ce": 4.824339703191072e-05, "loss_iou": 0.5078125, "loss_num": 0.022705078125, "loss_xval": 1.1328125, "num_input_tokens_seen": 131472884, "step": 1137 }, { "epoch": 6.118279569892473, "grad_norm": 20.22661018371582, "learning_rate": 5e-07, "loss": 0.8853, "num_input_tokens_seen": 131584304, "step": 1138 }, { "epoch": 6.118279569892473, "loss": 0.8804150223731995, "loss_ce": 0.0006542960763908923, "loss_iou": 0.357421875, "loss_num": 0.03271484375, "loss_xval": 0.87890625, "num_input_tokens_seen": 131584304, "step": 1138 }, { "epoch": 6.123655913978495, "grad_norm": 22.029319763183594, "learning_rate": 5e-07, "loss": 0.7708, "num_input_tokens_seen": 131702040, "step": 1139 }, { "epoch": 6.123655913978495, "loss": 0.5979406833648682, "loss_ce": 4.024962618132122e-05, "loss_iou": 0.236328125, "loss_num": 0.02490234375, "loss_xval": 0.59765625, "num_input_tokens_seen": 131702040, "step": 1139 }, { "epoch": 6.129032258064516, "grad_norm": 21.714641571044922, "learning_rate": 5e-07, "loss": 0.8035, "num_input_tokens_seen": 131819740, "step": 1140 }, { "epoch": 6.129032258064516, "loss": 0.7842360138893127, "loss_ce": 5.631304520647973e-05, "loss_iou": 0.306640625, "loss_num": 0.033935546875, "loss_xval": 0.78515625, "num_input_tokens_seen": 131819740, "step": 1140 }, { "epoch": 6.134408602150538, "grad_norm": 26.83460807800293, "learning_rate": 5e-07, "loss": 0.79, "num_input_tokens_seen": 131936948, "step": 1141 }, { "epoch": 6.134408602150538, "loss": 0.7355155944824219, "loss_ce": 4.197365706204437e-05, "loss_iou": 0.291015625, "loss_num": 0.0303955078125, "loss_xval": 0.734375, "num_input_tokens_seen": 131936948, "step": 1141 }, { "epoch": 6.139784946236559, "grad_norm": 16.103586196899414, "learning_rate": 5e-07, "loss": 0.8483, "num_input_tokens_seen": 132051472, "step": 1142 }, { "epoch": 6.139784946236559, "loss": 0.7574537396430969, "loss_ce": 6.850776844657958e-05, "loss_iou": 0.337890625, "loss_num": 0.0166015625, "loss_xval": 0.7578125, "num_input_tokens_seen": 132051472, "step": 1142 }, { "epoch": 6.145161290322581, "grad_norm": 28.961715698242188, "learning_rate": 5e-07, "loss": 0.9522, "num_input_tokens_seen": 132169040, "step": 1143 }, { "epoch": 6.145161290322581, "loss": 1.0668654441833496, "loss_ce": 9.302243415731937e-05, "loss_iou": 0.42578125, "loss_num": 0.04296875, "loss_xval": 1.0703125, "num_input_tokens_seen": 132169040, "step": 1143 }, { "epoch": 6.150537634408602, "grad_norm": 15.866260528564453, "learning_rate": 5e-07, "loss": 0.8625, "num_input_tokens_seen": 132283560, "step": 1144 }, { "epoch": 6.150537634408602, "loss": 0.9411078691482544, "loss_ce": 6.789345206925645e-05, "loss_iou": 0.400390625, "loss_num": 0.0281982421875, "loss_xval": 0.94140625, "num_input_tokens_seen": 132283560, "step": 1144 }, { "epoch": 6.155913978494624, "grad_norm": 24.952938079833984, "learning_rate": 5e-07, "loss": 0.7786, "num_input_tokens_seen": 132400720, "step": 1145 }, { "epoch": 6.155913978494624, "loss": 0.9255345463752747, "loss_ce": 0.0002415822382317856, "loss_iou": 0.404296875, "loss_num": 0.0234375, "loss_xval": 0.92578125, "num_input_tokens_seen": 132400720, "step": 1145 }, { "epoch": 6.161290322580645, "grad_norm": 19.4321346282959, "learning_rate": 5e-07, "loss": 0.9959, "num_input_tokens_seen": 132513896, "step": 1146 }, { "epoch": 6.161290322580645, "loss": 1.1790249347686768, "loss_ce": 0.003243771381676197, "loss_iou": 0.51171875, "loss_num": 0.02978515625, "loss_xval": 1.171875, "num_input_tokens_seen": 132513896, "step": 1146 }, { "epoch": 6.166666666666667, "grad_norm": 21.553884506225586, "learning_rate": 5e-07, "loss": 0.6959, "num_input_tokens_seen": 132631884, "step": 1147 }, { "epoch": 6.166666666666667, "loss": 0.7394306063652039, "loss_ce": 5.0717542762868106e-05, "loss_iou": 0.326171875, "loss_num": 0.01708984375, "loss_xval": 0.73828125, "num_input_tokens_seen": 132631884, "step": 1147 }, { "epoch": 6.172043010752688, "grad_norm": 16.756200790405273, "learning_rate": 5e-07, "loss": 0.8852, "num_input_tokens_seen": 132747128, "step": 1148 }, { "epoch": 6.172043010752688, "loss": 0.7551723122596741, "loss_ce": 4.53896245744545e-05, "loss_iou": 0.326171875, "loss_num": 0.020751953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 132747128, "step": 1148 }, { "epoch": 6.17741935483871, "grad_norm": 16.709003448486328, "learning_rate": 5e-07, "loss": 0.8761, "num_input_tokens_seen": 132860792, "step": 1149 }, { "epoch": 6.17741935483871, "loss": 0.8067210912704468, "loss_ce": 8.050992619246244e-05, "loss_iou": 0.357421875, "loss_num": 0.0186767578125, "loss_xval": 0.8046875, "num_input_tokens_seen": 132860792, "step": 1149 }, { "epoch": 6.182795698924731, "grad_norm": 34.9536018371582, "learning_rate": 5e-07, "loss": 0.9554, "num_input_tokens_seen": 132975612, "step": 1150 }, { "epoch": 6.182795698924731, "loss": 0.9210583567619324, "loss_ce": 3.78650329366792e-05, "loss_iou": 0.400390625, "loss_num": 0.0238037109375, "loss_xval": 0.921875, "num_input_tokens_seen": 132975612, "step": 1150 }, { "epoch": 6.188172043010753, "grad_norm": 19.652095794677734, "learning_rate": 5e-07, "loss": 0.8663, "num_input_tokens_seen": 133093172, "step": 1151 }, { "epoch": 6.188172043010753, "loss": 0.6971188187599182, "loss_ce": 9.736655920278281e-05, "loss_iou": 0.29296875, "loss_num": 0.02197265625, "loss_xval": 0.6953125, "num_input_tokens_seen": 133093172, "step": 1151 }, { "epoch": 6.193548387096774, "grad_norm": 20.50163459777832, "learning_rate": 5e-07, "loss": 0.7847, "num_input_tokens_seen": 133210596, "step": 1152 }, { "epoch": 6.193548387096774, "loss": 0.7309593558311462, "loss_ce": 0.00012439978308975697, "loss_iou": 0.28515625, "loss_num": 0.031982421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 133210596, "step": 1152 }, { "epoch": 6.198924731182796, "grad_norm": 15.4944486618042, "learning_rate": 5e-07, "loss": 0.8875, "num_input_tokens_seen": 133324856, "step": 1153 }, { "epoch": 6.198924731182796, "loss": 0.9548795819282532, "loss_ce": 4.55468034488149e-05, "loss_iou": 0.40625, "loss_num": 0.0283203125, "loss_xval": 0.953125, "num_input_tokens_seen": 133324856, "step": 1153 }, { "epoch": 6.204301075268817, "grad_norm": 21.65239715576172, "learning_rate": 5e-07, "loss": 0.7981, "num_input_tokens_seen": 133442492, "step": 1154 }, { "epoch": 6.204301075268817, "loss": 0.7218818664550781, "loss_ce": 8.009563316591084e-05, "loss_iou": 0.30859375, "loss_num": 0.0206298828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 133442492, "step": 1154 }, { "epoch": 6.209677419354839, "grad_norm": 23.022504806518555, "learning_rate": 5e-07, "loss": 0.8679, "num_input_tokens_seen": 133556612, "step": 1155 }, { "epoch": 6.209677419354839, "loss": 0.878483772277832, "loss_ce": 6.57847267575562e-05, "loss_iou": 0.35546875, "loss_num": 0.033447265625, "loss_xval": 0.87890625, "num_input_tokens_seen": 133556612, "step": 1155 }, { "epoch": 6.21505376344086, "grad_norm": 19.21971893310547, "learning_rate": 5e-07, "loss": 0.6851, "num_input_tokens_seen": 133676204, "step": 1156 }, { "epoch": 6.21505376344086, "loss": 0.6520317196846008, "loss_ce": 5.420085653895512e-05, "loss_iou": 0.28125, "loss_num": 0.0179443359375, "loss_xval": 0.65234375, "num_input_tokens_seen": 133676204, "step": 1156 }, { "epoch": 6.220430107526882, "grad_norm": 22.803218841552734, "learning_rate": 5e-07, "loss": 0.8623, "num_input_tokens_seen": 133793388, "step": 1157 }, { "epoch": 6.220430107526882, "loss": 0.8924311399459839, "loss_ce": 9.715123451314867e-05, "loss_iou": 0.375, "loss_num": 0.0281982421875, "loss_xval": 0.890625, "num_input_tokens_seen": 133793388, "step": 1157 }, { "epoch": 6.225806451612903, "grad_norm": 22.923925399780273, "learning_rate": 5e-07, "loss": 0.8994, "num_input_tokens_seen": 133910012, "step": 1158 }, { "epoch": 6.225806451612903, "loss": 0.8465501666069031, "loss_ce": 0.00011461415851954371, "loss_iou": 0.359375, "loss_num": 0.0252685546875, "loss_xval": 0.84765625, "num_input_tokens_seen": 133910012, "step": 1158 }, { "epoch": 6.231182795698925, "grad_norm": 21.81422233581543, "learning_rate": 5e-07, "loss": 0.6939, "num_input_tokens_seen": 134023180, "step": 1159 }, { "epoch": 6.231182795698925, "loss": 0.7628825306892395, "loss_ce": 6.517131987493485e-05, "loss_iou": 0.32421875, "loss_num": 0.023193359375, "loss_xval": 0.76171875, "num_input_tokens_seen": 134023180, "step": 1159 }, { "epoch": 6.236559139784946, "grad_norm": 18.561492919921875, "learning_rate": 5e-07, "loss": 0.6717, "num_input_tokens_seen": 134138496, "step": 1160 }, { "epoch": 6.236559139784946, "loss": 0.6333396434783936, "loss_ce": 3.888759238179773e-05, "loss_iou": 0.265625, "loss_num": 0.0208740234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 134138496, "step": 1160 }, { "epoch": 6.241935483870968, "grad_norm": 18.716259002685547, "learning_rate": 5e-07, "loss": 1.0327, "num_input_tokens_seen": 134251860, "step": 1161 }, { "epoch": 6.241935483870968, "loss": 0.881561279296875, "loss_ce": 9.152591519523412e-05, "loss_iou": 0.3671875, "loss_num": 0.02978515625, "loss_xval": 0.8828125, "num_input_tokens_seen": 134251860, "step": 1161 }, { "epoch": 6.247311827956989, "grad_norm": 21.8593807220459, "learning_rate": 5e-07, "loss": 0.8128, "num_input_tokens_seen": 134366708, "step": 1162 }, { "epoch": 6.247311827956989, "loss": 0.6909956932067871, "loss_ce": 7.773673860356212e-05, "loss_iou": 0.3125, "loss_num": 0.01361083984375, "loss_xval": 0.69140625, "num_input_tokens_seen": 134366708, "step": 1162 }, { "epoch": 6.252688172043011, "grad_norm": 18.489540100097656, "learning_rate": 5e-07, "loss": 0.9459, "num_input_tokens_seen": 134484520, "step": 1163 }, { "epoch": 6.252688172043011, "loss": 1.2564276456832886, "loss_ce": 8.000210800673813e-05, "loss_iou": 0.5, "loss_num": 0.050537109375, "loss_xval": 1.2578125, "num_input_tokens_seen": 134484520, "step": 1163 }, { "epoch": 6.258064516129032, "grad_norm": 20.653703689575195, "learning_rate": 5e-07, "loss": 0.8213, "num_input_tokens_seen": 134599604, "step": 1164 }, { "epoch": 6.258064516129032, "loss": 0.6241445541381836, "loss_ce": 0.00012113897537346929, "loss_iou": 0.275390625, "loss_num": 0.01416015625, "loss_xval": 0.625, "num_input_tokens_seen": 134599604, "step": 1164 }, { "epoch": 6.263440860215054, "grad_norm": 21.18465232849121, "learning_rate": 5e-07, "loss": 0.8147, "num_input_tokens_seen": 134719288, "step": 1165 }, { "epoch": 6.263440860215054, "loss": 1.113383412361145, "loss_ce": 0.00010215798829449341, "loss_iou": 0.478515625, "loss_num": 0.0311279296875, "loss_xval": 1.109375, "num_input_tokens_seen": 134719288, "step": 1165 }, { "epoch": 6.268817204301075, "grad_norm": 17.267290115356445, "learning_rate": 5e-07, "loss": 0.869, "num_input_tokens_seen": 134838044, "step": 1166 }, { "epoch": 6.268817204301075, "loss": 0.9322887659072876, "loss_ce": 0.0006481605814769864, "loss_iou": 0.396484375, "loss_num": 0.027587890625, "loss_xval": 0.9296875, "num_input_tokens_seen": 134838044, "step": 1166 }, { "epoch": 6.274193548387097, "grad_norm": 17.58796501159668, "learning_rate": 5e-07, "loss": 1.1272, "num_input_tokens_seen": 134954800, "step": 1167 }, { "epoch": 6.274193548387097, "loss": 1.1968848705291748, "loss_ce": 0.00010754087998066097, "loss_iou": 0.498046875, "loss_num": 0.0400390625, "loss_xval": 1.1953125, "num_input_tokens_seen": 134954800, "step": 1167 }, { "epoch": 6.279569892473118, "grad_norm": 17.712617874145508, "learning_rate": 5e-07, "loss": 0.7178, "num_input_tokens_seen": 135069972, "step": 1168 }, { "epoch": 6.279569892473118, "loss": 0.6411617994308472, "loss_ce": 4.848799653700553e-05, "loss_iou": 0.28515625, "loss_num": 0.0142822265625, "loss_xval": 0.640625, "num_input_tokens_seen": 135069972, "step": 1168 }, { "epoch": 6.28494623655914, "grad_norm": 15.286833763122559, "learning_rate": 5e-07, "loss": 0.6902, "num_input_tokens_seen": 135183752, "step": 1169 }, { "epoch": 6.28494623655914, "loss": 0.6568090915679932, "loss_ce": 7.082302181515843e-05, "loss_iou": 0.296875, "loss_num": 0.0126953125, "loss_xval": 0.65625, "num_input_tokens_seen": 135183752, "step": 1169 }, { "epoch": 6.290322580645161, "grad_norm": 21.644817352294922, "learning_rate": 5e-07, "loss": 0.7248, "num_input_tokens_seen": 135297572, "step": 1170 }, { "epoch": 6.290322580645161, "loss": 0.834237277507782, "loss_ce": 0.00013082563236821443, "loss_iou": 0.31640625, "loss_num": 0.0400390625, "loss_xval": 0.8359375, "num_input_tokens_seen": 135297572, "step": 1170 }, { "epoch": 6.295698924731183, "grad_norm": 22.451955795288086, "learning_rate": 5e-07, "loss": 0.9145, "num_input_tokens_seen": 135413268, "step": 1171 }, { "epoch": 6.295698924731183, "loss": 0.6888914108276367, "loss_ce": 4.85818600282073e-05, "loss_iou": 0.298828125, "loss_num": 0.017822265625, "loss_xval": 0.6875, "num_input_tokens_seen": 135413268, "step": 1171 }, { "epoch": 6.301075268817204, "grad_norm": 17.64859962463379, "learning_rate": 5e-07, "loss": 0.773, "num_input_tokens_seen": 135530340, "step": 1172 }, { "epoch": 6.301075268817204, "loss": 0.7332054376602173, "loss_ce": 5.112933649797924e-05, "loss_iou": 0.30859375, "loss_num": 0.023681640625, "loss_xval": 0.734375, "num_input_tokens_seen": 135530340, "step": 1172 }, { "epoch": 6.306451612903226, "grad_norm": 35.51970672607422, "learning_rate": 5e-07, "loss": 0.8975, "num_input_tokens_seen": 135648440, "step": 1173 }, { "epoch": 6.306451612903226, "loss": 0.8423426747322083, "loss_ce": 5.755064921686426e-05, "loss_iou": 0.36328125, "loss_num": 0.0234375, "loss_xval": 0.84375, "num_input_tokens_seen": 135648440, "step": 1173 }, { "epoch": 6.311827956989247, "grad_norm": 23.588777542114258, "learning_rate": 5e-07, "loss": 0.8781, "num_input_tokens_seen": 135762100, "step": 1174 }, { "epoch": 6.311827956989247, "loss": 0.9315821528434753, "loss_ce": 0.00018561170145403594, "loss_iou": 0.416015625, "loss_num": 0.0198974609375, "loss_xval": 0.9296875, "num_input_tokens_seen": 135762100, "step": 1174 }, { "epoch": 6.317204301075269, "grad_norm": 20.62851333618164, "learning_rate": 5e-07, "loss": 0.9407, "num_input_tokens_seen": 135881472, "step": 1175 }, { "epoch": 6.317204301075269, "loss": 1.0000190734863281, "loss_ce": 1.9003435227205046e-05, "loss_iou": 0.43359375, "loss_num": 0.0263671875, "loss_xval": 1.0, "num_input_tokens_seen": 135881472, "step": 1175 }, { "epoch": 6.32258064516129, "grad_norm": 18.55087661743164, "learning_rate": 5e-07, "loss": 0.9097, "num_input_tokens_seen": 135995824, "step": 1176 }, { "epoch": 6.32258064516129, "loss": 1.2262531518936157, "loss_ce": 5.6880457123043016e-05, "loss_iou": 0.5390625, "loss_num": 0.029541015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 135995824, "step": 1176 }, { "epoch": 6.327956989247312, "grad_norm": 33.92586898803711, "learning_rate": 5e-07, "loss": 0.9175, "num_input_tokens_seen": 136109076, "step": 1177 }, { "epoch": 6.327956989247312, "loss": 1.1402126550674438, "loss_ce": 7.594590715598315e-05, "loss_iou": 0.5078125, "loss_num": 0.0250244140625, "loss_xval": 1.140625, "num_input_tokens_seen": 136109076, "step": 1177 }, { "epoch": 6.333333333333333, "grad_norm": 14.459040641784668, "learning_rate": 5e-07, "loss": 0.7366, "num_input_tokens_seen": 136227088, "step": 1178 }, { "epoch": 6.333333333333333, "loss": 0.7949734330177307, "loss_ce": 5.156693805474788e-05, "loss_iou": 0.353515625, "loss_num": 0.0177001953125, "loss_xval": 0.796875, "num_input_tokens_seen": 136227088, "step": 1178 }, { "epoch": 6.338709677419355, "grad_norm": 16.902997970581055, "learning_rate": 5e-07, "loss": 0.8149, "num_input_tokens_seen": 136344360, "step": 1179 }, { "epoch": 6.338709677419355, "loss": 0.9186177253723145, "loss_ce": 3.864674727083184e-05, "loss_iou": 0.408203125, "loss_num": 0.020263671875, "loss_xval": 0.91796875, "num_input_tokens_seen": 136344360, "step": 1179 }, { "epoch": 6.344086021505376, "grad_norm": 22.520763397216797, "learning_rate": 5e-07, "loss": 0.9114, "num_input_tokens_seen": 136459356, "step": 1180 }, { "epoch": 6.344086021505376, "loss": 1.111543893814087, "loss_ce": 0.0002157644194085151, "loss_iou": 0.4765625, "loss_num": 0.03173828125, "loss_xval": 1.109375, "num_input_tokens_seen": 136459356, "step": 1180 }, { "epoch": 6.349462365591398, "grad_norm": 19.503217697143555, "learning_rate": 5e-07, "loss": 0.883, "num_input_tokens_seen": 136577244, "step": 1181 }, { "epoch": 6.349462365591398, "loss": 1.2432494163513184, "loss_ce": 0.0003295369097031653, "loss_iou": 0.515625, "loss_num": 0.042236328125, "loss_xval": 1.2421875, "num_input_tokens_seen": 136577244, "step": 1181 }, { "epoch": 6.354838709677419, "grad_norm": 17.845285415649414, "learning_rate": 5e-07, "loss": 0.7154, "num_input_tokens_seen": 136692680, "step": 1182 }, { "epoch": 6.354838709677419, "loss": 0.6390892863273621, "loss_ce": 5.116985266795382e-05, "loss_iou": 0.296875, "loss_num": 0.00885009765625, "loss_xval": 0.640625, "num_input_tokens_seen": 136692680, "step": 1182 }, { "epoch": 6.360215053763441, "grad_norm": 13.143031120300293, "learning_rate": 5e-07, "loss": 0.8902, "num_input_tokens_seen": 136807956, "step": 1183 }, { "epoch": 6.360215053763441, "loss": 1.0200541019439697, "loss_ce": 3.459370054770261e-05, "loss_iou": 0.453125, "loss_num": 0.022705078125, "loss_xval": 1.0234375, "num_input_tokens_seen": 136807956, "step": 1183 }, { "epoch": 6.365591397849462, "grad_norm": 19.383567810058594, "learning_rate": 5e-07, "loss": 0.8753, "num_input_tokens_seen": 136924496, "step": 1184 }, { "epoch": 6.365591397849462, "loss": 0.9646462798118591, "loss_ce": 4.667486064136028e-05, "loss_iou": 0.41015625, "loss_num": 0.02880859375, "loss_xval": 0.96484375, "num_input_tokens_seen": 136924496, "step": 1184 }, { "epoch": 6.370967741935484, "grad_norm": 17.27004051208496, "learning_rate": 5e-07, "loss": 1.1132, "num_input_tokens_seen": 137038148, "step": 1185 }, { "epoch": 6.370967741935484, "loss": 1.578609585762024, "loss_ce": 0.0002404147817287594, "loss_iou": 0.66015625, "loss_num": 0.05126953125, "loss_xval": 1.578125, "num_input_tokens_seen": 137038148, "step": 1185 }, { "epoch": 6.376344086021505, "grad_norm": 20.185901641845703, "learning_rate": 5e-07, "loss": 0.8571, "num_input_tokens_seen": 137155276, "step": 1186 }, { "epoch": 6.376344086021505, "loss": 0.9080270528793335, "loss_ce": 6.808966281823814e-05, "loss_iou": 0.388671875, "loss_num": 0.026123046875, "loss_xval": 0.90625, "num_input_tokens_seen": 137155276, "step": 1186 }, { "epoch": 6.381720430107527, "grad_norm": 19.286645889282227, "learning_rate": 5e-07, "loss": 0.9099, "num_input_tokens_seen": 137270036, "step": 1187 }, { "epoch": 6.381720430107527, "loss": 1.0157477855682373, "loss_ce": 0.0001227821339853108, "loss_iou": 0.421875, "loss_num": 0.03466796875, "loss_xval": 1.015625, "num_input_tokens_seen": 137270036, "step": 1187 }, { "epoch": 6.387096774193548, "grad_norm": 22.015993118286133, "learning_rate": 5e-07, "loss": 0.7523, "num_input_tokens_seen": 137386524, "step": 1188 }, { "epoch": 6.387096774193548, "loss": 0.5746386647224426, "loss_ce": 5.371665974962525e-05, "loss_iou": 0.25390625, "loss_num": 0.012939453125, "loss_xval": 0.57421875, "num_input_tokens_seen": 137386524, "step": 1188 }, { "epoch": 6.39247311827957, "grad_norm": 15.685269355773926, "learning_rate": 5e-07, "loss": 0.9473, "num_input_tokens_seen": 137498672, "step": 1189 }, { "epoch": 6.39247311827957, "loss": 0.9216936230659485, "loss_ce": 6.274350744206458e-05, "loss_iou": 0.4140625, "loss_num": 0.0185546875, "loss_xval": 0.921875, "num_input_tokens_seen": 137498672, "step": 1189 }, { "epoch": 6.397849462365591, "grad_norm": 45.958457946777344, "learning_rate": 5e-07, "loss": 0.8693, "num_input_tokens_seen": 137613696, "step": 1190 }, { "epoch": 6.397849462365591, "loss": 1.1888759136199951, "loss_ce": 0.0001552665780764073, "loss_iou": 0.49609375, "loss_num": 0.039306640625, "loss_xval": 1.1875, "num_input_tokens_seen": 137613696, "step": 1190 }, { "epoch": 6.403225806451613, "grad_norm": 22.48138427734375, "learning_rate": 5e-07, "loss": 0.864, "num_input_tokens_seen": 137732804, "step": 1191 }, { "epoch": 6.403225806451613, "loss": 0.7332180738449097, "loss_ce": 6.378453690558672e-05, "loss_iou": 0.3125, "loss_num": 0.0218505859375, "loss_xval": 0.734375, "num_input_tokens_seen": 137732804, "step": 1191 }, { "epoch": 6.408602150537634, "grad_norm": 16.685028076171875, "learning_rate": 5e-07, "loss": 0.8896, "num_input_tokens_seen": 137850608, "step": 1192 }, { "epoch": 6.408602150537634, "loss": 0.7405349612236023, "loss_ce": 5.647625221172348e-05, "loss_iou": 0.298828125, "loss_num": 0.0281982421875, "loss_xval": 0.7421875, "num_input_tokens_seen": 137850608, "step": 1192 }, { "epoch": 6.413978494623656, "grad_norm": 28.86937141418457, "learning_rate": 5e-07, "loss": 0.8632, "num_input_tokens_seen": 137964896, "step": 1193 }, { "epoch": 6.413978494623656, "loss": 1.1039061546325684, "loss_ce": 0.00014635143452323973, "loss_iou": 0.486328125, "loss_num": 0.0260009765625, "loss_xval": 1.1015625, "num_input_tokens_seen": 137964896, "step": 1193 }, { "epoch": 6.419354838709677, "grad_norm": 18.787918090820312, "learning_rate": 5e-07, "loss": 0.8388, "num_input_tokens_seen": 138084216, "step": 1194 }, { "epoch": 6.419354838709677, "loss": 0.8848605155944824, "loss_ce": 9.482257155468687e-05, "loss_iou": 0.390625, "loss_num": 0.0205078125, "loss_xval": 0.8828125, "num_input_tokens_seen": 138084216, "step": 1194 }, { "epoch": 6.424731182795699, "grad_norm": 15.218137741088867, "learning_rate": 5e-07, "loss": 0.8348, "num_input_tokens_seen": 138202036, "step": 1195 }, { "epoch": 6.424731182795699, "loss": 0.7448588609695435, "loss_ce": 0.00010793114779517055, "loss_iou": 0.328125, "loss_num": 0.0179443359375, "loss_xval": 0.74609375, "num_input_tokens_seen": 138202036, "step": 1195 }, { "epoch": 6.43010752688172, "grad_norm": 16.076461791992188, "learning_rate": 5e-07, "loss": 0.7331, "num_input_tokens_seen": 138319560, "step": 1196 }, { "epoch": 6.43010752688172, "loss": 0.730268120765686, "loss_ce": 4.3487274524522945e-05, "loss_iou": 0.328125, "loss_num": 0.0145263671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 138319560, "step": 1196 }, { "epoch": 6.435483870967742, "grad_norm": 20.7853946685791, "learning_rate": 5e-07, "loss": 0.8731, "num_input_tokens_seen": 138431600, "step": 1197 }, { "epoch": 6.435483870967742, "loss": 0.8107892870903015, "loss_ce": 0.0002424373960820958, "loss_iou": 0.33984375, "loss_num": 0.0263671875, "loss_xval": 0.8125, "num_input_tokens_seen": 138431600, "step": 1197 }, { "epoch": 6.440860215053763, "grad_norm": 85.3147201538086, "learning_rate": 5e-07, "loss": 1.0453, "num_input_tokens_seen": 138546844, "step": 1198 }, { "epoch": 6.440860215053763, "loss": 0.8653905987739563, "loss_ce": 0.00015621594502590597, "loss_iou": 0.34375, "loss_num": 0.03564453125, "loss_xval": 0.8671875, "num_input_tokens_seen": 138546844, "step": 1198 }, { "epoch": 6.446236559139785, "grad_norm": 19.36113739013672, "learning_rate": 5e-07, "loss": 0.9423, "num_input_tokens_seen": 138661724, "step": 1199 }, { "epoch": 6.446236559139785, "loss": 0.9667947292327881, "loss_ce": 0.00011998230911558494, "loss_iou": 0.412109375, "loss_num": 0.0284423828125, "loss_xval": 0.96484375, "num_input_tokens_seen": 138661724, "step": 1199 }, { "epoch": 6.451612903225806, "grad_norm": 17.487573623657227, "learning_rate": 5e-07, "loss": 0.8347, "num_input_tokens_seen": 138779884, "step": 1200 }, { "epoch": 6.451612903225806, "loss": 0.9241403937339783, "loss_ce": 6.812431092839688e-05, "loss_iou": 0.400390625, "loss_num": 0.02490234375, "loss_xval": 0.92578125, "num_input_tokens_seen": 138779884, "step": 1200 }, { "epoch": 6.456989247311828, "grad_norm": 75.65742492675781, "learning_rate": 5e-07, "loss": 0.8325, "num_input_tokens_seen": 138890756, "step": 1201 }, { "epoch": 6.456989247311828, "loss": 0.6528701186180115, "loss_ce": 3.808098699664697e-05, "loss_iou": 0.244140625, "loss_num": 0.033203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 138890756, "step": 1201 }, { "epoch": 6.462365591397849, "grad_norm": 16.98236846923828, "learning_rate": 5e-07, "loss": 0.7394, "num_input_tokens_seen": 139008076, "step": 1202 }, { "epoch": 6.462365591397849, "loss": 0.5791349411010742, "loss_ce": 3.335721339681186e-05, "loss_iou": 0.2373046875, "loss_num": 0.0211181640625, "loss_xval": 0.578125, "num_input_tokens_seen": 139008076, "step": 1202 }, { "epoch": 6.467741935483871, "grad_norm": 115.35881805419922, "learning_rate": 5e-07, "loss": 0.721, "num_input_tokens_seen": 139127212, "step": 1203 }, { "epoch": 6.467741935483871, "loss": 0.8018655776977539, "loss_ce": 0.00010774059046525508, "loss_iou": 0.314453125, "loss_num": 0.034912109375, "loss_xval": 0.80078125, "num_input_tokens_seen": 139127212, "step": 1203 }, { "epoch": 6.473118279569892, "grad_norm": 16.53626251220703, "learning_rate": 5e-07, "loss": 0.8055, "num_input_tokens_seen": 139247436, "step": 1204 }, { "epoch": 6.473118279569892, "loss": 0.9788943529129028, "loss_ce": 0.0001345800410490483, "loss_iou": 0.419921875, "loss_num": 0.028076171875, "loss_xval": 0.98046875, "num_input_tokens_seen": 139247436, "step": 1204 }, { "epoch": 6.478494623655914, "grad_norm": 21.95462989807129, "learning_rate": 5e-07, "loss": 0.8679, "num_input_tokens_seen": 139363984, "step": 1205 }, { "epoch": 6.478494623655914, "loss": 0.6634005308151245, "loss_ce": 7.041042408673093e-05, "loss_iou": 0.259765625, "loss_num": 0.0291748046875, "loss_xval": 0.6640625, "num_input_tokens_seen": 139363984, "step": 1205 }, { "epoch": 6.483870967741936, "grad_norm": 16.2119140625, "learning_rate": 5e-07, "loss": 1.1503, "num_input_tokens_seen": 139478060, "step": 1206 }, { "epoch": 6.483870967741936, "loss": 0.8987491130828857, "loss_ce": 6.755204231012613e-05, "loss_iou": 0.390625, "loss_num": 0.0234375, "loss_xval": 0.8984375, "num_input_tokens_seen": 139478060, "step": 1206 }, { "epoch": 6.489247311827957, "grad_norm": 21.500322341918945, "learning_rate": 5e-07, "loss": 0.8561, "num_input_tokens_seen": 139591584, "step": 1207 }, { "epoch": 6.489247311827957, "loss": 0.8890085220336914, "loss_ce": 9.248999413102865e-05, "loss_iou": 0.390625, "loss_num": 0.021728515625, "loss_xval": 0.890625, "num_input_tokens_seen": 139591584, "step": 1207 }, { "epoch": 6.494623655913978, "grad_norm": 18.504772186279297, "learning_rate": 5e-07, "loss": 0.8517, "num_input_tokens_seen": 139708700, "step": 1208 }, { "epoch": 6.494623655913978, "loss": 0.8707700967788696, "loss_ce": 0.0001646218152018264, "loss_iou": 0.365234375, "loss_num": 0.0284423828125, "loss_xval": 0.87109375, "num_input_tokens_seen": 139708700, "step": 1208 }, { "epoch": 6.5, "grad_norm": 17.82021713256836, "learning_rate": 5e-07, "loss": 0.8358, "num_input_tokens_seen": 139822272, "step": 1209 }, { "epoch": 6.5, "loss": 0.6742991805076599, "loss_ce": 0.00010482735524419695, "loss_iou": 0.2734375, "loss_num": 0.025146484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 139822272, "step": 1209 }, { "epoch": 6.505376344086022, "grad_norm": 19.00002098083496, "learning_rate": 5e-07, "loss": 0.7662, "num_input_tokens_seen": 139939288, "step": 1210 }, { "epoch": 6.505376344086022, "loss": 0.7547313570976257, "loss_ce": 3.165191083098762e-05, "loss_iou": 0.314453125, "loss_num": 0.0252685546875, "loss_xval": 0.75390625, "num_input_tokens_seen": 139939288, "step": 1210 }, { "epoch": 6.510752688172043, "grad_norm": 20.965606689453125, "learning_rate": 5e-07, "loss": 0.6062, "num_input_tokens_seen": 140052156, "step": 1211 }, { "epoch": 6.510752688172043, "loss": 0.46829670667648315, "loss_ce": 3.498003934510052e-05, "loss_iou": 0.1904296875, "loss_num": 0.0174560546875, "loss_xval": 0.46875, "num_input_tokens_seen": 140052156, "step": 1211 }, { "epoch": 6.516129032258064, "grad_norm": 46.9678840637207, "learning_rate": 5e-07, "loss": 0.6704, "num_input_tokens_seen": 140169152, "step": 1212 }, { "epoch": 6.516129032258064, "loss": 0.6792891025543213, "loss_ce": 8.990599599201232e-05, "loss_iou": 0.302734375, "loss_num": 0.0145263671875, "loss_xval": 0.6796875, "num_input_tokens_seen": 140169152, "step": 1212 }, { "epoch": 6.521505376344086, "grad_norm": 12.731374740600586, "learning_rate": 5e-07, "loss": 0.6135, "num_input_tokens_seen": 140287648, "step": 1213 }, { "epoch": 6.521505376344086, "loss": 0.8919326066970825, "loss_ce": 8.689754758961499e-05, "loss_iou": 0.396484375, "loss_num": 0.0198974609375, "loss_xval": 0.890625, "num_input_tokens_seen": 140287648, "step": 1213 }, { "epoch": 6.526881720430108, "grad_norm": 19.918067932128906, "learning_rate": 5e-07, "loss": 0.7857, "num_input_tokens_seen": 140404280, "step": 1214 }, { "epoch": 6.526881720430108, "loss": 0.5493522882461548, "loss_ce": 3.591487984522246e-05, "loss_iou": 0.2470703125, "loss_num": 0.0111083984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 140404280, "step": 1214 }, { "epoch": 6.532258064516129, "grad_norm": 13.032615661621094, "learning_rate": 5e-07, "loss": 1.0294, "num_input_tokens_seen": 140518192, "step": 1215 }, { "epoch": 6.532258064516129, "loss": 0.662389874458313, "loss_ce": 3.6317360354587436e-05, "loss_iou": 0.283203125, "loss_num": 0.01953125, "loss_xval": 0.6640625, "num_input_tokens_seen": 140518192, "step": 1215 }, { "epoch": 6.53763440860215, "grad_norm": 16.43966293334961, "learning_rate": 5e-07, "loss": 0.8167, "num_input_tokens_seen": 140635420, "step": 1216 }, { "epoch": 6.53763440860215, "loss": 0.868817925453186, "loss_ce": 0.0001655904488870874, "loss_iou": 0.345703125, "loss_num": 0.035888671875, "loss_xval": 0.8671875, "num_input_tokens_seen": 140635420, "step": 1216 }, { "epoch": 6.543010752688172, "grad_norm": 15.699464797973633, "learning_rate": 5e-07, "loss": 0.9217, "num_input_tokens_seen": 140750776, "step": 1217 }, { "epoch": 6.543010752688172, "loss": 1.1450843811035156, "loss_ce": 6.484766345238313e-05, "loss_iou": 0.478515625, "loss_num": 0.03759765625, "loss_xval": 1.1484375, "num_input_tokens_seen": 140750776, "step": 1217 }, { "epoch": 6.548387096774194, "grad_norm": 22.016630172729492, "learning_rate": 5e-07, "loss": 0.8138, "num_input_tokens_seen": 140867944, "step": 1218 }, { "epoch": 6.548387096774194, "loss": 0.8960729837417603, "loss_ce": 7.693495717830956e-05, "loss_iou": 0.375, "loss_num": 0.0289306640625, "loss_xval": 0.89453125, "num_input_tokens_seen": 140867944, "step": 1218 }, { "epoch": 6.553763440860215, "grad_norm": 18.632841110229492, "learning_rate": 5e-07, "loss": 0.8632, "num_input_tokens_seen": 140983520, "step": 1219 }, { "epoch": 6.553763440860215, "loss": 1.0807709693908691, "loss_ce": 0.00020449548901524395, "loss_iou": 0.46484375, "loss_num": 0.0303955078125, "loss_xval": 1.078125, "num_input_tokens_seen": 140983520, "step": 1219 }, { "epoch": 6.559139784946236, "grad_norm": 18.409427642822266, "learning_rate": 5e-07, "loss": 0.7988, "num_input_tokens_seen": 141097088, "step": 1220 }, { "epoch": 6.559139784946236, "loss": 0.5493653416633606, "loss_ce": 4.893012373941019e-05, "loss_iou": 0.2119140625, "loss_num": 0.025146484375, "loss_xval": 0.55078125, "num_input_tokens_seen": 141097088, "step": 1220 }, { "epoch": 6.564516129032258, "grad_norm": 20.11862564086914, "learning_rate": 5e-07, "loss": 0.6865, "num_input_tokens_seen": 141214328, "step": 1221 }, { "epoch": 6.564516129032258, "loss": 0.7151352167129517, "loss_ce": 4.7362678742501885e-05, "loss_iou": 0.302734375, "loss_num": 0.021728515625, "loss_xval": 0.71484375, "num_input_tokens_seen": 141214328, "step": 1221 }, { "epoch": 6.56989247311828, "grad_norm": 22.289125442504883, "learning_rate": 5e-07, "loss": 0.8861, "num_input_tokens_seen": 141329236, "step": 1222 }, { "epoch": 6.56989247311828, "loss": 0.9048203229904175, "loss_ce": 3.514583659125492e-05, "loss_iou": 0.390625, "loss_num": 0.0250244140625, "loss_xval": 0.90625, "num_input_tokens_seen": 141329236, "step": 1222 }, { "epoch": 6.575268817204301, "grad_norm": 45.45615005493164, "learning_rate": 5e-07, "loss": 0.9894, "num_input_tokens_seen": 141445260, "step": 1223 }, { "epoch": 6.575268817204301, "loss": 0.596982479095459, "loss_ce": 5.863262776983902e-05, "loss_iou": 0.2412109375, "loss_num": 0.0230712890625, "loss_xval": 0.59765625, "num_input_tokens_seen": 141445260, "step": 1223 }, { "epoch": 6.580645161290323, "grad_norm": 22.64495277404785, "learning_rate": 5e-07, "loss": 0.8148, "num_input_tokens_seen": 141559496, "step": 1224 }, { "epoch": 6.580645161290323, "loss": 0.8187085390090942, "loss_ce": 0.00010503718658583239, "loss_iou": 0.33203125, "loss_num": 0.03076171875, "loss_xval": 0.8203125, "num_input_tokens_seen": 141559496, "step": 1224 }, { "epoch": 6.586021505376344, "grad_norm": 24.630870819091797, "learning_rate": 5e-07, "loss": 0.8153, "num_input_tokens_seen": 141674460, "step": 1225 }, { "epoch": 6.586021505376344, "loss": 0.7192831039428711, "loss_ce": 4.482115764403716e-05, "loss_iou": 0.298828125, "loss_num": 0.0244140625, "loss_xval": 0.71875, "num_input_tokens_seen": 141674460, "step": 1225 }, { "epoch": 6.591397849462366, "grad_norm": 28.38779067993164, "learning_rate": 5e-07, "loss": 0.9606, "num_input_tokens_seen": 141788888, "step": 1226 }, { "epoch": 6.591397849462366, "loss": 0.8748213648796082, "loss_ce": 6.553107959916815e-05, "loss_iou": 0.37109375, "loss_num": 0.0260009765625, "loss_xval": 0.875, "num_input_tokens_seen": 141788888, "step": 1226 }, { "epoch": 6.596774193548387, "grad_norm": 14.43863582611084, "learning_rate": 5e-07, "loss": 0.9198, "num_input_tokens_seen": 141904096, "step": 1227 }, { "epoch": 6.596774193548387, "loss": 1.1717298030853271, "loss_ce": 9.885487816063687e-05, "loss_iou": 0.47265625, "loss_num": 0.045166015625, "loss_xval": 1.171875, "num_input_tokens_seen": 141904096, "step": 1227 }, { "epoch": 6.602150537634409, "grad_norm": 13.844927787780762, "learning_rate": 5e-07, "loss": 0.8404, "num_input_tokens_seen": 142021328, "step": 1228 }, { "epoch": 6.602150537634409, "loss": 0.7910804152488708, "loss_ce": 6.484778714366257e-05, "loss_iou": 0.365234375, "loss_num": 0.01171875, "loss_xval": 0.7890625, "num_input_tokens_seen": 142021328, "step": 1228 }, { "epoch": 6.60752688172043, "grad_norm": 19.302087783813477, "learning_rate": 5e-07, "loss": 0.6521, "num_input_tokens_seen": 142140576, "step": 1229 }, { "epoch": 6.60752688172043, "loss": 0.8030691742897034, "loss_ce": 9.064508776646107e-05, "loss_iou": 0.34375, "loss_num": 0.023193359375, "loss_xval": 0.8046875, "num_input_tokens_seen": 142140576, "step": 1229 }, { "epoch": 6.612903225806452, "grad_norm": 24.7027530670166, "learning_rate": 5e-07, "loss": 0.8385, "num_input_tokens_seen": 142255356, "step": 1230 }, { "epoch": 6.612903225806452, "loss": 0.934627115726471, "loss_ce": 5.680312460754067e-05, "loss_iou": 0.40234375, "loss_num": 0.0260009765625, "loss_xval": 0.93359375, "num_input_tokens_seen": 142255356, "step": 1230 }, { "epoch": 6.618279569892473, "grad_norm": 18.92416000366211, "learning_rate": 5e-07, "loss": 0.9488, "num_input_tokens_seen": 142370188, "step": 1231 }, { "epoch": 6.618279569892473, "loss": 0.6726377606391907, "loss_ce": 3.0309533030958846e-05, "loss_iou": 0.28125, "loss_num": 0.021728515625, "loss_xval": 0.671875, "num_input_tokens_seen": 142370188, "step": 1231 }, { "epoch": 6.623655913978495, "grad_norm": 16.266183853149414, "learning_rate": 5e-07, "loss": 0.9235, "num_input_tokens_seen": 142484604, "step": 1232 }, { "epoch": 6.623655913978495, "loss": 0.7950116395950317, "loss_ce": 8.978647383628413e-05, "loss_iou": 0.34375, "loss_num": 0.0218505859375, "loss_xval": 0.796875, "num_input_tokens_seen": 142484604, "step": 1232 }, { "epoch": 6.629032258064516, "grad_norm": 24.024477005004883, "learning_rate": 5e-07, "loss": 0.8595, "num_input_tokens_seen": 142598968, "step": 1233 }, { "epoch": 6.629032258064516, "loss": 0.6578903198242188, "loss_ce": 5.341792348190211e-05, "loss_iou": 0.29296875, "loss_num": 0.0147705078125, "loss_xval": 0.65625, "num_input_tokens_seen": 142598968, "step": 1233 }, { "epoch": 6.634408602150538, "grad_norm": 15.990174293518066, "learning_rate": 5e-07, "loss": 0.8474, "num_input_tokens_seen": 142712532, "step": 1234 }, { "epoch": 6.634408602150538, "loss": 1.0663114786148071, "loss_ce": 2.7331538149155676e-05, "loss_iou": 0.482421875, "loss_num": 0.0205078125, "loss_xval": 1.0625, "num_input_tokens_seen": 142712532, "step": 1234 }, { "epoch": 6.639784946236559, "grad_norm": 20.675310134887695, "learning_rate": 5e-07, "loss": 0.8617, "num_input_tokens_seen": 142826676, "step": 1235 }, { "epoch": 6.639784946236559, "loss": 0.7632356286048889, "loss_ce": 5.20583416800946e-05, "loss_iou": 0.330078125, "loss_num": 0.020263671875, "loss_xval": 0.76171875, "num_input_tokens_seen": 142826676, "step": 1235 }, { "epoch": 6.645161290322581, "grad_norm": 32.1843376159668, "learning_rate": 5e-07, "loss": 0.888, "num_input_tokens_seen": 142940748, "step": 1236 }, { "epoch": 6.645161290322581, "loss": 0.7177035808563232, "loss_ce": 5.219711965764873e-05, "loss_iou": 0.3046875, "loss_num": 0.0213623046875, "loss_xval": 0.71875, "num_input_tokens_seen": 142940748, "step": 1236 }, { "epoch": 6.650537634408602, "grad_norm": 14.131155967712402, "learning_rate": 5e-07, "loss": 0.7638, "num_input_tokens_seen": 143054820, "step": 1237 }, { "epoch": 6.650537634408602, "loss": 0.7830550670623779, "loss_ce": 9.610031702322885e-05, "loss_iou": 0.3515625, "loss_num": 0.016357421875, "loss_xval": 0.78125, "num_input_tokens_seen": 143054820, "step": 1237 }, { "epoch": 6.655913978494624, "grad_norm": 17.30506706237793, "learning_rate": 5e-07, "loss": 0.9799, "num_input_tokens_seen": 143169420, "step": 1238 }, { "epoch": 6.655913978494624, "loss": 1.3406589031219482, "loss_ce": 8.264745702035725e-05, "loss_iou": 0.59375, "loss_num": 0.0302734375, "loss_xval": 1.34375, "num_input_tokens_seen": 143169420, "step": 1238 }, { "epoch": 6.661290322580645, "grad_norm": 20.547008514404297, "learning_rate": 5e-07, "loss": 0.7521, "num_input_tokens_seen": 143286400, "step": 1239 }, { "epoch": 6.661290322580645, "loss": 0.831084132194519, "loss_ce": 2.949142799479887e-05, "loss_iou": 0.330078125, "loss_num": 0.034423828125, "loss_xval": 0.83203125, "num_input_tokens_seen": 143286400, "step": 1239 }, { "epoch": 6.666666666666667, "grad_norm": 18.786945343017578, "learning_rate": 5e-07, "loss": 0.9071, "num_input_tokens_seen": 143398880, "step": 1240 }, { "epoch": 6.666666666666667, "loss": 0.911234974861145, "loss_ce": 0.00010217665112577379, "loss_iou": 0.37109375, "loss_num": 0.0341796875, "loss_xval": 0.91015625, "num_input_tokens_seen": 143398880, "step": 1240 }, { "epoch": 6.672043010752688, "grad_norm": 26.572505950927734, "learning_rate": 5e-07, "loss": 0.7827, "num_input_tokens_seen": 143514388, "step": 1241 }, { "epoch": 6.672043010752688, "loss": 0.7925182580947876, "loss_ce": 3.7738733226433396e-05, "loss_iou": 0.33203125, "loss_num": 0.025146484375, "loss_xval": 0.79296875, "num_input_tokens_seen": 143514388, "step": 1241 }, { "epoch": 6.67741935483871, "grad_norm": 28.893428802490234, "learning_rate": 5e-07, "loss": 0.956, "num_input_tokens_seen": 143631516, "step": 1242 }, { "epoch": 6.67741935483871, "loss": 0.731282114982605, "loss_ce": 0.00020301833865232766, "loss_iou": 0.318359375, "loss_num": 0.0189208984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 143631516, "step": 1242 }, { "epoch": 6.682795698924731, "grad_norm": 18.4816837310791, "learning_rate": 5e-07, "loss": 0.7008, "num_input_tokens_seen": 143748476, "step": 1243 }, { "epoch": 6.682795698924731, "loss": 0.6777684092521667, "loss_ce": 3.401239519007504e-05, "loss_iou": 0.30859375, "loss_num": 0.0125732421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 143748476, "step": 1243 }, { "epoch": 6.688172043010753, "grad_norm": 17.157140731811523, "learning_rate": 5e-07, "loss": 0.7837, "num_input_tokens_seen": 143865036, "step": 1244 }, { "epoch": 6.688172043010753, "loss": 0.5940223336219788, "loss_ce": 2.8179387300042436e-05, "loss_iou": 0.267578125, "loss_num": 0.0113525390625, "loss_xval": 0.59375, "num_input_tokens_seen": 143865036, "step": 1244 }, { "epoch": 6.693548387096774, "grad_norm": 20.34494972229004, "learning_rate": 5e-07, "loss": 0.7356, "num_input_tokens_seen": 143976148, "step": 1245 }, { "epoch": 6.693548387096774, "loss": 0.6634083986282349, "loss_ce": 7.826087676221505e-05, "loss_iou": 0.294921875, "loss_num": 0.0147705078125, "loss_xval": 0.6640625, "num_input_tokens_seen": 143976148, "step": 1245 }, { "epoch": 6.698924731182796, "grad_norm": 18.229421615600586, "learning_rate": 5e-07, "loss": 0.8944, "num_input_tokens_seen": 144089880, "step": 1246 }, { "epoch": 6.698924731182796, "loss": 1.0952732563018799, "loss_ce": 5.840610901941545e-05, "loss_iou": 0.466796875, "loss_num": 0.03271484375, "loss_xval": 1.09375, "num_input_tokens_seen": 144089880, "step": 1246 }, { "epoch": 6.704301075268817, "grad_norm": 15.060336112976074, "learning_rate": 5e-07, "loss": 0.6622, "num_input_tokens_seen": 144204672, "step": 1247 }, { "epoch": 6.704301075268817, "loss": 0.593780517578125, "loss_ce": 3.053190448554233e-05, "loss_iou": 0.2451171875, "loss_num": 0.020751953125, "loss_xval": 0.59375, "num_input_tokens_seen": 144204672, "step": 1247 }, { "epoch": 6.709677419354839, "grad_norm": 19.278202056884766, "learning_rate": 5e-07, "loss": 0.8376, "num_input_tokens_seen": 144320920, "step": 1248 }, { "epoch": 6.709677419354839, "loss": 0.9897998571395874, "loss_ce": 5.3726424084743485e-05, "loss_iou": 0.404296875, "loss_num": 0.036376953125, "loss_xval": 0.98828125, "num_input_tokens_seen": 144320920, "step": 1248 }, { "epoch": 6.71505376344086, "grad_norm": 16.997478485107422, "learning_rate": 5e-07, "loss": 0.8211, "num_input_tokens_seen": 144435204, "step": 1249 }, { "epoch": 6.71505376344086, "loss": 0.7578562498092651, "loss_ce": 4.37655980931595e-05, "loss_iou": 0.322265625, "loss_num": 0.022705078125, "loss_xval": 0.7578125, "num_input_tokens_seen": 144435204, "step": 1249 }, { "epoch": 6.720430107526882, "grad_norm": 19.427127838134766, "learning_rate": 5e-07, "loss": 0.7981, "num_input_tokens_seen": 144552052, "step": 1250 }, { "epoch": 6.720430107526882, "eval_icons_CIoU": 0.13912716135382652, "eval_icons_GIoU": 0.1083863377571106, "eval_icons_IoU": 0.3028104901313782, "eval_icons_MAE_all": 0.03239532466977835, "eval_icons_MAE_h": 0.03559565730392933, "eval_icons_MAE_w": 0.056373756378889084, "eval_icons_MAE_x_boxes": 0.05362176336348057, "eval_icons_MAE_y_boxes": 0.03481398057192564, "eval_icons_NUM_probability": 0.9984191358089447, "eval_icons_inside_bbox": 0.578125, "eval_icons_loss": 1.9321560859680176, "eval_icons_loss_ce": 0.0002434530106256716, "eval_icons_loss_iou": 0.876708984375, "eval_icons_loss_num": 0.033176422119140625, "eval_icons_loss_xval": 1.91943359375, "eval_icons_runtime": 43.0983, "eval_icons_samples_per_second": 1.16, "eval_icons_steps_per_second": 0.046, "num_input_tokens_seen": 144552052, "step": 1250 }, { "epoch": 6.720430107526882, "eval_screenspot_CIoU": 0.2938700467348099, "eval_screenspot_GIoU": 0.2812644938627879, "eval_screenspot_IoU": 0.3920774857203166, "eval_screenspot_MAE_all": 0.06443765511115392, "eval_screenspot_MAE_h": 0.04894807065526644, "eval_screenspot_MAE_w": 0.08702891568342845, "eval_screenspot_MAE_x_boxes": 0.09161424140135448, "eval_screenspot_MAE_y_boxes": 0.03914226032793522, "eval_screenspot_NUM_probability": 0.9997712771097819, "eval_screenspot_inside_bbox": 0.725000003973643, "eval_screenspot_loss": 1.8168163299560547, "eval_screenspot_loss_ce": 9.724599415979658e-05, "eval_screenspot_loss_iou": 0.760498046875, "eval_screenspot_loss_num": 0.07369232177734375, "eval_screenspot_loss_xval": 1.8878580729166667, "eval_screenspot_runtime": 75.8879, "eval_screenspot_samples_per_second": 1.173, "eval_screenspot_steps_per_second": 0.04, "num_input_tokens_seen": 144552052, "step": 1250 }, { "epoch": 6.720430107526882, "loss": 1.7788546085357666, "loss_ce": 4.603464185493067e-05, "loss_iou": 0.734375, "loss_num": 0.06201171875, "loss_xval": 1.78125, "num_input_tokens_seen": 144552052, "step": 1250 }, { "epoch": 6.725806451612903, "grad_norm": 16.599008560180664, "learning_rate": 5e-07, "loss": 0.8914, "num_input_tokens_seen": 144668864, "step": 1251 }, { "epoch": 6.725806451612903, "loss": 0.8389232158660889, "loss_ce": 5.6065222452161834e-05, "loss_iou": 0.33203125, "loss_num": 0.034423828125, "loss_xval": 0.83984375, "num_input_tokens_seen": 144668864, "step": 1251 }, { "epoch": 6.731182795698925, "grad_norm": 24.143220901489258, "learning_rate": 5e-07, "loss": 0.7907, "num_input_tokens_seen": 144786200, "step": 1252 }, { "epoch": 6.731182795698925, "loss": 0.8015508651733398, "loss_ce": 3.720650420291349e-05, "loss_iou": 0.33203125, "loss_num": 0.02783203125, "loss_xval": 0.80078125, "num_input_tokens_seen": 144786200, "step": 1252 }, { "epoch": 6.736559139784946, "grad_norm": 20.645902633666992, "learning_rate": 5e-07, "loss": 0.9493, "num_input_tokens_seen": 144901784, "step": 1253 }, { "epoch": 6.736559139784946, "loss": 0.9624648094177246, "loss_ce": 6.243425013963133e-05, "loss_iou": 0.416015625, "loss_num": 0.026123046875, "loss_xval": 0.9609375, "num_input_tokens_seen": 144901784, "step": 1253 }, { "epoch": 6.741935483870968, "grad_norm": 18.82040786743164, "learning_rate": 5e-07, "loss": 0.9295, "num_input_tokens_seen": 145018200, "step": 1254 }, { "epoch": 6.741935483870968, "loss": 0.8804010152816772, "loss_ce": 2.9896944397478364e-05, "loss_iou": 0.376953125, "loss_num": 0.02490234375, "loss_xval": 0.87890625, "num_input_tokens_seen": 145018200, "step": 1254 }, { "epoch": 6.747311827956989, "grad_norm": 31.954397201538086, "learning_rate": 5e-07, "loss": 0.9315, "num_input_tokens_seen": 145133500, "step": 1255 }, { "epoch": 6.747311827956989, "loss": 0.5524311065673828, "loss_ce": 6.290784222073853e-05, "loss_iou": 0.232421875, "loss_num": 0.017578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 145133500, "step": 1255 }, { "epoch": 6.752688172043011, "grad_norm": 16.681394577026367, "learning_rate": 5e-07, "loss": 0.7228, "num_input_tokens_seen": 145251388, "step": 1256 }, { "epoch": 6.752688172043011, "loss": 0.49666130542755127, "loss_ce": 7.926887337816879e-05, "loss_iou": 0.19921875, "loss_num": 0.019775390625, "loss_xval": 0.49609375, "num_input_tokens_seen": 145251388, "step": 1256 }, { "epoch": 6.758064516129032, "grad_norm": 16.0112361907959, "learning_rate": 5e-07, "loss": 0.7539, "num_input_tokens_seen": 145365892, "step": 1257 }, { "epoch": 6.758064516129032, "loss": 0.7710235118865967, "loss_ce": 0.0007598799420520663, "loss_iou": 0.318359375, "loss_num": 0.02685546875, "loss_xval": 0.76953125, "num_input_tokens_seen": 145365892, "step": 1257 }, { "epoch": 6.763440860215054, "grad_norm": 19.154268264770508, "learning_rate": 5e-07, "loss": 0.7628, "num_input_tokens_seen": 145483588, "step": 1258 }, { "epoch": 6.763440860215054, "loss": 1.0503487586975098, "loss_ce": 5.574922397499904e-05, "loss_iou": 0.45703125, "loss_num": 0.0269775390625, "loss_xval": 1.046875, "num_input_tokens_seen": 145483588, "step": 1258 }, { "epoch": 6.768817204301075, "grad_norm": 30.331716537475586, "learning_rate": 5e-07, "loss": 0.6506, "num_input_tokens_seen": 145598472, "step": 1259 }, { "epoch": 6.768817204301075, "loss": 0.7443315982818604, "loss_ce": 6.896213017171249e-05, "loss_iou": 0.33203125, "loss_num": 0.0162353515625, "loss_xval": 0.74609375, "num_input_tokens_seen": 145598472, "step": 1259 }, { "epoch": 6.774193548387097, "grad_norm": 16.987314224243164, "learning_rate": 5e-07, "loss": 0.8823, "num_input_tokens_seen": 145714608, "step": 1260 }, { "epoch": 6.774193548387097, "loss": 0.7139370441436768, "loss_ce": 6.987261440372095e-05, "loss_iou": 0.30859375, "loss_num": 0.019287109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 145714608, "step": 1260 }, { "epoch": 6.779569892473118, "grad_norm": 18.169700622558594, "learning_rate": 5e-07, "loss": 0.8276, "num_input_tokens_seen": 145828824, "step": 1261 }, { "epoch": 6.779569892473118, "loss": 0.8140063285827637, "loss_ce": 4.1514504118822515e-05, "loss_iou": 0.349609375, "loss_num": 0.022705078125, "loss_xval": 0.8125, "num_input_tokens_seen": 145828824, "step": 1261 }, { "epoch": 6.78494623655914, "grad_norm": 19.19839859008789, "learning_rate": 5e-07, "loss": 0.7939, "num_input_tokens_seen": 145945904, "step": 1262 }, { "epoch": 6.78494623655914, "loss": 0.7131786346435547, "loss_ce": 4.386592627270147e-05, "loss_iou": 0.31640625, "loss_num": 0.0159912109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 145945904, "step": 1262 }, { "epoch": 6.790322580645161, "grad_norm": 22.61176872253418, "learning_rate": 5e-07, "loss": 0.8488, "num_input_tokens_seen": 146060024, "step": 1263 }, { "epoch": 6.790322580645161, "loss": 0.49125149846076965, "loss_ce": 4.058048943988979e-05, "loss_iou": 0.19921875, "loss_num": 0.018310546875, "loss_xval": 0.4921875, "num_input_tokens_seen": 146060024, "step": 1263 }, { "epoch": 6.795698924731183, "grad_norm": 16.758094787597656, "learning_rate": 5e-07, "loss": 0.8703, "num_input_tokens_seen": 146178592, "step": 1264 }, { "epoch": 6.795698924731183, "loss": 0.7061020135879517, "loss_ce": 4.7360750613734126e-05, "loss_iou": 0.314453125, "loss_num": 0.01531982421875, "loss_xval": 0.70703125, "num_input_tokens_seen": 146178592, "step": 1264 }, { "epoch": 6.801075268817204, "grad_norm": 16.99317169189453, "learning_rate": 5e-07, "loss": 0.8478, "num_input_tokens_seen": 146295508, "step": 1265 }, { "epoch": 6.801075268817204, "loss": 0.7396507859230042, "loss_ce": 2.67676205112366e-05, "loss_iou": 0.314453125, "loss_num": 0.0218505859375, "loss_xval": 0.73828125, "num_input_tokens_seen": 146295508, "step": 1265 }, { "epoch": 6.806451612903226, "grad_norm": 16.517333984375, "learning_rate": 5e-07, "loss": 0.9862, "num_input_tokens_seen": 146413088, "step": 1266 }, { "epoch": 6.806451612903226, "loss": 0.8557958602905273, "loss_ce": 8.296367013826966e-05, "loss_iou": 0.376953125, "loss_num": 0.0205078125, "loss_xval": 0.85546875, "num_input_tokens_seen": 146413088, "step": 1266 }, { "epoch": 6.811827956989247, "grad_norm": 23.02396011352539, "learning_rate": 5e-07, "loss": 0.8036, "num_input_tokens_seen": 146526968, "step": 1267 }, { "epoch": 6.811827956989247, "loss": 1.0403491258621216, "loss_ce": 6.596527964575216e-05, "loss_iou": 0.45703125, "loss_num": 0.0252685546875, "loss_xval": 1.0390625, "num_input_tokens_seen": 146526968, "step": 1267 }, { "epoch": 6.817204301075269, "grad_norm": 15.323639869689941, "learning_rate": 5e-07, "loss": 0.8092, "num_input_tokens_seen": 146642008, "step": 1268 }, { "epoch": 6.817204301075269, "loss": 0.6348764896392822, "loss_ce": 4.986463682143949e-05, "loss_iou": 0.2890625, "loss_num": 0.01141357421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 146642008, "step": 1268 }, { "epoch": 6.82258064516129, "grad_norm": 17.83002281188965, "learning_rate": 5e-07, "loss": 0.7555, "num_input_tokens_seen": 146759244, "step": 1269 }, { "epoch": 6.82258064516129, "loss": 0.3611617684364319, "loss_ce": 1.6766021872172132e-05, "loss_iou": 0.134765625, "loss_num": 0.018310546875, "loss_xval": 0.361328125, "num_input_tokens_seen": 146759244, "step": 1269 }, { "epoch": 6.827956989247312, "grad_norm": 53.579139709472656, "learning_rate": 5e-07, "loss": 0.8236, "num_input_tokens_seen": 146876664, "step": 1270 }, { "epoch": 6.827956989247312, "loss": 0.6364206075668335, "loss_ce": 6.802893040003255e-05, "loss_iou": 0.26171875, "loss_num": 0.022705078125, "loss_xval": 0.63671875, "num_input_tokens_seen": 146876664, "step": 1270 }, { "epoch": 6.833333333333333, "grad_norm": 32.78411865234375, "learning_rate": 5e-07, "loss": 0.7335, "num_input_tokens_seen": 146992680, "step": 1271 }, { "epoch": 6.833333333333333, "loss": 0.5156586766242981, "loss_ce": 3.368714533280581e-05, "loss_iou": 0.18359375, "loss_num": 0.029541015625, "loss_xval": 0.515625, "num_input_tokens_seen": 146992680, "step": 1271 }, { "epoch": 6.838709677419355, "grad_norm": 20.258403778076172, "learning_rate": 5e-07, "loss": 0.8786, "num_input_tokens_seen": 147110116, "step": 1272 }, { "epoch": 6.838709677419355, "loss": 0.8789958953857422, "loss_ce": 8.962298306869343e-05, "loss_iou": 0.376953125, "loss_num": 0.0252685546875, "loss_xval": 0.87890625, "num_input_tokens_seen": 147110116, "step": 1272 }, { "epoch": 6.844086021505376, "grad_norm": 21.561264038085938, "learning_rate": 5e-07, "loss": 0.9579, "num_input_tokens_seen": 147227004, "step": 1273 }, { "epoch": 6.844086021505376, "loss": 1.137761116027832, "loss_ce": 6.585509981960058e-05, "loss_iou": 0.482421875, "loss_num": 0.0341796875, "loss_xval": 1.140625, "num_input_tokens_seen": 147227004, "step": 1273 }, { "epoch": 6.849462365591398, "grad_norm": 31.59235954284668, "learning_rate": 5e-07, "loss": 0.9304, "num_input_tokens_seen": 147344544, "step": 1274 }, { "epoch": 6.849462365591398, "loss": 1.346259355545044, "loss_ce": 6.798609683755785e-05, "loss_iou": 0.58984375, "loss_num": 0.033447265625, "loss_xval": 1.34375, "num_input_tokens_seen": 147344544, "step": 1274 }, { "epoch": 6.854838709677419, "grad_norm": 17.692277908325195, "learning_rate": 5e-07, "loss": 0.7002, "num_input_tokens_seen": 147463052, "step": 1275 }, { "epoch": 6.854838709677419, "loss": 0.8587403893470764, "loss_ce": 9.780752588994801e-05, "loss_iou": 0.359375, "loss_num": 0.0283203125, "loss_xval": 0.859375, "num_input_tokens_seen": 147463052, "step": 1275 }, { "epoch": 6.860215053763441, "grad_norm": 16.478574752807617, "learning_rate": 5e-07, "loss": 0.8962, "num_input_tokens_seen": 147576136, "step": 1276 }, { "epoch": 6.860215053763441, "loss": 0.9393285512924194, "loss_ce": 0.000607890251558274, "loss_iou": 0.412109375, "loss_num": 0.0230712890625, "loss_xval": 0.9375, "num_input_tokens_seen": 147576136, "step": 1276 }, { "epoch": 6.865591397849462, "grad_norm": 20.500988006591797, "learning_rate": 5e-07, "loss": 0.8755, "num_input_tokens_seen": 147692152, "step": 1277 }, { "epoch": 6.865591397849462, "loss": 0.9890360832214355, "loss_ce": 2.2387517674360424e-05, "loss_iou": 0.4140625, "loss_num": 0.0322265625, "loss_xval": 0.98828125, "num_input_tokens_seen": 147692152, "step": 1277 }, { "epoch": 6.870967741935484, "grad_norm": 19.25983428955078, "learning_rate": 5e-07, "loss": 0.7843, "num_input_tokens_seen": 147810952, "step": 1278 }, { "epoch": 6.870967741935484, "loss": 0.8353756666183472, "loss_ce": 4.8497131501790136e-05, "loss_iou": 0.37890625, "loss_num": 0.0155029296875, "loss_xval": 0.8359375, "num_input_tokens_seen": 147810952, "step": 1278 }, { "epoch": 6.876344086021505, "grad_norm": 17.349321365356445, "learning_rate": 5e-07, "loss": 0.7985, "num_input_tokens_seen": 147926616, "step": 1279 }, { "epoch": 6.876344086021505, "loss": 0.9024270176887512, "loss_ce": 8.327690738951787e-05, "loss_iou": 0.380859375, "loss_num": 0.0279541015625, "loss_xval": 0.90234375, "num_input_tokens_seen": 147926616, "step": 1279 }, { "epoch": 6.881720430107527, "grad_norm": 18.537742614746094, "learning_rate": 5e-07, "loss": 0.8149, "num_input_tokens_seen": 148044588, "step": 1280 }, { "epoch": 6.881720430107527, "loss": 1.0054006576538086, "loss_ce": 2.9584896765300073e-05, "loss_iou": 0.4609375, "loss_num": 0.0172119140625, "loss_xval": 1.0078125, "num_input_tokens_seen": 148044588, "step": 1280 }, { "epoch": 6.887096774193548, "grad_norm": 17.51219940185547, "learning_rate": 5e-07, "loss": 0.8834, "num_input_tokens_seen": 148160192, "step": 1281 }, { "epoch": 6.887096774193548, "loss": 0.6304336190223694, "loss_ce": 6.253276660572737e-05, "loss_iou": 0.2734375, "loss_num": 0.0167236328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 148160192, "step": 1281 }, { "epoch": 6.89247311827957, "grad_norm": 19.50029182434082, "learning_rate": 5e-07, "loss": 0.8426, "num_input_tokens_seen": 148277344, "step": 1282 }, { "epoch": 6.89247311827957, "loss": 0.8635748028755188, "loss_ce": 4.942626765114255e-05, "loss_iou": 0.369140625, "loss_num": 0.025146484375, "loss_xval": 0.86328125, "num_input_tokens_seen": 148277344, "step": 1282 }, { "epoch": 6.897849462365591, "grad_norm": 21.635955810546875, "learning_rate": 5e-07, "loss": 0.7367, "num_input_tokens_seen": 148390768, "step": 1283 }, { "epoch": 6.897849462365591, "loss": 0.7839784622192383, "loss_ce": 4.290240030968562e-05, "loss_iou": 0.32421875, "loss_num": 0.026611328125, "loss_xval": 0.78515625, "num_input_tokens_seen": 148390768, "step": 1283 }, { "epoch": 6.903225806451613, "grad_norm": 16.553586959838867, "learning_rate": 5e-07, "loss": 0.7721, "num_input_tokens_seen": 148504184, "step": 1284 }, { "epoch": 6.903225806451613, "loss": 0.5542535781860352, "loss_ce": 5.4351763537852094e-05, "loss_iou": 0.244140625, "loss_num": 0.013427734375, "loss_xval": 0.5546875, "num_input_tokens_seen": 148504184, "step": 1284 }, { "epoch": 6.908602150537634, "grad_norm": 17.721553802490234, "learning_rate": 5e-07, "loss": 0.8144, "num_input_tokens_seen": 148620864, "step": 1285 }, { "epoch": 6.908602150537634, "loss": 1.1213957071304321, "loss_ce": 5.782075095339678e-05, "loss_iou": 0.451171875, "loss_num": 0.04345703125, "loss_xval": 1.125, "num_input_tokens_seen": 148620864, "step": 1285 }, { "epoch": 6.913978494623656, "grad_norm": 18.34896469116211, "learning_rate": 5e-07, "loss": 0.9257, "num_input_tokens_seen": 148736776, "step": 1286 }, { "epoch": 6.913978494623656, "loss": 1.02250337600708, "loss_ce": 4.247971810400486e-05, "loss_iou": 0.453125, "loss_num": 0.0228271484375, "loss_xval": 1.0234375, "num_input_tokens_seen": 148736776, "step": 1286 }, { "epoch": 6.919354838709677, "grad_norm": 29.63393211364746, "learning_rate": 5e-07, "loss": 0.6305, "num_input_tokens_seen": 148853524, "step": 1287 }, { "epoch": 6.919354838709677, "loss": 0.7677544355392456, "loss_ce": 0.00017633727111387998, "loss_iou": 0.298828125, "loss_num": 0.033935546875, "loss_xval": 0.765625, "num_input_tokens_seen": 148853524, "step": 1287 }, { "epoch": 6.924731182795699, "grad_norm": 21.65308952331543, "learning_rate": 5e-07, "loss": 0.8174, "num_input_tokens_seen": 148967700, "step": 1288 }, { "epoch": 6.924731182795699, "loss": 0.7788434624671936, "loss_ce": 3.487454159767367e-05, "loss_iou": 0.32421875, "loss_num": 0.0263671875, "loss_xval": 0.77734375, "num_input_tokens_seen": 148967700, "step": 1288 }, { "epoch": 6.93010752688172, "grad_norm": 21.278162002563477, "learning_rate": 5e-07, "loss": 0.8517, "num_input_tokens_seen": 149084736, "step": 1289 }, { "epoch": 6.93010752688172, "loss": 1.2600603103637695, "loss_ce": 5.052672349847853e-05, "loss_iou": 0.546875, "loss_num": 0.031982421875, "loss_xval": 1.2578125, "num_input_tokens_seen": 149084736, "step": 1289 }, { "epoch": 6.935483870967742, "grad_norm": 15.839442253112793, "learning_rate": 5e-07, "loss": 0.8362, "num_input_tokens_seen": 149200484, "step": 1290 }, { "epoch": 6.935483870967742, "loss": 0.6289429664611816, "loss_ce": 0.00015881434956099838, "loss_iou": 0.279296875, "loss_num": 0.013916015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 149200484, "step": 1290 }, { "epoch": 6.940860215053764, "grad_norm": 27.18647003173828, "learning_rate": 5e-07, "loss": 0.6858, "num_input_tokens_seen": 149315316, "step": 1291 }, { "epoch": 6.940860215053764, "loss": 0.6697328686714172, "loss_ce": 5.514607619261369e-05, "loss_iou": 0.283203125, "loss_num": 0.0203857421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 149315316, "step": 1291 }, { "epoch": 6.946236559139785, "grad_norm": 23.591747283935547, "learning_rate": 5e-07, "loss": 0.8472, "num_input_tokens_seen": 149431992, "step": 1292 }, { "epoch": 6.946236559139785, "loss": 0.9536761045455933, "loss_ce": 6.281315290834755e-05, "loss_iou": 0.396484375, "loss_num": 0.031982421875, "loss_xval": 0.953125, "num_input_tokens_seen": 149431992, "step": 1292 }, { "epoch": 6.951612903225806, "grad_norm": 18.959739685058594, "learning_rate": 5e-07, "loss": 0.8796, "num_input_tokens_seen": 149549892, "step": 1293 }, { "epoch": 6.951612903225806, "loss": 0.964000940322876, "loss_ce": 0.00013372628018260002, "loss_iou": 0.40234375, "loss_num": 0.03173828125, "loss_xval": 0.96484375, "num_input_tokens_seen": 149549892, "step": 1293 }, { "epoch": 6.956989247311828, "grad_norm": 23.095666885375977, "learning_rate": 5e-07, "loss": 0.9791, "num_input_tokens_seen": 149662784, "step": 1294 }, { "epoch": 6.956989247311828, "loss": 1.4024162292480469, "loss_ce": 7.246399763971567e-05, "loss_iou": 0.625, "loss_num": 0.03076171875, "loss_xval": 1.40625, "num_input_tokens_seen": 149662784, "step": 1294 }, { "epoch": 6.96236559139785, "grad_norm": 34.33489227294922, "learning_rate": 5e-07, "loss": 0.8863, "num_input_tokens_seen": 149778296, "step": 1295 }, { "epoch": 6.96236559139785, "loss": 0.9656178951263428, "loss_ce": 4.1726092604221776e-05, "loss_iou": 0.38671875, "loss_num": 0.03857421875, "loss_xval": 0.96484375, "num_input_tokens_seen": 149778296, "step": 1295 }, { "epoch": 6.967741935483871, "grad_norm": 19.058088302612305, "learning_rate": 5e-07, "loss": 0.9795, "num_input_tokens_seen": 149893136, "step": 1296 }, { "epoch": 6.967741935483871, "loss": 1.1262773275375366, "loss_ce": 5.666295328410342e-05, "loss_iou": 0.478515625, "loss_num": 0.0341796875, "loss_xval": 1.125, "num_input_tokens_seen": 149893136, "step": 1296 }, { "epoch": 6.973118279569892, "grad_norm": 19.232820510864258, "learning_rate": 5e-07, "loss": 0.8088, "num_input_tokens_seen": 150007536, "step": 1297 }, { "epoch": 6.973118279569892, "loss": 0.8592278361320496, "loss_ce": 9.698601206764579e-05, "loss_iou": 0.37109375, "loss_num": 0.02294921875, "loss_xval": 0.859375, "num_input_tokens_seen": 150007536, "step": 1297 }, { "epoch": 6.978494623655914, "grad_norm": 21.352907180786133, "learning_rate": 5e-07, "loss": 0.8341, "num_input_tokens_seen": 150122428, "step": 1298 }, { "epoch": 6.978494623655914, "loss": 0.8533464670181274, "loss_ce": 7.496173202525824e-05, "loss_iou": 0.33203125, "loss_num": 0.0380859375, "loss_xval": 0.8515625, "num_input_tokens_seen": 150122428, "step": 1298 }, { "epoch": 6.983870967741936, "grad_norm": 18.102067947387695, "learning_rate": 5e-07, "loss": 0.7725, "num_input_tokens_seen": 150237352, "step": 1299 }, { "epoch": 6.983870967741936, "loss": 0.693989634513855, "loss_ce": 1.9862374756485224e-05, "loss_iou": 0.30078125, "loss_num": 0.018798828125, "loss_xval": 0.6953125, "num_input_tokens_seen": 150237352, "step": 1299 }, { "epoch": 6.989247311827957, "grad_norm": 19.576923370361328, "learning_rate": 5e-07, "loss": 0.8687, "num_input_tokens_seen": 150354992, "step": 1300 }, { "epoch": 6.989247311827957, "loss": 1.059199571609497, "loss_ce": 0.0001175060315290466, "loss_iou": 0.453125, "loss_num": 0.0302734375, "loss_xval": 1.0625, "num_input_tokens_seen": 150354992, "step": 1300 }, { "epoch": 6.994623655913978, "grad_norm": 23.107921600341797, "learning_rate": 5e-07, "loss": 0.791, "num_input_tokens_seen": 150471208, "step": 1301 }, { "epoch": 6.994623655913978, "loss": 0.8928667902946472, "loss_ce": 4.449578409548849e-05, "loss_iou": 0.33984375, "loss_num": 0.042724609375, "loss_xval": 0.89453125, "num_input_tokens_seen": 150471208, "step": 1301 }, { "epoch": 7.0, "grad_norm": 21.133644104003906, "learning_rate": 5e-07, "loss": 0.6683, "num_input_tokens_seen": 150587404, "step": 1302 }, { "epoch": 7.0, "loss": 0.7681223154067993, "loss_ce": 5.590241926256567e-05, "loss_iou": 0.33984375, "loss_num": 0.0174560546875, "loss_xval": 0.76953125, "num_input_tokens_seen": 150587404, "step": 1302 }, { "epoch": 7.005376344086022, "grad_norm": 19.512893676757812, "learning_rate": 5e-07, "loss": 0.8093, "num_input_tokens_seen": 150703704, "step": 1303 }, { "epoch": 7.005376344086022, "loss": 0.8518587350845337, "loss_ce": 5.210433664615266e-05, "loss_iou": 0.341796875, "loss_num": 0.033935546875, "loss_xval": 0.8515625, "num_input_tokens_seen": 150703704, "step": 1303 }, { "epoch": 7.010752688172043, "grad_norm": 15.965598106384277, "learning_rate": 5e-07, "loss": 0.7228, "num_input_tokens_seen": 150818572, "step": 1304 }, { "epoch": 7.010752688172043, "loss": 0.8345375061035156, "loss_ce": 6.477924762293696e-05, "loss_iou": 0.357421875, "loss_num": 0.0238037109375, "loss_xval": 0.8359375, "num_input_tokens_seen": 150818572, "step": 1304 }, { "epoch": 7.016129032258065, "grad_norm": 16.436662673950195, "learning_rate": 5e-07, "loss": 0.8719, "num_input_tokens_seen": 150932624, "step": 1305 }, { "epoch": 7.016129032258065, "loss": 0.5376709699630737, "loss_ce": 7.337550050579011e-05, "loss_iou": 0.2353515625, "loss_num": 0.01336669921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 150932624, "step": 1305 }, { "epoch": 7.021505376344086, "grad_norm": 26.423446655273438, "learning_rate": 5e-07, "loss": 0.7086, "num_input_tokens_seen": 151049724, "step": 1306 }, { "epoch": 7.021505376344086, "loss": 0.8813867568969727, "loss_ce": 3.913999171345495e-05, "loss_iou": 0.396484375, "loss_num": 0.017578125, "loss_xval": 0.8828125, "num_input_tokens_seen": 151049724, "step": 1306 }, { "epoch": 7.026881720430108, "grad_norm": 15.411077499389648, "learning_rate": 5e-07, "loss": 0.624, "num_input_tokens_seen": 151167096, "step": 1307 }, { "epoch": 7.026881720430108, "loss": 0.6270742416381836, "loss_ce": 0.00012110413808841258, "loss_iou": 0.2734375, "loss_num": 0.016357421875, "loss_xval": 0.625, "num_input_tokens_seen": 151167096, "step": 1307 }, { "epoch": 7.032258064516129, "grad_norm": 18.085298538208008, "learning_rate": 5e-07, "loss": 0.9035, "num_input_tokens_seen": 151283856, "step": 1308 }, { "epoch": 7.032258064516129, "loss": 0.7670060992240906, "loss_ce": 3.836386895272881e-05, "loss_iou": 0.328125, "loss_num": 0.0220947265625, "loss_xval": 0.765625, "num_input_tokens_seen": 151283856, "step": 1308 }, { "epoch": 7.037634408602151, "grad_norm": 17.891212463378906, "learning_rate": 5e-07, "loss": 0.6958, "num_input_tokens_seen": 151398464, "step": 1309 }, { "epoch": 7.037634408602151, "loss": 0.7034287452697754, "loss_ce": 0.00012063551548635587, "loss_iou": 0.30078125, "loss_num": 0.0201416015625, "loss_xval": 0.703125, "num_input_tokens_seen": 151398464, "step": 1309 }, { "epoch": 7.043010752688172, "grad_norm": 18.01856803894043, "learning_rate": 5e-07, "loss": 0.7253, "num_input_tokens_seen": 151514048, "step": 1310 }, { "epoch": 7.043010752688172, "loss": 0.7675396203994751, "loss_ce": 8.355090540135279e-05, "loss_iou": 0.33203125, "loss_num": 0.020751953125, "loss_xval": 0.765625, "num_input_tokens_seen": 151514048, "step": 1310 }, { "epoch": 7.048387096774194, "grad_norm": 18.120508193969727, "learning_rate": 5e-07, "loss": 0.6131, "num_input_tokens_seen": 151630044, "step": 1311 }, { "epoch": 7.048387096774194, "loss": 0.5722037553787231, "loss_ce": 0.0001822991034714505, "loss_iou": 0.240234375, "loss_num": 0.0185546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 151630044, "step": 1311 }, { "epoch": 7.053763440860215, "grad_norm": 20.211185455322266, "learning_rate": 5e-07, "loss": 0.7345, "num_input_tokens_seen": 151746340, "step": 1312 }, { "epoch": 7.053763440860215, "loss": 0.7214946150779724, "loss_ce": 5.903310011490248e-05, "loss_iou": 0.302734375, "loss_num": 0.023193359375, "loss_xval": 0.72265625, "num_input_tokens_seen": 151746340, "step": 1312 }, { "epoch": 7.059139784946237, "grad_norm": 17.135610580444336, "learning_rate": 5e-07, "loss": 0.7217, "num_input_tokens_seen": 151865884, "step": 1313 }, { "epoch": 7.059139784946237, "loss": 0.6551464796066284, "loss_ce": 0.0001171838230220601, "loss_iou": 0.267578125, "loss_num": 0.024169921875, "loss_xval": 0.65625, "num_input_tokens_seen": 151865884, "step": 1313 }, { "epoch": 7.064516129032258, "grad_norm": 24.469350814819336, "learning_rate": 5e-07, "loss": 0.8749, "num_input_tokens_seen": 151980672, "step": 1314 }, { "epoch": 7.064516129032258, "loss": 0.6356210708618164, "loss_ce": 0.00012307001452427357, "loss_iou": 0.26171875, "loss_num": 0.0224609375, "loss_xval": 0.63671875, "num_input_tokens_seen": 151980672, "step": 1314 }, { "epoch": 7.06989247311828, "grad_norm": 19.6260986328125, "learning_rate": 5e-07, "loss": 0.9699, "num_input_tokens_seen": 152093688, "step": 1315 }, { "epoch": 7.06989247311828, "loss": 0.8568565845489502, "loss_ce": 4.50395091320388e-05, "loss_iou": 0.36328125, "loss_num": 0.02587890625, "loss_xval": 0.85546875, "num_input_tokens_seen": 152093688, "step": 1315 }, { "epoch": 7.075268817204301, "grad_norm": 19.99861717224121, "learning_rate": 5e-07, "loss": 0.7961, "num_input_tokens_seen": 152210248, "step": 1316 }, { "epoch": 7.075268817204301, "loss": 0.6568068265914917, "loss_ce": 6.85498962411657e-05, "loss_iou": 0.294921875, "loss_num": 0.01300048828125, "loss_xval": 0.65625, "num_input_tokens_seen": 152210248, "step": 1316 }, { "epoch": 7.080645161290323, "grad_norm": 16.989927291870117, "learning_rate": 5e-07, "loss": 0.8398, "num_input_tokens_seen": 152321376, "step": 1317 }, { "epoch": 7.080645161290323, "loss": 0.7342132925987244, "loss_ce": 8.240646275226027e-05, "loss_iou": 0.31640625, "loss_num": 0.0203857421875, "loss_xval": 0.734375, "num_input_tokens_seen": 152321376, "step": 1317 }, { "epoch": 7.086021505376344, "grad_norm": 16.08162498474121, "learning_rate": 5e-07, "loss": 0.8017, "num_input_tokens_seen": 152436056, "step": 1318 }, { "epoch": 7.086021505376344, "loss": 1.18275785446167, "loss_ce": 0.00014073195052333176, "loss_iou": 0.51953125, "loss_num": 0.0291748046875, "loss_xval": 1.1796875, "num_input_tokens_seen": 152436056, "step": 1318 }, { "epoch": 7.091397849462366, "grad_norm": 16.797863006591797, "learning_rate": 5e-07, "loss": 0.9627, "num_input_tokens_seen": 152554812, "step": 1319 }, { "epoch": 7.091397849462366, "loss": 0.9966045618057251, "loss_ce": 2.2549842469743453e-05, "loss_iou": 0.427734375, "loss_num": 0.0281982421875, "loss_xval": 0.99609375, "num_input_tokens_seen": 152554812, "step": 1319 }, { "epoch": 7.096774193548387, "grad_norm": 19.68446922302246, "learning_rate": 5e-07, "loss": 0.7623, "num_input_tokens_seen": 152671556, "step": 1320 }, { "epoch": 7.096774193548387, "loss": 0.778517484664917, "loss_ce": 7.50577382859774e-05, "loss_iou": 0.341796875, "loss_num": 0.0194091796875, "loss_xval": 0.77734375, "num_input_tokens_seen": 152671556, "step": 1320 }, { "epoch": 7.102150537634409, "grad_norm": 21.32960319519043, "learning_rate": 5e-07, "loss": 0.8026, "num_input_tokens_seen": 152788380, "step": 1321 }, { "epoch": 7.102150537634409, "loss": 0.8967729806900024, "loss_ce": 0.0001664742740103975, "loss_iou": 0.3515625, "loss_num": 0.038818359375, "loss_xval": 0.8984375, "num_input_tokens_seen": 152788380, "step": 1321 }, { "epoch": 7.10752688172043, "grad_norm": 15.512706756591797, "learning_rate": 5e-07, "loss": 0.6904, "num_input_tokens_seen": 152905668, "step": 1322 }, { "epoch": 7.10752688172043, "loss": 0.6897681951522827, "loss_ce": 7.097417983459309e-05, "loss_iou": 0.296875, "loss_num": 0.018798828125, "loss_xval": 0.69140625, "num_input_tokens_seen": 152905668, "step": 1322 }, { "epoch": 7.112903225806452, "grad_norm": 17.857789993286133, "learning_rate": 5e-07, "loss": 0.8651, "num_input_tokens_seen": 153021124, "step": 1323 }, { "epoch": 7.112903225806452, "loss": 1.151929497718811, "loss_ce": 7.397931767627597e-05, "loss_iou": 0.47265625, "loss_num": 0.041259765625, "loss_xval": 1.1484375, "num_input_tokens_seen": 153021124, "step": 1323 }, { "epoch": 7.118279569892473, "grad_norm": 22.7247314453125, "learning_rate": 5e-07, "loss": 0.8584, "num_input_tokens_seen": 153136108, "step": 1324 }, { "epoch": 7.118279569892473, "loss": 0.7959400415420532, "loss_ce": 4.16348448197823e-05, "loss_iou": 0.35546875, "loss_num": 0.0169677734375, "loss_xval": 0.796875, "num_input_tokens_seen": 153136108, "step": 1324 }, { "epoch": 7.123655913978495, "grad_norm": 20.79973793029785, "learning_rate": 5e-07, "loss": 0.8136, "num_input_tokens_seen": 153247652, "step": 1325 }, { "epoch": 7.123655913978495, "loss": 0.6675905585289001, "loss_ce": 0.00011009084118995816, "loss_iou": 0.259765625, "loss_num": 0.0294189453125, "loss_xval": 0.66796875, "num_input_tokens_seen": 153247652, "step": 1325 }, { "epoch": 7.129032258064516, "grad_norm": 20.0304012298584, "learning_rate": 5e-07, "loss": 0.9068, "num_input_tokens_seen": 153362088, "step": 1326 }, { "epoch": 7.129032258064516, "loss": 0.6274905800819397, "loss_ce": 4.916659963782877e-05, "loss_iou": 0.271484375, "loss_num": 0.0167236328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 153362088, "step": 1326 }, { "epoch": 7.134408602150538, "grad_norm": 21.682416915893555, "learning_rate": 5e-07, "loss": 0.8042, "num_input_tokens_seen": 153479064, "step": 1327 }, { "epoch": 7.134408602150538, "loss": 0.5778035521507263, "loss_ce": 0.00028889154782518744, "loss_iou": 0.23828125, "loss_num": 0.0201416015625, "loss_xval": 0.578125, "num_input_tokens_seen": 153479064, "step": 1327 }, { "epoch": 7.139784946236559, "grad_norm": 27.14375114440918, "learning_rate": 5e-07, "loss": 0.7759, "num_input_tokens_seen": 153595620, "step": 1328 }, { "epoch": 7.139784946236559, "loss": 0.9883310794830322, "loss_ce": 4.977673233952373e-05, "loss_iou": 0.44140625, "loss_num": 0.0211181640625, "loss_xval": 0.98828125, "num_input_tokens_seen": 153595620, "step": 1328 }, { "epoch": 7.145161290322581, "grad_norm": 21.883682250976562, "learning_rate": 5e-07, "loss": 0.8186, "num_input_tokens_seen": 153710884, "step": 1329 }, { "epoch": 7.145161290322581, "loss": 1.057600736618042, "loss_ce": 0.00010567613207967952, "loss_iou": 0.478515625, "loss_num": 0.020263671875, "loss_xval": 1.0546875, "num_input_tokens_seen": 153710884, "step": 1329 }, { "epoch": 7.150537634408602, "grad_norm": 15.361381530761719, "learning_rate": 5e-07, "loss": 0.8201, "num_input_tokens_seen": 153824160, "step": 1330 }, { "epoch": 7.150537634408602, "loss": 1.0045045614242554, "loss_ce": 0.0001099911896744743, "loss_iou": 0.4296875, "loss_num": 0.0291748046875, "loss_xval": 1.0078125, "num_input_tokens_seen": 153824160, "step": 1330 }, { "epoch": 7.155913978494624, "grad_norm": 18.62063980102539, "learning_rate": 5e-07, "loss": 0.901, "num_input_tokens_seen": 153938624, "step": 1331 }, { "epoch": 7.155913978494624, "loss": 0.9132876992225647, "loss_ce": 7.96824024291709e-05, "loss_iou": 0.38671875, "loss_num": 0.0283203125, "loss_xval": 0.9140625, "num_input_tokens_seen": 153938624, "step": 1331 }, { "epoch": 7.161290322580645, "grad_norm": 18.86810874938965, "learning_rate": 5e-07, "loss": 0.8251, "num_input_tokens_seen": 154054016, "step": 1332 }, { "epoch": 7.161290322580645, "loss": 0.6694819927215576, "loss_ce": 4.8409408918814734e-05, "loss_iou": 0.294921875, "loss_num": 0.01611328125, "loss_xval": 0.66796875, "num_input_tokens_seen": 154054016, "step": 1332 }, { "epoch": 7.166666666666667, "grad_norm": 16.035181045532227, "learning_rate": 5e-07, "loss": 0.9516, "num_input_tokens_seen": 154169860, "step": 1333 }, { "epoch": 7.166666666666667, "loss": 0.6963492035865784, "loss_ce": 6.0173842939548194e-05, "loss_iou": 0.29296875, "loss_num": 0.0224609375, "loss_xval": 0.6953125, "num_input_tokens_seen": 154169860, "step": 1333 }, { "epoch": 7.172043010752688, "grad_norm": 24.405670166015625, "learning_rate": 5e-07, "loss": 0.7305, "num_input_tokens_seen": 154283704, "step": 1334 }, { "epoch": 7.172043010752688, "loss": 0.8946045637130737, "loss_ce": 7.335373084060848e-05, "loss_iou": 0.38671875, "loss_num": 0.0242919921875, "loss_xval": 0.89453125, "num_input_tokens_seen": 154283704, "step": 1334 }, { "epoch": 7.17741935483871, "grad_norm": 21.881206512451172, "learning_rate": 5e-07, "loss": 0.813, "num_input_tokens_seen": 154397516, "step": 1335 }, { "epoch": 7.17741935483871, "loss": 0.734904408454895, "loss_ce": 4.112835449632257e-05, "loss_iou": 0.326171875, "loss_num": 0.016845703125, "loss_xval": 0.734375, "num_input_tokens_seen": 154397516, "step": 1335 }, { "epoch": 7.182795698924731, "grad_norm": 19.95915412902832, "learning_rate": 5e-07, "loss": 0.871, "num_input_tokens_seen": 154514668, "step": 1336 }, { "epoch": 7.182795698924731, "loss": 0.9710084199905396, "loss_ce": 6.111621769377962e-05, "loss_iou": 0.380859375, "loss_num": 0.041748046875, "loss_xval": 0.97265625, "num_input_tokens_seen": 154514668, "step": 1336 }, { "epoch": 7.188172043010753, "grad_norm": 21.690813064575195, "learning_rate": 5e-07, "loss": 0.8822, "num_input_tokens_seen": 154631268, "step": 1337 }, { "epoch": 7.188172043010753, "loss": 0.6677795052528381, "loss_ce": 5.487282032845542e-05, "loss_iou": 0.2890625, "loss_num": 0.01806640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 154631268, "step": 1337 }, { "epoch": 7.193548387096774, "grad_norm": 17.28043556213379, "learning_rate": 5e-07, "loss": 0.6643, "num_input_tokens_seen": 154745800, "step": 1338 }, { "epoch": 7.193548387096774, "loss": 0.6772661209106445, "loss_ce": 2.0022314856760204e-05, "loss_iou": 0.275390625, "loss_num": 0.0250244140625, "loss_xval": 0.67578125, "num_input_tokens_seen": 154745800, "step": 1338 }, { "epoch": 7.198924731182796, "grad_norm": 16.232847213745117, "learning_rate": 5e-07, "loss": 0.6578, "num_input_tokens_seen": 154859116, "step": 1339 }, { "epoch": 7.198924731182796, "loss": 0.7800047397613525, "loss_ce": 0.00021960429148748517, "loss_iou": 0.328125, "loss_num": 0.025146484375, "loss_xval": 0.78125, "num_input_tokens_seen": 154859116, "step": 1339 }, { "epoch": 7.204301075268817, "grad_norm": 18.983943939208984, "learning_rate": 5e-07, "loss": 0.8177, "num_input_tokens_seen": 154976216, "step": 1340 }, { "epoch": 7.204301075268817, "loss": 0.829396665096283, "loss_ce": 5.0961833039764315e-05, "loss_iou": 0.380859375, "loss_num": 0.01373291015625, "loss_xval": 0.828125, "num_input_tokens_seen": 154976216, "step": 1340 }, { "epoch": 7.209677419354839, "grad_norm": 24.02460479736328, "learning_rate": 5e-07, "loss": 0.7679, "num_input_tokens_seen": 155094528, "step": 1341 }, { "epoch": 7.209677419354839, "loss": 0.8394541144371033, "loss_ce": 9.865066385827959e-05, "loss_iou": 0.37890625, "loss_num": 0.01611328125, "loss_xval": 0.83984375, "num_input_tokens_seen": 155094528, "step": 1341 }, { "epoch": 7.21505376344086, "grad_norm": 18.306957244873047, "learning_rate": 5e-07, "loss": 0.7539, "num_input_tokens_seen": 155212204, "step": 1342 }, { "epoch": 7.21505376344086, "loss": 0.8838455080986023, "loss_ce": 5.6437387684127316e-05, "loss_iou": 0.373046875, "loss_num": 0.027099609375, "loss_xval": 0.8828125, "num_input_tokens_seen": 155212204, "step": 1342 }, { "epoch": 7.220430107526882, "grad_norm": 14.73993968963623, "learning_rate": 5e-07, "loss": 0.9006, "num_input_tokens_seen": 155327096, "step": 1343 }, { "epoch": 7.220430107526882, "loss": 0.9819645285606384, "loss_ce": 3.096458385698497e-05, "loss_iou": 0.4140625, "loss_num": 0.0302734375, "loss_xval": 0.98046875, "num_input_tokens_seen": 155327096, "step": 1343 }, { "epoch": 7.225806451612903, "grad_norm": 20.488319396972656, "learning_rate": 5e-07, "loss": 0.7226, "num_input_tokens_seen": 155445320, "step": 1344 }, { "epoch": 7.225806451612903, "loss": 0.7917160987854004, "loss_ce": 0.00015116570284590125, "loss_iou": 0.34765625, "loss_num": 0.01904296875, "loss_xval": 0.79296875, "num_input_tokens_seen": 155445320, "step": 1344 }, { "epoch": 7.231182795698925, "grad_norm": 18.53790855407715, "learning_rate": 5e-07, "loss": 0.7973, "num_input_tokens_seen": 155560228, "step": 1345 }, { "epoch": 7.231182795698925, "loss": 0.7007015943527222, "loss_ce": 1.8004808225668967e-05, "loss_iou": 0.28125, "loss_num": 0.0279541015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 155560228, "step": 1345 }, { "epoch": 7.236559139784946, "grad_norm": 26.842119216918945, "learning_rate": 5e-07, "loss": 0.7744, "num_input_tokens_seen": 155670784, "step": 1346 }, { "epoch": 7.236559139784946, "loss": 0.6872043013572693, "loss_ce": 7.051166176097468e-05, "loss_iou": 0.298828125, "loss_num": 0.0179443359375, "loss_xval": 0.6875, "num_input_tokens_seen": 155670784, "step": 1346 }, { "epoch": 7.241935483870968, "grad_norm": 17.68696403503418, "learning_rate": 5e-07, "loss": 0.742, "num_input_tokens_seen": 155788596, "step": 1347 }, { "epoch": 7.241935483870968, "loss": 0.6261545419692993, "loss_ce": 5.586573752225377e-05, "loss_iou": 0.267578125, "loss_num": 0.01806640625, "loss_xval": 0.625, "num_input_tokens_seen": 155788596, "step": 1347 }, { "epoch": 7.247311827956989, "grad_norm": 23.513784408569336, "learning_rate": 5e-07, "loss": 0.699, "num_input_tokens_seen": 155906452, "step": 1348 }, { "epoch": 7.247311827956989, "loss": 0.8193811178207397, "loss_ce": 4.5162480091676116e-05, "loss_iou": 0.3515625, "loss_num": 0.022705078125, "loss_xval": 0.8203125, "num_input_tokens_seen": 155906452, "step": 1348 }, { "epoch": 7.252688172043011, "grad_norm": 20.466508865356445, "learning_rate": 5e-07, "loss": 0.9014, "num_input_tokens_seen": 156021936, "step": 1349 }, { "epoch": 7.252688172043011, "loss": 0.9246354103088379, "loss_ce": 7.487426046282053e-05, "loss_iou": 0.40625, "loss_num": 0.0223388671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 156021936, "step": 1349 }, { "epoch": 7.258064516129032, "grad_norm": 21.4437313079834, "learning_rate": 5e-07, "loss": 0.7669, "num_input_tokens_seen": 156142200, "step": 1350 }, { "epoch": 7.258064516129032, "loss": 0.7278566360473633, "loss_ce": 0.00019551948935259134, "loss_iou": 0.330078125, "loss_num": 0.01348876953125, "loss_xval": 0.7265625, "num_input_tokens_seen": 156142200, "step": 1350 }, { "epoch": 7.263440860215054, "grad_norm": 28.962200164794922, "learning_rate": 5e-07, "loss": 0.9144, "num_input_tokens_seen": 156259392, "step": 1351 }, { "epoch": 7.263440860215054, "loss": 0.9875436425209045, "loss_ce": 0.00048307227552868426, "loss_iou": 0.419921875, "loss_num": 0.029296875, "loss_xval": 0.98828125, "num_input_tokens_seen": 156259392, "step": 1351 }, { "epoch": 7.268817204301075, "grad_norm": 20.194833755493164, "learning_rate": 5e-07, "loss": 0.6791, "num_input_tokens_seen": 156374500, "step": 1352 }, { "epoch": 7.268817204301075, "loss": 0.8568241000175476, "loss_ce": 0.0001346368808299303, "loss_iou": 0.33984375, "loss_num": 0.035888671875, "loss_xval": 0.85546875, "num_input_tokens_seen": 156374500, "step": 1352 }, { "epoch": 7.274193548387097, "grad_norm": 26.237977981567383, "learning_rate": 5e-07, "loss": 0.7573, "num_input_tokens_seen": 156488032, "step": 1353 }, { "epoch": 7.274193548387097, "loss": 0.4692837595939636, "loss_ce": 4.54584151157178e-05, "loss_iou": 0.1904296875, "loss_num": 0.017822265625, "loss_xval": 0.46875, "num_input_tokens_seen": 156488032, "step": 1353 }, { "epoch": 7.279569892473118, "grad_norm": 16.60437774658203, "learning_rate": 5e-07, "loss": 0.6716, "num_input_tokens_seen": 156606524, "step": 1354 }, { "epoch": 7.279569892473118, "loss": 0.658247172832489, "loss_ce": 4.400085163069889e-05, "loss_iou": 0.26953125, "loss_num": 0.02392578125, "loss_xval": 0.65625, "num_input_tokens_seen": 156606524, "step": 1354 }, { "epoch": 7.28494623655914, "grad_norm": 17.42603302001953, "learning_rate": 5e-07, "loss": 0.7599, "num_input_tokens_seen": 156724684, "step": 1355 }, { "epoch": 7.28494623655914, "loss": 0.7810534834861755, "loss_ce": 4.765183985000476e-05, "loss_iou": 0.33203125, "loss_num": 0.0234375, "loss_xval": 0.78125, "num_input_tokens_seen": 156724684, "step": 1355 }, { "epoch": 7.290322580645161, "grad_norm": 26.194087982177734, "learning_rate": 5e-07, "loss": 0.849, "num_input_tokens_seen": 156841204, "step": 1356 }, { "epoch": 7.290322580645161, "loss": 1.0537863969802856, "loss_ce": 7.547010318376124e-05, "loss_iou": 0.421875, "loss_num": 0.041259765625, "loss_xval": 1.0546875, "num_input_tokens_seen": 156841204, "step": 1356 }, { "epoch": 7.295698924731183, "grad_norm": 24.35080909729004, "learning_rate": 5e-07, "loss": 0.7587, "num_input_tokens_seen": 156957320, "step": 1357 }, { "epoch": 7.295698924731183, "loss": 0.7708127498626709, "loss_ce": 6.0850965382996947e-05, "loss_iou": 0.341796875, "loss_num": 0.0177001953125, "loss_xval": 0.76953125, "num_input_tokens_seen": 156957320, "step": 1357 }, { "epoch": 7.301075268817204, "grad_norm": 17.537843704223633, "learning_rate": 5e-07, "loss": 0.7322, "num_input_tokens_seen": 157075056, "step": 1358 }, { "epoch": 7.301075268817204, "loss": 0.9475710988044739, "loss_ce": 6.131174450274557e-05, "loss_iou": 0.40625, "loss_num": 0.02734375, "loss_xval": 0.94921875, "num_input_tokens_seen": 157075056, "step": 1358 }, { "epoch": 7.306451612903226, "grad_norm": 18.85224723815918, "learning_rate": 5e-07, "loss": 0.9424, "num_input_tokens_seen": 157188580, "step": 1359 }, { "epoch": 7.306451612903226, "loss": 1.0303704738616943, "loss_ce": 9.699973452370614e-05, "loss_iou": 0.44140625, "loss_num": 0.029296875, "loss_xval": 1.03125, "num_input_tokens_seen": 157188580, "step": 1359 }, { "epoch": 7.311827956989247, "grad_norm": 26.937461853027344, "learning_rate": 5e-07, "loss": 0.6451, "num_input_tokens_seen": 157306100, "step": 1360 }, { "epoch": 7.311827956989247, "loss": 0.7227221727371216, "loss_ce": 6.590186239918694e-05, "loss_iou": 0.275390625, "loss_num": 0.034423828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 157306100, "step": 1360 }, { "epoch": 7.317204301075269, "grad_norm": 23.831525802612305, "learning_rate": 5e-07, "loss": 0.7855, "num_input_tokens_seen": 157419764, "step": 1361 }, { "epoch": 7.317204301075269, "loss": 0.6630197167396545, "loss_ce": 5.5889242503326386e-05, "loss_iou": 0.29296875, "loss_num": 0.015625, "loss_xval": 0.6640625, "num_input_tokens_seen": 157419764, "step": 1361 }, { "epoch": 7.32258064516129, "grad_norm": 20.589183807373047, "learning_rate": 5e-07, "loss": 0.81, "num_input_tokens_seen": 157535272, "step": 1362 }, { "epoch": 7.32258064516129, "loss": 0.966850757598877, "loss_ce": 5.389008219935931e-05, "loss_iou": 0.388671875, "loss_num": 0.03759765625, "loss_xval": 0.96875, "num_input_tokens_seen": 157535272, "step": 1362 }, { "epoch": 7.327956989247312, "grad_norm": 17.65766143798828, "learning_rate": 5e-07, "loss": 0.9515, "num_input_tokens_seen": 157653336, "step": 1363 }, { "epoch": 7.327956989247312, "loss": 0.9893312454223633, "loss_ce": 7.347812061198056e-05, "loss_iou": 0.4375, "loss_num": 0.0230712890625, "loss_xval": 0.98828125, "num_input_tokens_seen": 157653336, "step": 1363 }, { "epoch": 7.333333333333333, "grad_norm": 19.692642211914062, "learning_rate": 5e-07, "loss": 0.7437, "num_input_tokens_seen": 157769056, "step": 1364 }, { "epoch": 7.333333333333333, "loss": 0.8218128681182861, "loss_ce": 3.552149428287521e-05, "loss_iou": 0.36328125, "loss_num": 0.0189208984375, "loss_xval": 0.8203125, "num_input_tokens_seen": 157769056, "step": 1364 }, { "epoch": 7.338709677419355, "grad_norm": 24.97136116027832, "learning_rate": 5e-07, "loss": 0.7163, "num_input_tokens_seen": 157885976, "step": 1365 }, { "epoch": 7.338709677419355, "loss": 0.8005837202072144, "loss_ce": 4.6590670535806566e-05, "loss_iou": 0.3359375, "loss_num": 0.025634765625, "loss_xval": 0.80078125, "num_input_tokens_seen": 157885976, "step": 1365 }, { "epoch": 7.344086021505376, "grad_norm": 17.98779296875, "learning_rate": 5e-07, "loss": 0.8219, "num_input_tokens_seen": 158001564, "step": 1366 }, { "epoch": 7.344086021505376, "loss": 1.236889362335205, "loss_ce": 7.297175761777908e-05, "loss_iou": 0.5546875, "loss_num": 0.0250244140625, "loss_xval": 1.234375, "num_input_tokens_seen": 158001564, "step": 1366 }, { "epoch": 7.349462365591398, "grad_norm": 28.52256965637207, "learning_rate": 5e-07, "loss": 0.6606, "num_input_tokens_seen": 158117696, "step": 1367 }, { "epoch": 7.349462365591398, "loss": 0.7217328548431396, "loss_ce": 5.314667578204535e-05, "loss_iou": 0.306640625, "loss_num": 0.0220947265625, "loss_xval": 0.72265625, "num_input_tokens_seen": 158117696, "step": 1367 }, { "epoch": 7.354838709677419, "grad_norm": 24.262113571166992, "learning_rate": 5e-07, "loss": 0.7492, "num_input_tokens_seen": 158233324, "step": 1368 }, { "epoch": 7.354838709677419, "loss": 0.6367901563644409, "loss_ce": 7.141625974327326e-05, "loss_iou": 0.2734375, "loss_num": 0.018310546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 158233324, "step": 1368 }, { "epoch": 7.360215053763441, "grad_norm": 24.8895263671875, "learning_rate": 5e-07, "loss": 0.9886, "num_input_tokens_seen": 158351564, "step": 1369 }, { "epoch": 7.360215053763441, "loss": 1.4410929679870605, "loss_ce": 0.00017494874191470444, "loss_iou": 0.60546875, "loss_num": 0.0458984375, "loss_xval": 1.4375, "num_input_tokens_seen": 158351564, "step": 1369 }, { "epoch": 7.365591397849462, "grad_norm": 16.763742446899414, "learning_rate": 5e-07, "loss": 0.7232, "num_input_tokens_seen": 158469004, "step": 1370 }, { "epoch": 7.365591397849462, "loss": 0.8234140872955322, "loss_ce": 4.976998388883658e-05, "loss_iou": 0.34375, "loss_num": 0.027587890625, "loss_xval": 0.82421875, "num_input_tokens_seen": 158469004, "step": 1370 }, { "epoch": 7.370967741935484, "grad_norm": 22.249713897705078, "learning_rate": 5e-07, "loss": 0.6962, "num_input_tokens_seen": 158586628, "step": 1371 }, { "epoch": 7.370967741935484, "loss": 0.8440967798233032, "loss_ce": 0.0001026514291879721, "loss_iou": 0.357421875, "loss_num": 0.0257568359375, "loss_xval": 0.84375, "num_input_tokens_seen": 158586628, "step": 1371 }, { "epoch": 7.376344086021505, "grad_norm": 18.221647262573242, "learning_rate": 5e-07, "loss": 0.6963, "num_input_tokens_seen": 158703228, "step": 1372 }, { "epoch": 7.376344086021505, "loss": 0.5886727571487427, "loss_ce": 4.9661022785585374e-05, "loss_iou": 0.24609375, "loss_num": 0.019287109375, "loss_xval": 0.58984375, "num_input_tokens_seen": 158703228, "step": 1372 }, { "epoch": 7.381720430107527, "grad_norm": 14.730327606201172, "learning_rate": 5e-07, "loss": 0.8747, "num_input_tokens_seen": 158817544, "step": 1373 }, { "epoch": 7.381720430107527, "loss": 0.9106810688972473, "loss_ce": 3.654389729490504e-05, "loss_iou": 0.39453125, "loss_num": 0.0242919921875, "loss_xval": 0.91015625, "num_input_tokens_seen": 158817544, "step": 1373 }, { "epoch": 7.387096774193548, "grad_norm": 21.1221923828125, "learning_rate": 5e-07, "loss": 0.7331, "num_input_tokens_seen": 158931956, "step": 1374 }, { "epoch": 7.387096774193548, "loss": 0.8760266304016113, "loss_ce": 5.006013088859618e-05, "loss_iou": 0.373046875, "loss_num": 0.025634765625, "loss_xval": 0.875, "num_input_tokens_seen": 158931956, "step": 1374 }, { "epoch": 7.39247311827957, "grad_norm": 29.91840934753418, "learning_rate": 5e-07, "loss": 0.7474, "num_input_tokens_seen": 159044076, "step": 1375 }, { "epoch": 7.39247311827957, "loss": 0.9888315200805664, "loss_ce": 6.20008067926392e-05, "loss_iou": 0.412109375, "loss_num": 0.032958984375, "loss_xval": 0.98828125, "num_input_tokens_seen": 159044076, "step": 1375 }, { "epoch": 7.397849462365591, "grad_norm": 16.138778686523438, "learning_rate": 5e-07, "loss": 0.8089, "num_input_tokens_seen": 159157368, "step": 1376 }, { "epoch": 7.397849462365591, "loss": 0.7439755201339722, "loss_ce": 0.00032324279891327024, "loss_iou": 0.3359375, "loss_num": 0.01409912109375, "loss_xval": 0.7421875, "num_input_tokens_seen": 159157368, "step": 1376 }, { "epoch": 7.403225806451613, "grad_norm": 21.064380645751953, "learning_rate": 5e-07, "loss": 0.6592, "num_input_tokens_seen": 159271840, "step": 1377 }, { "epoch": 7.403225806451613, "loss": 0.6042911410331726, "loss_ce": 4.310336953494698e-05, "loss_iou": 0.2392578125, "loss_num": 0.025390625, "loss_xval": 0.60546875, "num_input_tokens_seen": 159271840, "step": 1377 }, { "epoch": 7.408602150537634, "grad_norm": 16.178958892822266, "learning_rate": 5e-07, "loss": 0.683, "num_input_tokens_seen": 159386268, "step": 1378 }, { "epoch": 7.408602150537634, "loss": 0.7697147727012634, "loss_ce": 6.14439049968496e-05, "loss_iou": 0.337890625, "loss_num": 0.0186767578125, "loss_xval": 0.76953125, "num_input_tokens_seen": 159386268, "step": 1378 }, { "epoch": 7.413978494623656, "grad_norm": 39.02601623535156, "learning_rate": 5e-07, "loss": 0.8128, "num_input_tokens_seen": 159500656, "step": 1379 }, { "epoch": 7.413978494623656, "loss": 1.0869746208190918, "loss_ce": 6.052451135474257e-05, "loss_iou": 0.46875, "loss_num": 0.030029296875, "loss_xval": 1.0859375, "num_input_tokens_seen": 159500656, "step": 1379 }, { "epoch": 7.419354838709677, "grad_norm": 16.155122756958008, "learning_rate": 5e-07, "loss": 0.704, "num_input_tokens_seen": 159617160, "step": 1380 }, { "epoch": 7.419354838709677, "loss": 0.7705672979354858, "loss_ce": 5.949213664280251e-05, "loss_iou": 0.3203125, "loss_num": 0.025634765625, "loss_xval": 0.76953125, "num_input_tokens_seen": 159617160, "step": 1380 }, { "epoch": 7.424731182795699, "grad_norm": 20.23777198791504, "learning_rate": 5e-07, "loss": 0.7004, "num_input_tokens_seen": 159734380, "step": 1381 }, { "epoch": 7.424731182795699, "loss": 0.8679447770118713, "loss_ce": 2.483048774593044e-05, "loss_iou": 0.380859375, "loss_num": 0.021240234375, "loss_xval": 0.8671875, "num_input_tokens_seen": 159734380, "step": 1381 }, { "epoch": 7.43010752688172, "grad_norm": 17.590560913085938, "learning_rate": 5e-07, "loss": 0.7175, "num_input_tokens_seen": 159851228, "step": 1382 }, { "epoch": 7.43010752688172, "loss": 0.48416221141815186, "loss_ce": 3.1350558856502175e-05, "loss_iou": 0.2099609375, "loss_num": 0.0126953125, "loss_xval": 0.484375, "num_input_tokens_seen": 159851228, "step": 1382 }, { "epoch": 7.435483870967742, "grad_norm": 33.12651062011719, "learning_rate": 5e-07, "loss": 0.9091, "num_input_tokens_seen": 159967292, "step": 1383 }, { "epoch": 7.435483870967742, "loss": 1.3877630233764648, "loss_ce": 6.764304998796433e-05, "loss_iou": 0.58984375, "loss_num": 0.04150390625, "loss_xval": 1.390625, "num_input_tokens_seen": 159967292, "step": 1383 }, { "epoch": 7.440860215053763, "grad_norm": 24.0540771484375, "learning_rate": 5e-07, "loss": 0.9527, "num_input_tokens_seen": 160080460, "step": 1384 }, { "epoch": 7.440860215053763, "loss": 0.8152867555618286, "loss_ce": 0.00010116318298969418, "loss_iou": 0.359375, "loss_num": 0.0191650390625, "loss_xval": 0.81640625, "num_input_tokens_seen": 160080460, "step": 1384 }, { "epoch": 7.446236559139785, "grad_norm": 26.225282669067383, "learning_rate": 5e-07, "loss": 0.8154, "num_input_tokens_seen": 160199344, "step": 1385 }, { "epoch": 7.446236559139785, "loss": 1.0967413187026978, "loss_ce": 6.157949974294752e-05, "loss_iou": 0.4609375, "loss_num": 0.034912109375, "loss_xval": 1.09375, "num_input_tokens_seen": 160199344, "step": 1385 }, { "epoch": 7.451612903225806, "grad_norm": 20.268436431884766, "learning_rate": 5e-07, "loss": 0.7236, "num_input_tokens_seen": 160313352, "step": 1386 }, { "epoch": 7.451612903225806, "loss": 0.9551438093185425, "loss_ce": 6.571607082150877e-05, "loss_iou": 0.423828125, "loss_num": 0.021484375, "loss_xval": 0.953125, "num_input_tokens_seen": 160313352, "step": 1386 }, { "epoch": 7.456989247311828, "grad_norm": 23.810243606567383, "learning_rate": 5e-07, "loss": 0.7753, "num_input_tokens_seen": 160427508, "step": 1387 }, { "epoch": 7.456989247311828, "loss": 0.8295158743858337, "loss_ce": 0.0001091562444344163, "loss_iou": 0.3359375, "loss_num": 0.03173828125, "loss_xval": 0.828125, "num_input_tokens_seen": 160427508, "step": 1387 }, { "epoch": 7.462365591397849, "grad_norm": 22.565841674804688, "learning_rate": 5e-07, "loss": 0.8101, "num_input_tokens_seen": 160542156, "step": 1388 }, { "epoch": 7.462365591397849, "loss": 1.0417790412902832, "loss_ce": 3.09879906126298e-05, "loss_iou": 0.453125, "loss_num": 0.0267333984375, "loss_xval": 1.0390625, "num_input_tokens_seen": 160542156, "step": 1388 }, { "epoch": 7.467741935483871, "grad_norm": 19.42728042602539, "learning_rate": 5e-07, "loss": 0.6359, "num_input_tokens_seen": 160657316, "step": 1389 }, { "epoch": 7.467741935483871, "loss": 0.6165758371353149, "loss_ce": 0.00012077894643880427, "loss_iou": 0.259765625, "loss_num": 0.01904296875, "loss_xval": 0.6171875, "num_input_tokens_seen": 160657316, "step": 1389 }, { "epoch": 7.473118279569892, "grad_norm": 21.015954971313477, "learning_rate": 5e-07, "loss": 0.775, "num_input_tokens_seen": 160771268, "step": 1390 }, { "epoch": 7.473118279569892, "loss": 0.7063843011856079, "loss_ce": 8.54485115269199e-05, "loss_iou": 0.30078125, "loss_num": 0.0211181640625, "loss_xval": 0.70703125, "num_input_tokens_seen": 160771268, "step": 1390 }, { "epoch": 7.478494623655914, "grad_norm": 52.247467041015625, "learning_rate": 5e-07, "loss": 1.0611, "num_input_tokens_seen": 160884328, "step": 1391 }, { "epoch": 7.478494623655914, "loss": 1.0655831098556519, "loss_ce": 0.00015340998652391136, "loss_iou": 0.431640625, "loss_num": 0.04052734375, "loss_xval": 1.0625, "num_input_tokens_seen": 160884328, "step": 1391 }, { "epoch": 7.483870967741936, "grad_norm": 23.55976676940918, "learning_rate": 5e-07, "loss": 0.8164, "num_input_tokens_seen": 161000384, "step": 1392 }, { "epoch": 7.483870967741936, "loss": 0.7646991014480591, "loss_ce": 5.0672009820118546e-05, "loss_iou": 0.34375, "loss_num": 0.015625, "loss_xval": 0.765625, "num_input_tokens_seen": 161000384, "step": 1392 }, { "epoch": 7.489247311827957, "grad_norm": 25.35131072998047, "learning_rate": 5e-07, "loss": 0.8083, "num_input_tokens_seen": 161115780, "step": 1393 }, { "epoch": 7.489247311827957, "loss": 0.9138404130935669, "loss_ce": 0.0002662217593751848, "loss_iou": 0.365234375, "loss_num": 0.036376953125, "loss_xval": 0.9140625, "num_input_tokens_seen": 161115780, "step": 1393 }, { "epoch": 7.494623655913978, "grad_norm": 22.31975746154785, "learning_rate": 5e-07, "loss": 0.6932, "num_input_tokens_seen": 161231244, "step": 1394 }, { "epoch": 7.494623655913978, "loss": 0.8189187049865723, "loss_ce": 7.106518023647368e-05, "loss_iou": 0.357421875, "loss_num": 0.02099609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 161231244, "step": 1394 }, { "epoch": 7.5, "grad_norm": 44.91950988769531, "learning_rate": 5e-07, "loss": 0.7324, "num_input_tokens_seen": 161346672, "step": 1395 }, { "epoch": 7.5, "loss": 1.0872249603271484, "loss_ce": 6.671541632385924e-05, "loss_iou": 0.45703125, "loss_num": 0.03466796875, "loss_xval": 1.0859375, "num_input_tokens_seen": 161346672, "step": 1395 }, { "epoch": 7.505376344086022, "grad_norm": 45.49638748168945, "learning_rate": 5e-07, "loss": 0.8637, "num_input_tokens_seen": 161460756, "step": 1396 }, { "epoch": 7.505376344086022, "loss": 0.868746280670166, "loss_ce": 9.394407970830798e-05, "loss_iou": 0.36328125, "loss_num": 0.0284423828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 161460756, "step": 1396 }, { "epoch": 7.510752688172043, "grad_norm": 17.38796043395996, "learning_rate": 5e-07, "loss": 0.8225, "num_input_tokens_seen": 161575120, "step": 1397 }, { "epoch": 7.510752688172043, "loss": 0.8856920599937439, "loss_ce": 7.188593008322641e-05, "loss_iou": 0.390625, "loss_num": 0.020751953125, "loss_xval": 0.88671875, "num_input_tokens_seen": 161575120, "step": 1397 }, { "epoch": 7.516129032258064, "grad_norm": 33.16282653808594, "learning_rate": 5e-07, "loss": 0.7787, "num_input_tokens_seen": 161686428, "step": 1398 }, { "epoch": 7.516129032258064, "loss": 0.7141480445861816, "loss_ce": 3.671027297968976e-05, "loss_iou": 0.3125, "loss_num": 0.0174560546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 161686428, "step": 1398 }, { "epoch": 7.521505376344086, "grad_norm": 20.838960647583008, "learning_rate": 5e-07, "loss": 0.8174, "num_input_tokens_seen": 161801520, "step": 1399 }, { "epoch": 7.521505376344086, "loss": 0.7961798310279846, "loss_ce": 3.724388443515636e-05, "loss_iou": 0.34765625, "loss_num": 0.019775390625, "loss_xval": 0.796875, "num_input_tokens_seen": 161801520, "step": 1399 }, { "epoch": 7.526881720430108, "grad_norm": 56.17605972290039, "learning_rate": 5e-07, "loss": 0.7227, "num_input_tokens_seen": 161917872, "step": 1400 }, { "epoch": 7.526881720430108, "loss": 0.7021217346191406, "loss_ce": 9.533483535051346e-05, "loss_iou": 0.27734375, "loss_num": 0.029541015625, "loss_xval": 0.703125, "num_input_tokens_seen": 161917872, "step": 1400 }, { "epoch": 7.532258064516129, "grad_norm": 17.99762535095215, "learning_rate": 5e-07, "loss": 0.8004, "num_input_tokens_seen": 162031800, "step": 1401 }, { "epoch": 7.532258064516129, "loss": 0.4474043846130371, "loss_ce": 1.6715872334316373e-05, "loss_iou": 0.173828125, "loss_num": 0.02001953125, "loss_xval": 0.447265625, "num_input_tokens_seen": 162031800, "step": 1401 }, { "epoch": 7.53763440860215, "grad_norm": 18.173969268798828, "learning_rate": 5e-07, "loss": 0.712, "num_input_tokens_seen": 162145832, "step": 1402 }, { "epoch": 7.53763440860215, "loss": 0.5867531299591064, "loss_ce": 8.318546315422282e-05, "loss_iou": 0.25390625, "loss_num": 0.01611328125, "loss_xval": 0.5859375, "num_input_tokens_seen": 162145832, "step": 1402 }, { "epoch": 7.543010752688172, "grad_norm": 18.33939552307129, "learning_rate": 5e-07, "loss": 0.7162, "num_input_tokens_seen": 162258640, "step": 1403 }, { "epoch": 7.543010752688172, "loss": 0.6064742207527161, "loss_ce": 2.892601150961127e-05, "loss_iou": 0.255859375, "loss_num": 0.0186767578125, "loss_xval": 0.60546875, "num_input_tokens_seen": 162258640, "step": 1403 }, { "epoch": 7.548387096774194, "grad_norm": 18.50484848022461, "learning_rate": 5e-07, "loss": 0.7532, "num_input_tokens_seen": 162372372, "step": 1404 }, { "epoch": 7.548387096774194, "loss": 0.7526363134384155, "loss_ce": 0.0004390301473904401, "loss_iou": 0.302734375, "loss_num": 0.0291748046875, "loss_xval": 0.75390625, "num_input_tokens_seen": 162372372, "step": 1404 }, { "epoch": 7.553763440860215, "grad_norm": 16.502988815307617, "learning_rate": 5e-07, "loss": 0.9028, "num_input_tokens_seen": 162488060, "step": 1405 }, { "epoch": 7.553763440860215, "loss": 0.7728527784347534, "loss_ce": 2.5615638151066378e-05, "loss_iou": 0.30078125, "loss_num": 0.034423828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 162488060, "step": 1405 }, { "epoch": 7.559139784946236, "grad_norm": 28.245288848876953, "learning_rate": 5e-07, "loss": 0.7919, "num_input_tokens_seen": 162602428, "step": 1406 }, { "epoch": 7.559139784946236, "loss": 0.6373648047447205, "loss_ce": 3.5740747989621013e-05, "loss_iou": 0.2890625, "loss_num": 0.01153564453125, "loss_xval": 0.63671875, "num_input_tokens_seen": 162602428, "step": 1406 }, { "epoch": 7.564516129032258, "grad_norm": 17.471267700195312, "learning_rate": 5e-07, "loss": 0.8751, "num_input_tokens_seen": 162722288, "step": 1407 }, { "epoch": 7.564516129032258, "loss": 0.6851387619972229, "loss_ce": 0.0005684805801138282, "loss_iou": 0.3046875, "loss_num": 0.0155029296875, "loss_xval": 0.68359375, "num_input_tokens_seen": 162722288, "step": 1407 }, { "epoch": 7.56989247311828, "grad_norm": 20.944250106811523, "learning_rate": 5e-07, "loss": 0.7839, "num_input_tokens_seen": 162838140, "step": 1408 }, { "epoch": 7.56989247311828, "loss": 0.806573748588562, "loss_ce": 5.522723949979991e-05, "loss_iou": 0.322265625, "loss_num": 0.03271484375, "loss_xval": 0.8046875, "num_input_tokens_seen": 162838140, "step": 1408 }, { "epoch": 7.575268817204301, "grad_norm": 24.273204803466797, "learning_rate": 5e-07, "loss": 0.7272, "num_input_tokens_seen": 162953472, "step": 1409 }, { "epoch": 7.575268817204301, "loss": 0.7070751786231995, "loss_ce": 4.390049070934765e-05, "loss_iou": 0.28125, "loss_num": 0.029052734375, "loss_xval": 0.70703125, "num_input_tokens_seen": 162953472, "step": 1409 }, { "epoch": 7.580645161290323, "grad_norm": 23.742124557495117, "learning_rate": 5e-07, "loss": 0.8082, "num_input_tokens_seen": 163071572, "step": 1410 }, { "epoch": 7.580645161290323, "loss": 0.9963898658752441, "loss_ce": 5.193567631067708e-05, "loss_iou": 0.4296875, "loss_num": 0.0274658203125, "loss_xval": 0.99609375, "num_input_tokens_seen": 163071572, "step": 1410 }, { "epoch": 7.586021505376344, "grad_norm": 27.556501388549805, "learning_rate": 5e-07, "loss": 0.8098, "num_input_tokens_seen": 163188772, "step": 1411 }, { "epoch": 7.586021505376344, "loss": 0.7114250659942627, "loss_ce": 6.034977195668034e-05, "loss_iou": 0.326171875, "loss_num": 0.0115966796875, "loss_xval": 0.7109375, "num_input_tokens_seen": 163188772, "step": 1411 }, { "epoch": 7.591397849462366, "grad_norm": 18.64209747314453, "learning_rate": 5e-07, "loss": 0.6945, "num_input_tokens_seen": 163306620, "step": 1412 }, { "epoch": 7.591397849462366, "loss": 0.6160291433334351, "loss_ce": 6.239089998416603e-05, "loss_iou": 0.26171875, "loss_num": 0.018798828125, "loss_xval": 0.6171875, "num_input_tokens_seen": 163306620, "step": 1412 }, { "epoch": 7.596774193548387, "grad_norm": 19.604015350341797, "learning_rate": 5e-07, "loss": 0.786, "num_input_tokens_seen": 163420636, "step": 1413 }, { "epoch": 7.596774193548387, "loss": 0.5564306974411011, "loss_ce": 3.419167114770971e-05, "loss_iou": 0.2294921875, "loss_num": 0.0194091796875, "loss_xval": 0.5546875, "num_input_tokens_seen": 163420636, "step": 1413 }, { "epoch": 7.602150537634409, "grad_norm": 15.954503059387207, "learning_rate": 5e-07, "loss": 0.9346, "num_input_tokens_seen": 163532720, "step": 1414 }, { "epoch": 7.602150537634409, "loss": 0.9827689528465271, "loss_ce": 0.0010795118287205696, "loss_iou": 0.388671875, "loss_num": 0.041259765625, "loss_xval": 0.98046875, "num_input_tokens_seen": 163532720, "step": 1414 }, { "epoch": 7.60752688172043, "grad_norm": 21.946998596191406, "learning_rate": 5e-07, "loss": 0.8571, "num_input_tokens_seen": 163645628, "step": 1415 }, { "epoch": 7.60752688172043, "loss": 0.7532706260681152, "loss_ce": 9.682430390967056e-05, "loss_iou": 0.328125, "loss_num": 0.0189208984375, "loss_xval": 0.75390625, "num_input_tokens_seen": 163645628, "step": 1415 }, { "epoch": 7.612903225806452, "grad_norm": 21.248565673828125, "learning_rate": 5e-07, "loss": 0.7217, "num_input_tokens_seen": 163762556, "step": 1416 }, { "epoch": 7.612903225806452, "loss": 0.6964523792266846, "loss_ce": 4.129489025217481e-05, "loss_iou": 0.310546875, "loss_num": 0.01507568359375, "loss_xval": 0.6953125, "num_input_tokens_seen": 163762556, "step": 1416 }, { "epoch": 7.618279569892473, "grad_norm": 25.616994857788086, "learning_rate": 5e-07, "loss": 0.8416, "num_input_tokens_seen": 163875724, "step": 1417 }, { "epoch": 7.618279569892473, "loss": 0.7942781448364258, "loss_ce": 8.867058204486966e-05, "loss_iou": 0.357421875, "loss_num": 0.0159912109375, "loss_xval": 0.79296875, "num_input_tokens_seen": 163875724, "step": 1417 }, { "epoch": 7.623655913978495, "grad_norm": 23.182737350463867, "learning_rate": 5e-07, "loss": 0.8454, "num_input_tokens_seen": 163992104, "step": 1418 }, { "epoch": 7.623655913978495, "loss": 0.9327006340026855, "loss_ce": 8.347573748324066e-05, "loss_iou": 0.3828125, "loss_num": 0.032958984375, "loss_xval": 0.93359375, "num_input_tokens_seen": 163992104, "step": 1418 }, { "epoch": 7.629032258064516, "grad_norm": 29.618370056152344, "learning_rate": 5e-07, "loss": 0.7927, "num_input_tokens_seen": 164107088, "step": 1419 }, { "epoch": 7.629032258064516, "loss": 0.7483475208282471, "loss_ce": 5.6494453019695356e-05, "loss_iou": 0.306640625, "loss_num": 0.0272216796875, "loss_xval": 0.75, "num_input_tokens_seen": 164107088, "step": 1419 }, { "epoch": 7.634408602150538, "grad_norm": 32.49754333496094, "learning_rate": 5e-07, "loss": 0.6885, "num_input_tokens_seen": 164220276, "step": 1420 }, { "epoch": 7.634408602150538, "loss": 0.8302399516105652, "loss_ce": 0.00016184066771529615, "loss_iou": 0.365234375, "loss_num": 0.02001953125, "loss_xval": 0.828125, "num_input_tokens_seen": 164220276, "step": 1420 }, { "epoch": 7.639784946236559, "grad_norm": 18.185209274291992, "learning_rate": 5e-07, "loss": 0.7776, "num_input_tokens_seen": 164335568, "step": 1421 }, { "epoch": 7.639784946236559, "loss": 0.7192746996879578, "loss_ce": 3.64338920917362e-05, "loss_iou": 0.28125, "loss_num": 0.03173828125, "loss_xval": 0.71875, "num_input_tokens_seen": 164335568, "step": 1421 }, { "epoch": 7.645161290322581, "grad_norm": 26.10336685180664, "learning_rate": 5e-07, "loss": 0.7251, "num_input_tokens_seen": 164452116, "step": 1422 }, { "epoch": 7.645161290322581, "loss": 0.8062031865119934, "loss_ce": 5.0827573431888595e-05, "loss_iou": 0.32421875, "loss_num": 0.03125, "loss_xval": 0.8046875, "num_input_tokens_seen": 164452116, "step": 1422 }, { "epoch": 7.650537634408602, "grad_norm": 18.76084327697754, "learning_rate": 5e-07, "loss": 0.77, "num_input_tokens_seen": 164566312, "step": 1423 }, { "epoch": 7.650537634408602, "loss": 0.7053917646408081, "loss_ce": 6.948525697225705e-05, "loss_iou": 0.31640625, "loss_num": 0.0142822265625, "loss_xval": 0.70703125, "num_input_tokens_seen": 164566312, "step": 1423 }, { "epoch": 7.655913978494624, "grad_norm": 17.47438621520996, "learning_rate": 5e-07, "loss": 0.7468, "num_input_tokens_seen": 164683408, "step": 1424 }, { "epoch": 7.655913978494624, "loss": 0.9077471494674683, "loss_ce": 3.2311374525306746e-05, "loss_iou": 0.40234375, "loss_num": 0.0205078125, "loss_xval": 0.90625, "num_input_tokens_seen": 164683408, "step": 1424 }, { "epoch": 7.661290322580645, "grad_norm": 22.448095321655273, "learning_rate": 5e-07, "loss": 0.7735, "num_input_tokens_seen": 164799768, "step": 1425 }, { "epoch": 7.661290322580645, "loss": 0.5813458561897278, "loss_ce": 4.705352330347523e-05, "loss_iou": 0.267578125, "loss_num": 0.0089111328125, "loss_xval": 0.58203125, "num_input_tokens_seen": 164799768, "step": 1425 }, { "epoch": 7.666666666666667, "grad_norm": 15.60203742980957, "learning_rate": 5e-07, "loss": 0.7147, "num_input_tokens_seen": 164917560, "step": 1426 }, { "epoch": 7.666666666666667, "loss": 0.5687795877456665, "loss_ce": 5.400137524702586e-05, "loss_iou": 0.244140625, "loss_num": 0.016357421875, "loss_xval": 0.5703125, "num_input_tokens_seen": 164917560, "step": 1426 }, { "epoch": 7.672043010752688, "grad_norm": 16.63711166381836, "learning_rate": 5e-07, "loss": 0.6084, "num_input_tokens_seen": 165033540, "step": 1427 }, { "epoch": 7.672043010752688, "loss": 0.500514566898346, "loss_ce": 2.6297781005268916e-05, "loss_iou": 0.2158203125, "loss_num": 0.0135498046875, "loss_xval": 0.5, "num_input_tokens_seen": 165033540, "step": 1427 }, { "epoch": 7.67741935483871, "grad_norm": 20.615053176879883, "learning_rate": 5e-07, "loss": 0.7429, "num_input_tokens_seen": 165151600, "step": 1428 }, { "epoch": 7.67741935483871, "loss": 0.8396633267402649, "loss_ce": 6.369737820932642e-05, "loss_iou": 0.38671875, "loss_num": 0.013671875, "loss_xval": 0.83984375, "num_input_tokens_seen": 165151600, "step": 1428 }, { "epoch": 7.682795698924731, "grad_norm": 24.05682945251465, "learning_rate": 5e-07, "loss": 0.6972, "num_input_tokens_seen": 165270000, "step": 1429 }, { "epoch": 7.682795698924731, "loss": 0.4592640995979309, "loss_ce": 3.560121695045382e-05, "loss_iou": 0.2080078125, "loss_num": 0.00860595703125, "loss_xval": 0.458984375, "num_input_tokens_seen": 165270000, "step": 1429 }, { "epoch": 7.688172043010753, "grad_norm": 23.12557601928711, "learning_rate": 5e-07, "loss": 0.8918, "num_input_tokens_seen": 165389372, "step": 1430 }, { "epoch": 7.688172043010753, "loss": 0.8043585419654846, "loss_ce": 3.722116525750607e-05, "loss_iou": 0.345703125, "loss_num": 0.0223388671875, "loss_xval": 0.8046875, "num_input_tokens_seen": 165389372, "step": 1430 }, { "epoch": 7.693548387096774, "grad_norm": 16.35075569152832, "learning_rate": 5e-07, "loss": 0.7814, "num_input_tokens_seen": 165501628, "step": 1431 }, { "epoch": 7.693548387096774, "loss": 0.661179780960083, "loss_ce": 4.696474934462458e-05, "loss_iou": 0.2412109375, "loss_num": 0.03564453125, "loss_xval": 0.66015625, "num_input_tokens_seen": 165501628, "step": 1431 }, { "epoch": 7.698924731182796, "grad_norm": 28.298694610595703, "learning_rate": 5e-07, "loss": 0.9923, "num_input_tokens_seen": 165616552, "step": 1432 }, { "epoch": 7.698924731182796, "loss": 0.8816202878952026, "loss_ce": 2.848586518666707e-05, "loss_iou": 0.38671875, "loss_num": 0.021484375, "loss_xval": 0.8828125, "num_input_tokens_seen": 165616552, "step": 1432 }, { "epoch": 7.704301075268817, "grad_norm": 16.58974266052246, "learning_rate": 5e-07, "loss": 0.8607, "num_input_tokens_seen": 165731532, "step": 1433 }, { "epoch": 7.704301075268817, "loss": 1.095003604888916, "loss_ce": 3.295636270195246e-05, "loss_iou": 0.486328125, "loss_num": 0.024169921875, "loss_xval": 1.09375, "num_input_tokens_seen": 165731532, "step": 1433 }, { "epoch": 7.709677419354839, "grad_norm": 19.049314498901367, "learning_rate": 5e-07, "loss": 0.8693, "num_input_tokens_seen": 165847844, "step": 1434 }, { "epoch": 7.709677419354839, "loss": 0.8911504745483398, "loss_ce": 3.720365202752873e-05, "loss_iou": 0.375, "loss_num": 0.028076171875, "loss_xval": 0.890625, "num_input_tokens_seen": 165847844, "step": 1434 }, { "epoch": 7.71505376344086, "grad_norm": 17.822750091552734, "learning_rate": 5e-07, "loss": 0.7936, "num_input_tokens_seen": 165962268, "step": 1435 }, { "epoch": 7.71505376344086, "loss": 0.8530729413032532, "loss_ce": 4.560914385365322e-05, "loss_iou": 0.353515625, "loss_num": 0.0291748046875, "loss_xval": 0.8515625, "num_input_tokens_seen": 165962268, "step": 1435 }, { "epoch": 7.720430107526882, "grad_norm": 21.387617111206055, "learning_rate": 5e-07, "loss": 0.7884, "num_input_tokens_seen": 166079500, "step": 1436 }, { "epoch": 7.720430107526882, "loss": 0.709299623966217, "loss_ce": 7.10922249709256e-05, "loss_iou": 0.3046875, "loss_num": 0.02001953125, "loss_xval": 0.7109375, "num_input_tokens_seen": 166079500, "step": 1436 }, { "epoch": 7.725806451612903, "grad_norm": 18.486452102661133, "learning_rate": 5e-07, "loss": 0.9495, "num_input_tokens_seen": 166194492, "step": 1437 }, { "epoch": 7.725806451612903, "loss": 1.0437556505203247, "loss_ce": 5.447278090287e-05, "loss_iou": 0.435546875, "loss_num": 0.034423828125, "loss_xval": 1.046875, "num_input_tokens_seen": 166194492, "step": 1437 }, { "epoch": 7.731182795698925, "grad_norm": 20.103519439697266, "learning_rate": 5e-07, "loss": 0.69, "num_input_tokens_seen": 166309232, "step": 1438 }, { "epoch": 7.731182795698925, "loss": 0.8369598388671875, "loss_ce": 4.573762635118328e-05, "loss_iou": 0.3671875, "loss_num": 0.020263671875, "loss_xval": 0.8359375, "num_input_tokens_seen": 166309232, "step": 1438 }, { "epoch": 7.736559139784946, "grad_norm": 17.816377639770508, "learning_rate": 5e-07, "loss": 0.8795, "num_input_tokens_seen": 166419436, "step": 1439 }, { "epoch": 7.736559139784946, "loss": 0.9740715622901917, "loss_ce": 0.00019455768051557243, "loss_iou": 0.4140625, "loss_num": 0.0289306640625, "loss_xval": 0.97265625, "num_input_tokens_seen": 166419436, "step": 1439 }, { "epoch": 7.741935483870968, "grad_norm": 29.17402458190918, "learning_rate": 5e-07, "loss": 0.7696, "num_input_tokens_seen": 166535844, "step": 1440 }, { "epoch": 7.741935483870968, "loss": 0.45290637016296387, "loss_ce": 2.5501825803075917e-05, "loss_iou": 0.2021484375, "loss_num": 0.0098876953125, "loss_xval": 0.453125, "num_input_tokens_seen": 166535844, "step": 1440 }, { "epoch": 7.747311827956989, "grad_norm": 17.33723258972168, "learning_rate": 5e-07, "loss": 0.6084, "num_input_tokens_seen": 166650964, "step": 1441 }, { "epoch": 7.747311827956989, "loss": 0.7147113680839539, "loss_ce": 0.00011175496911164373, "loss_iou": 0.296875, "loss_num": 0.024658203125, "loss_xval": 0.71484375, "num_input_tokens_seen": 166650964, "step": 1441 }, { "epoch": 7.752688172043011, "grad_norm": 18.75877571105957, "learning_rate": 5e-07, "loss": 0.6174, "num_input_tokens_seen": 166768004, "step": 1442 }, { "epoch": 7.752688172043011, "loss": 0.45779645442962646, "loss_ce": 3.2748917874414474e-05, "loss_iou": 0.193359375, "loss_num": 0.01434326171875, "loss_xval": 0.45703125, "num_input_tokens_seen": 166768004, "step": 1442 }, { "epoch": 7.758064516129032, "grad_norm": 23.82184600830078, "learning_rate": 5e-07, "loss": 0.7123, "num_input_tokens_seen": 166883100, "step": 1443 }, { "epoch": 7.758064516129032, "loss": 0.6792582273483276, "loss_ce": 5.8994632126996294e-05, "loss_iou": 0.26953125, "loss_num": 0.02783203125, "loss_xval": 0.6796875, "num_input_tokens_seen": 166883100, "step": 1443 }, { "epoch": 7.763440860215054, "grad_norm": 21.415603637695312, "learning_rate": 5e-07, "loss": 0.69, "num_input_tokens_seen": 166998240, "step": 1444 }, { "epoch": 7.763440860215054, "loss": 0.6434341073036194, "loss_ce": 0.00012353702913969755, "loss_iou": 0.27734375, "loss_num": 0.017333984375, "loss_xval": 0.64453125, "num_input_tokens_seen": 166998240, "step": 1444 }, { "epoch": 7.768817204301075, "grad_norm": 21.57468605041504, "learning_rate": 5e-07, "loss": 0.8013, "num_input_tokens_seen": 167115576, "step": 1445 }, { "epoch": 7.768817204301075, "loss": 0.8149650692939758, "loss_ce": 2.3709832021268085e-05, "loss_iou": 0.3359375, "loss_num": 0.02880859375, "loss_xval": 0.81640625, "num_input_tokens_seen": 167115576, "step": 1445 }, { "epoch": 7.774193548387097, "grad_norm": 16.371971130371094, "learning_rate": 5e-07, "loss": 0.8013, "num_input_tokens_seen": 167232508, "step": 1446 }, { "epoch": 7.774193548387097, "loss": 0.6208754777908325, "loss_ce": 2.585945912869647e-05, "loss_iou": 0.26171875, "loss_num": 0.019287109375, "loss_xval": 0.62109375, "num_input_tokens_seen": 167232508, "step": 1446 }, { "epoch": 7.779569892473118, "grad_norm": 15.935375213623047, "learning_rate": 5e-07, "loss": 0.7282, "num_input_tokens_seen": 167346648, "step": 1447 }, { "epoch": 7.779569892473118, "loss": 0.5581126809120178, "loss_ce": 6.823831790825352e-05, "loss_iou": 0.236328125, "loss_num": 0.0169677734375, "loss_xval": 0.55859375, "num_input_tokens_seen": 167346648, "step": 1447 }, { "epoch": 7.78494623655914, "grad_norm": 18.259212493896484, "learning_rate": 5e-07, "loss": 0.5631, "num_input_tokens_seen": 167462128, "step": 1448 }, { "epoch": 7.78494623655914, "loss": 0.5728065967559814, "loss_ce": 5.26641815667972e-05, "loss_iou": 0.2578125, "loss_num": 0.01202392578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 167462128, "step": 1448 }, { "epoch": 7.790322580645161, "grad_norm": 21.96074867248535, "learning_rate": 5e-07, "loss": 0.7899, "num_input_tokens_seen": 167578444, "step": 1449 }, { "epoch": 7.790322580645161, "loss": 0.9749164581298828, "loss_ce": 0.0015277756610885262, "loss_iou": 0.431640625, "loss_num": 0.0218505859375, "loss_xval": 0.97265625, "num_input_tokens_seen": 167578444, "step": 1449 }, { "epoch": 7.795698924731183, "grad_norm": 22.241222381591797, "learning_rate": 5e-07, "loss": 0.8526, "num_input_tokens_seen": 167696748, "step": 1450 }, { "epoch": 7.795698924731183, "loss": 0.9999523162841797, "loss_ce": 0.00019647493900265545, "loss_iou": 0.4375, "loss_num": 0.0252685546875, "loss_xval": 1.0, "num_input_tokens_seen": 167696748, "step": 1450 }, { "epoch": 7.801075268817204, "grad_norm": 16.790197372436523, "learning_rate": 5e-07, "loss": 0.8155, "num_input_tokens_seen": 167815352, "step": 1451 }, { "epoch": 7.801075268817204, "loss": 0.6330962777137756, "loss_ce": 3.965066571254283e-05, "loss_iou": 0.26171875, "loss_num": 0.0220947265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 167815352, "step": 1451 }, { "epoch": 7.806451612903226, "grad_norm": 23.358713150024414, "learning_rate": 5e-07, "loss": 0.8217, "num_input_tokens_seen": 167930732, "step": 1452 }, { "epoch": 7.806451612903226, "loss": 0.657543420791626, "loss_ce": 7.271084177773446e-05, "loss_iou": 0.28125, "loss_num": 0.0191650390625, "loss_xval": 0.65625, "num_input_tokens_seen": 167930732, "step": 1452 }, { "epoch": 7.811827956989247, "grad_norm": 17.26963233947754, "learning_rate": 5e-07, "loss": 0.7474, "num_input_tokens_seen": 168049264, "step": 1453 }, { "epoch": 7.811827956989247, "loss": 0.803625762462616, "loss_ce": 0.00015894374519120902, "loss_iou": 0.330078125, "loss_num": 0.0286865234375, "loss_xval": 0.8046875, "num_input_tokens_seen": 168049264, "step": 1453 }, { "epoch": 7.817204301075269, "grad_norm": 21.320737838745117, "learning_rate": 5e-07, "loss": 0.694, "num_input_tokens_seen": 168165312, "step": 1454 }, { "epoch": 7.817204301075269, "loss": 0.7351387739181519, "loss_ce": 3.1337644031737e-05, "loss_iou": 0.318359375, "loss_num": 0.019775390625, "loss_xval": 0.734375, "num_input_tokens_seen": 168165312, "step": 1454 }, { "epoch": 7.82258064516129, "grad_norm": 14.71544075012207, "learning_rate": 5e-07, "loss": 0.6828, "num_input_tokens_seen": 168279744, "step": 1455 }, { "epoch": 7.82258064516129, "loss": 0.5302962064743042, "loss_ce": 2.2762489606975578e-05, "loss_iou": 0.2236328125, "loss_num": 0.0164794921875, "loss_xval": 0.53125, "num_input_tokens_seen": 168279744, "step": 1455 }, { "epoch": 7.827956989247312, "grad_norm": 22.10490608215332, "learning_rate": 5e-07, "loss": 0.8807, "num_input_tokens_seen": 168396112, "step": 1456 }, { "epoch": 7.827956989247312, "loss": 1.1797304153442383, "loss_ce": 4.29758511018008e-05, "loss_iou": 0.50390625, "loss_num": 0.03369140625, "loss_xval": 1.1796875, "num_input_tokens_seen": 168396112, "step": 1456 }, { "epoch": 7.833333333333333, "grad_norm": 78.46383666992188, "learning_rate": 5e-07, "loss": 0.8941, "num_input_tokens_seen": 168511712, "step": 1457 }, { "epoch": 7.833333333333333, "loss": 1.03910231590271, "loss_ce": 3.974383434979245e-05, "loss_iou": 0.4375, "loss_num": 0.03271484375, "loss_xval": 1.0390625, "num_input_tokens_seen": 168511712, "step": 1457 }, { "epoch": 7.838709677419355, "grad_norm": 15.548877716064453, "learning_rate": 5e-07, "loss": 0.673, "num_input_tokens_seen": 168628468, "step": 1458 }, { "epoch": 7.838709677419355, "loss": 0.6684820652008057, "loss_ce": 2.5011246179929003e-05, "loss_iou": 0.30078125, "loss_num": 0.01373291015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 168628468, "step": 1458 }, { "epoch": 7.844086021505376, "grad_norm": 24.793895721435547, "learning_rate": 5e-07, "loss": 0.7602, "num_input_tokens_seen": 168746256, "step": 1459 }, { "epoch": 7.844086021505376, "loss": 0.6560291647911072, "loss_ce": 2.3322161723626778e-05, "loss_iou": 0.259765625, "loss_num": 0.0269775390625, "loss_xval": 0.65625, "num_input_tokens_seen": 168746256, "step": 1459 }, { "epoch": 7.849462365591398, "grad_norm": 19.04301643371582, "learning_rate": 5e-07, "loss": 0.9343, "num_input_tokens_seen": 168860076, "step": 1460 }, { "epoch": 7.849462365591398, "loss": 1.0201033353805542, "loss_ce": 8.384660759475082e-05, "loss_iou": 0.43359375, "loss_num": 0.0301513671875, "loss_xval": 1.0234375, "num_input_tokens_seen": 168860076, "step": 1460 }, { "epoch": 7.854838709677419, "grad_norm": 21.78269386291504, "learning_rate": 5e-07, "loss": 0.7688, "num_input_tokens_seen": 168971832, "step": 1461 }, { "epoch": 7.854838709677419, "loss": 0.9565151929855347, "loss_ce": 3.33102434524335e-05, "loss_iou": 0.40234375, "loss_num": 0.0302734375, "loss_xval": 0.95703125, "num_input_tokens_seen": 168971832, "step": 1461 }, { "epoch": 7.860215053763441, "grad_norm": 16.09261703491211, "learning_rate": 5e-07, "loss": 0.7563, "num_input_tokens_seen": 169087400, "step": 1462 }, { "epoch": 7.860215053763441, "loss": 0.7681461572647095, "loss_ce": 7.978671783348545e-05, "loss_iou": 0.333984375, "loss_num": 0.0198974609375, "loss_xval": 0.76953125, "num_input_tokens_seen": 169087400, "step": 1462 }, { "epoch": 7.865591397849462, "grad_norm": 22.978574752807617, "learning_rate": 5e-07, "loss": 0.839, "num_input_tokens_seen": 169204524, "step": 1463 }, { "epoch": 7.865591397849462, "loss": 0.7837457656860352, "loss_ce": 5.4338539484888315e-05, "loss_iou": 0.341796875, "loss_num": 0.0201416015625, "loss_xval": 0.78515625, "num_input_tokens_seen": 169204524, "step": 1463 }, { "epoch": 7.870967741935484, "grad_norm": 18.066631317138672, "learning_rate": 5e-07, "loss": 0.9089, "num_input_tokens_seen": 169319804, "step": 1464 }, { "epoch": 7.870967741935484, "loss": 0.9444191455841064, "loss_ce": 8.322528447024524e-05, "loss_iou": 0.408203125, "loss_num": 0.025390625, "loss_xval": 0.9453125, "num_input_tokens_seen": 169319804, "step": 1464 }, { "epoch": 7.876344086021505, "grad_norm": 13.875812530517578, "learning_rate": 5e-07, "loss": 0.7349, "num_input_tokens_seen": 169436852, "step": 1465 }, { "epoch": 7.876344086021505, "loss": 0.5503284931182861, "loss_ce": 3.556181400199421e-05, "loss_iou": 0.232421875, "loss_num": 0.0172119140625, "loss_xval": 0.55078125, "num_input_tokens_seen": 169436852, "step": 1465 }, { "epoch": 7.881720430107527, "grad_norm": 21.856081008911133, "learning_rate": 5e-07, "loss": 0.729, "num_input_tokens_seen": 169551700, "step": 1466 }, { "epoch": 7.881720430107527, "loss": 0.5256731510162354, "loss_ce": 3.838828706648201e-05, "loss_iou": 0.240234375, "loss_num": 0.00897216796875, "loss_xval": 0.52734375, "num_input_tokens_seen": 169551700, "step": 1466 }, { "epoch": 7.887096774193548, "grad_norm": 17.643905639648438, "learning_rate": 5e-07, "loss": 0.7985, "num_input_tokens_seen": 169667120, "step": 1467 }, { "epoch": 7.887096774193548, "loss": 0.8913863897323608, "loss_ce": 2.896628575399518e-05, "loss_iou": 0.384765625, "loss_num": 0.0245361328125, "loss_xval": 0.890625, "num_input_tokens_seen": 169667120, "step": 1467 }, { "epoch": 7.89247311827957, "grad_norm": 19.826717376708984, "learning_rate": 5e-07, "loss": 0.9138, "num_input_tokens_seen": 169775840, "step": 1468 }, { "epoch": 7.89247311827957, "loss": 1.2900968790054321, "loss_ce": 5.781796426163055e-05, "loss_iou": 0.55859375, "loss_num": 0.033935546875, "loss_xval": 1.2890625, "num_input_tokens_seen": 169775840, "step": 1468 }, { "epoch": 7.897849462365591, "grad_norm": 38.74271774291992, "learning_rate": 5e-07, "loss": 0.7184, "num_input_tokens_seen": 169895312, "step": 1469 }, { "epoch": 7.897849462365591, "loss": 0.6113793849945068, "loss_ce": 5.123951268615201e-05, "loss_iou": 0.26953125, "loss_num": 0.01422119140625, "loss_xval": 0.609375, "num_input_tokens_seen": 169895312, "step": 1469 }, { "epoch": 7.903225806451613, "grad_norm": 23.1355037689209, "learning_rate": 5e-07, "loss": 0.6716, "num_input_tokens_seen": 170008616, "step": 1470 }, { "epoch": 7.903225806451613, "loss": 0.5094466805458069, "loss_ce": 0.0021224478259682655, "loss_iou": 0.2197265625, "loss_num": 0.0135498046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 170008616, "step": 1470 }, { "epoch": 7.908602150537634, "grad_norm": 20.069284439086914, "learning_rate": 5e-07, "loss": 0.6226, "num_input_tokens_seen": 170123560, "step": 1471 }, { "epoch": 7.908602150537634, "loss": 0.6186755895614624, "loss_ce": 2.3282234906218946e-05, "loss_iou": 0.26171875, "loss_num": 0.0186767578125, "loss_xval": 0.6171875, "num_input_tokens_seen": 170123560, "step": 1471 }, { "epoch": 7.913978494623656, "grad_norm": 21.128225326538086, "learning_rate": 5e-07, "loss": 0.6962, "num_input_tokens_seen": 170239032, "step": 1472 }, { "epoch": 7.913978494623656, "loss": 0.6157602071762085, "loss_ce": 3.751360782189295e-05, "loss_iou": 0.271484375, "loss_num": 0.0147705078125, "loss_xval": 0.6171875, "num_input_tokens_seen": 170239032, "step": 1472 }, { "epoch": 7.919354838709677, "grad_norm": 16.166179656982422, "learning_rate": 5e-07, "loss": 0.8067, "num_input_tokens_seen": 170354276, "step": 1473 }, { "epoch": 7.919354838709677, "loss": 0.9844784736633301, "loss_ce": 0.00010347116767661646, "loss_iou": 0.396484375, "loss_num": 0.038330078125, "loss_xval": 0.984375, "num_input_tokens_seen": 170354276, "step": 1473 }, { "epoch": 7.924731182795699, "grad_norm": 23.979860305786133, "learning_rate": 5e-07, "loss": 0.7392, "num_input_tokens_seen": 170471248, "step": 1474 }, { "epoch": 7.924731182795699, "loss": 0.7334524989128113, "loss_ce": 5.4048367019277066e-05, "loss_iou": 0.3125, "loss_num": 0.0216064453125, "loss_xval": 0.734375, "num_input_tokens_seen": 170471248, "step": 1474 }, { "epoch": 7.93010752688172, "grad_norm": 20.230878829956055, "learning_rate": 5e-07, "loss": 0.7683, "num_input_tokens_seen": 170586324, "step": 1475 }, { "epoch": 7.93010752688172, "loss": 0.8391319513320923, "loss_ce": 2.064866930595599e-05, "loss_iou": 0.3359375, "loss_num": 0.033203125, "loss_xval": 0.83984375, "num_input_tokens_seen": 170586324, "step": 1475 }, { "epoch": 7.935483870967742, "grad_norm": 14.993705749511719, "learning_rate": 5e-07, "loss": 0.9157, "num_input_tokens_seen": 170699908, "step": 1476 }, { "epoch": 7.935483870967742, "loss": 1.0279122591018677, "loss_ce": 8.019553933991119e-05, "loss_iou": 0.44921875, "loss_num": 0.025634765625, "loss_xval": 1.03125, "num_input_tokens_seen": 170699908, "step": 1476 }, { "epoch": 7.940860215053764, "grad_norm": 19.548580169677734, "learning_rate": 5e-07, "loss": 0.7188, "num_input_tokens_seen": 170816188, "step": 1477 }, { "epoch": 7.940860215053764, "loss": 0.6377578973770142, "loss_ce": 6.260225200094283e-05, "loss_iou": 0.2470703125, "loss_num": 0.0286865234375, "loss_xval": 0.63671875, "num_input_tokens_seen": 170816188, "step": 1477 }, { "epoch": 7.946236559139785, "grad_norm": 20.131507873535156, "learning_rate": 5e-07, "loss": 0.6414, "num_input_tokens_seen": 170930764, "step": 1478 }, { "epoch": 7.946236559139785, "loss": 0.6409515738487244, "loss_ce": 8.245906792581081e-05, "loss_iou": 0.263671875, "loss_num": 0.02294921875, "loss_xval": 0.640625, "num_input_tokens_seen": 170930764, "step": 1478 }, { "epoch": 7.951612903225806, "grad_norm": 22.8435115814209, "learning_rate": 5e-07, "loss": 0.916, "num_input_tokens_seen": 171046212, "step": 1479 }, { "epoch": 7.951612903225806, "loss": 0.9800289273262024, "loss_ce": 4.845996591029689e-05, "loss_iou": 0.41015625, "loss_num": 0.031982421875, "loss_xval": 0.98046875, "num_input_tokens_seen": 171046212, "step": 1479 }, { "epoch": 7.956989247311828, "grad_norm": 21.222896575927734, "learning_rate": 5e-07, "loss": 0.7124, "num_input_tokens_seen": 171161444, "step": 1480 }, { "epoch": 7.956989247311828, "loss": 0.6982954144477844, "loss_ce": 5.3268006013240665e-05, "loss_iou": 0.314453125, "loss_num": 0.01373291015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 171161444, "step": 1480 }, { "epoch": 7.96236559139785, "grad_norm": 20.490550994873047, "learning_rate": 5e-07, "loss": 0.6623, "num_input_tokens_seen": 171278684, "step": 1481 }, { "epoch": 7.96236559139785, "loss": 0.5034660696983337, "loss_ce": 4.810184327652678e-05, "loss_iou": 0.1923828125, "loss_num": 0.02392578125, "loss_xval": 0.50390625, "num_input_tokens_seen": 171278684, "step": 1481 }, { "epoch": 7.967741935483871, "grad_norm": 18.617286682128906, "learning_rate": 5e-07, "loss": 0.7748, "num_input_tokens_seen": 171393928, "step": 1482 }, { "epoch": 7.967741935483871, "loss": 0.6396937370300293, "loss_ce": 4.529858415480703e-05, "loss_iou": 0.248046875, "loss_num": 0.0286865234375, "loss_xval": 0.640625, "num_input_tokens_seen": 171393928, "step": 1482 }, { "epoch": 7.973118279569892, "grad_norm": 18.86888885498047, "learning_rate": 5e-07, "loss": 0.8313, "num_input_tokens_seen": 171509972, "step": 1483 }, { "epoch": 7.973118279569892, "loss": 0.8491498827934265, "loss_ce": 2.8774964448530227e-05, "loss_iou": 0.333984375, "loss_num": 0.036376953125, "loss_xval": 0.84765625, "num_input_tokens_seen": 171509972, "step": 1483 }, { "epoch": 7.978494623655914, "grad_norm": 25.91986656188965, "learning_rate": 5e-07, "loss": 0.7493, "num_input_tokens_seen": 171627352, "step": 1484 }, { "epoch": 7.978494623655914, "loss": 0.6882930994033813, "loss_ce": 6.0626509366557e-05, "loss_iou": 0.296875, "loss_num": 0.0186767578125, "loss_xval": 0.6875, "num_input_tokens_seen": 171627352, "step": 1484 }, { "epoch": 7.983870967741936, "grad_norm": 25.558853149414062, "learning_rate": 5e-07, "loss": 0.7242, "num_input_tokens_seen": 171742128, "step": 1485 }, { "epoch": 7.983870967741936, "loss": 1.003964900970459, "loss_ce": 5.871711255167611e-05, "loss_iou": 0.412109375, "loss_num": 0.0361328125, "loss_xval": 1.0, "num_input_tokens_seen": 171742128, "step": 1485 }, { "epoch": 7.989247311827957, "grad_norm": 16.69420051574707, "learning_rate": 5e-07, "loss": 0.7403, "num_input_tokens_seen": 171856584, "step": 1486 }, { "epoch": 7.989247311827957, "loss": 0.5126107931137085, "loss_ce": 3.7523754144785926e-05, "loss_iou": 0.2119140625, "loss_num": 0.017578125, "loss_xval": 0.51171875, "num_input_tokens_seen": 171856584, "step": 1486 }, { "epoch": 7.994623655913978, "grad_norm": 20.429798126220703, "learning_rate": 5e-07, "loss": 0.8343, "num_input_tokens_seen": 171970316, "step": 1487 }, { "epoch": 7.994623655913978, "loss": 0.7867798209190369, "loss_ce": 3.664570249384269e-05, "loss_iou": 0.32421875, "loss_num": 0.0274658203125, "loss_xval": 0.78515625, "num_input_tokens_seen": 171970316, "step": 1487 }, { "epoch": 8.0, "grad_norm": 19.385814666748047, "learning_rate": 5e-07, "loss": 0.6807, "num_input_tokens_seen": 172086208, "step": 1488 }, { "epoch": 8.0, "loss": 0.6153687238693237, "loss_ce": 0.00013438232417684048, "loss_iou": 0.275390625, "loss_num": 0.01324462890625, "loss_xval": 0.6171875, "num_input_tokens_seen": 172086208, "step": 1488 }, { "epoch": 8.005376344086022, "grad_norm": 16.304752349853516, "learning_rate": 5e-07, "loss": 0.6822, "num_input_tokens_seen": 172202924, "step": 1489 }, { "epoch": 8.005376344086022, "loss": 0.6069691181182861, "loss_ce": 3.548890526872128e-05, "loss_iou": 0.27734375, "loss_num": 0.010498046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 172202924, "step": 1489 }, { "epoch": 8.010752688172044, "grad_norm": 24.756235122680664, "learning_rate": 5e-07, "loss": 0.7162, "num_input_tokens_seen": 172318508, "step": 1490 }, { "epoch": 8.010752688172044, "loss": 0.6670191884040833, "loss_ce": 2.701904122659471e-05, "loss_iou": 0.287109375, "loss_num": 0.018798828125, "loss_xval": 0.66796875, "num_input_tokens_seen": 172318508, "step": 1490 }, { "epoch": 8.016129032258064, "grad_norm": 19.196468353271484, "learning_rate": 5e-07, "loss": 0.8625, "num_input_tokens_seen": 172430344, "step": 1491 }, { "epoch": 8.016129032258064, "loss": 1.1162593364715576, "loss_ce": 4.8375783080700785e-05, "loss_iou": 0.462890625, "loss_num": 0.0380859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 172430344, "step": 1491 }, { "epoch": 8.021505376344086, "grad_norm": 15.326699256896973, "learning_rate": 5e-07, "loss": 0.7342, "num_input_tokens_seen": 172547636, "step": 1492 }, { "epoch": 8.021505376344086, "loss": 0.7755799293518066, "loss_ce": 6.725099956383929e-05, "loss_iou": 0.3359375, "loss_num": 0.02099609375, "loss_xval": 0.77734375, "num_input_tokens_seen": 172547636, "step": 1492 }, { "epoch": 8.026881720430108, "grad_norm": 27.629352569580078, "learning_rate": 5e-07, "loss": 0.8258, "num_input_tokens_seen": 172663496, "step": 1493 }, { "epoch": 8.026881720430108, "loss": 0.6638630628585815, "loss_ce": 4.4700362195726484e-05, "loss_iou": 0.29296875, "loss_num": 0.015869140625, "loss_xval": 0.6640625, "num_input_tokens_seen": 172663496, "step": 1493 }, { "epoch": 8.03225806451613, "grad_norm": 22.894346237182617, "learning_rate": 5e-07, "loss": 0.7364, "num_input_tokens_seen": 172776900, "step": 1494 }, { "epoch": 8.03225806451613, "loss": 0.6616567373275757, "loss_ce": 3.563026257324964e-05, "loss_iou": 0.287109375, "loss_num": 0.0174560546875, "loss_xval": 0.66015625, "num_input_tokens_seen": 172776900, "step": 1494 }, { "epoch": 8.03763440860215, "grad_norm": 18.045734405517578, "learning_rate": 5e-07, "loss": 0.7894, "num_input_tokens_seen": 172891536, "step": 1495 }, { "epoch": 8.03763440860215, "loss": 0.9624438285827637, "loss_ce": 4.152023757342249e-05, "loss_iou": 0.40625, "loss_num": 0.0296630859375, "loss_xval": 0.9609375, "num_input_tokens_seen": 172891536, "step": 1495 }, { "epoch": 8.043010752688172, "grad_norm": 16.121030807495117, "learning_rate": 5e-07, "loss": 0.7805, "num_input_tokens_seen": 173007440, "step": 1496 }, { "epoch": 8.043010752688172, "loss": 0.7207645177841187, "loss_ce": 6.13450538367033e-05, "loss_iou": 0.31640625, "loss_num": 0.0172119140625, "loss_xval": 0.71875, "num_input_tokens_seen": 173007440, "step": 1496 }, { "epoch": 8.048387096774194, "grad_norm": 16.605648040771484, "learning_rate": 5e-07, "loss": 0.845, "num_input_tokens_seen": 173121056, "step": 1497 }, { "epoch": 8.048387096774194, "loss": 0.5061322450637817, "loss_ce": 2.8694630600512028e-05, "loss_iou": 0.21875, "loss_num": 0.0135498046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 173121056, "step": 1497 }, { "epoch": 8.053763440860216, "grad_norm": 20.107303619384766, "learning_rate": 5e-07, "loss": 0.6171, "num_input_tokens_seen": 173233404, "step": 1498 }, { "epoch": 8.053763440860216, "loss": 0.5280354022979736, "loss_ce": 2.0272404071874917e-05, "loss_iou": 0.2197265625, "loss_num": 0.017578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 173233404, "step": 1498 }, { "epoch": 8.059139784946236, "grad_norm": 22.977294921875, "learning_rate": 5e-07, "loss": 0.7011, "num_input_tokens_seen": 173348220, "step": 1499 }, { "epoch": 8.059139784946236, "loss": 0.8387531042098999, "loss_ce": 6.90049200784415e-05, "loss_iou": 0.369140625, "loss_num": 0.0198974609375, "loss_xval": 0.83984375, "num_input_tokens_seen": 173348220, "step": 1499 }, { "epoch": 8.064516129032258, "grad_norm": 14.168288230895996, "learning_rate": 5e-07, "loss": 0.5733, "num_input_tokens_seen": 173462080, "step": 1500 }, { "epoch": 8.064516129032258, "eval_icons_CIoU": 0.1613897830247879, "eval_icons_GIoU": 0.12710388749837875, "eval_icons_IoU": 0.319986492395401, "eval_icons_MAE_all": 0.030157193541526794, "eval_icons_MAE_h": 0.033089784905314445, "eval_icons_MAE_w": 0.053890977054834366, "eval_icons_MAE_x_boxes": 0.05134650692343712, "eval_icons_MAE_y_boxes": 0.033229995518922806, "eval_icons_NUM_probability": 0.9986189007759094, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.8727612495422363, "eval_icons_loss_ce": 0.00022612516113440506, "eval_icons_loss_iou": 0.85791015625, "eval_icons_loss_num": 0.030422210693359375, "eval_icons_loss_xval": 1.86865234375, "eval_icons_runtime": 44.8706, "eval_icons_samples_per_second": 1.114, "eval_icons_steps_per_second": 0.045, "num_input_tokens_seen": 173462080, "step": 1500 }, { "epoch": 8.064516129032258, "eval_screenspot_CIoU": 0.3139355629682541, "eval_screenspot_GIoU": 0.30421581864356995, "eval_screenspot_IoU": 0.40677109360694885, "eval_screenspot_MAE_all": 0.06091445001463095, "eval_screenspot_MAE_h": 0.04943366845448812, "eval_screenspot_MAE_w": 0.0767235333720843, "eval_screenspot_MAE_x_boxes": 0.08225180767476559, "eval_screenspot_MAE_y_boxes": 0.03997026942670345, "eval_screenspot_NUM_probability": 0.9997237126032511, "eval_screenspot_inside_bbox": 0.7116666634877523, "eval_screenspot_loss": 1.7582085132598877, "eval_screenspot_loss_ce": 0.0001262788791791536, "eval_screenspot_loss_iou": 0.7417805989583334, "eval_screenspot_loss_num": 0.07024892171223958, "eval_screenspot_loss_xval": 1.8362630208333333, "eval_screenspot_runtime": 80.474, "eval_screenspot_samples_per_second": 1.106, "eval_screenspot_steps_per_second": 0.037, "num_input_tokens_seen": 173462080, "step": 1500 }, { "epoch": 8.064516129032258, "loss": 1.6914647817611694, "loss_ce": 5.848341970704496e-05, "loss_iou": 0.703125, "loss_num": 0.056640625, "loss_xval": 1.6875, "num_input_tokens_seen": 173462080, "step": 1500 }, { "epoch": 8.06989247311828, "grad_norm": 14.336308479309082, "learning_rate": 5e-07, "loss": 0.6915, "num_input_tokens_seen": 173576304, "step": 1501 }, { "epoch": 8.06989247311828, "loss": 0.9058266282081604, "loss_ce": 6.492194370366633e-05, "loss_iou": 0.373046875, "loss_num": 0.0322265625, "loss_xval": 0.90625, "num_input_tokens_seen": 173576304, "step": 1501 }, { "epoch": 8.075268817204302, "grad_norm": 17.357017517089844, "learning_rate": 5e-07, "loss": 0.7937, "num_input_tokens_seen": 173689808, "step": 1502 }, { "epoch": 8.075268817204302, "loss": 1.0184611082077026, "loss_ce": 2.8470625693444163e-05, "loss_iou": 0.46484375, "loss_num": 0.0177001953125, "loss_xval": 1.015625, "num_input_tokens_seen": 173689808, "step": 1502 }, { "epoch": 8.080645161290322, "grad_norm": 19.784496307373047, "learning_rate": 5e-07, "loss": 0.5749, "num_input_tokens_seen": 173802448, "step": 1503 }, { "epoch": 8.080645161290322, "loss": 0.521959125995636, "loss_ce": 0.00023058363876771182, "loss_iou": 0.2265625, "loss_num": 0.0137939453125, "loss_xval": 0.5234375, "num_input_tokens_seen": 173802448, "step": 1503 }, { "epoch": 8.086021505376344, "grad_norm": 17.73674201965332, "learning_rate": 5e-07, "loss": 0.7031, "num_input_tokens_seen": 173920444, "step": 1504 }, { "epoch": 8.086021505376344, "loss": 0.6216998100280762, "loss_ce": 0.00011778672342188656, "loss_iou": 0.248046875, "loss_num": 0.0250244140625, "loss_xval": 0.62109375, "num_input_tokens_seen": 173920444, "step": 1504 }, { "epoch": 8.091397849462366, "grad_norm": 15.194256782531738, "learning_rate": 5e-07, "loss": 0.6158, "num_input_tokens_seen": 174036480, "step": 1505 }, { "epoch": 8.091397849462366, "loss": 0.6482440233230591, "loss_ce": 5.062430500402115e-05, "loss_iou": 0.26953125, "loss_num": 0.0218505859375, "loss_xval": 0.6484375, "num_input_tokens_seen": 174036480, "step": 1505 }, { "epoch": 8.096774193548388, "grad_norm": 21.052448272705078, "learning_rate": 5e-07, "loss": 0.769, "num_input_tokens_seen": 174147620, "step": 1506 }, { "epoch": 8.096774193548388, "loss": 0.8396719694137573, "loss_ce": 7.233492215164006e-05, "loss_iou": 0.35546875, "loss_num": 0.02587890625, "loss_xval": 0.83984375, "num_input_tokens_seen": 174147620, "step": 1506 }, { "epoch": 8.102150537634408, "grad_norm": 22.96590805053711, "learning_rate": 5e-07, "loss": 0.7358, "num_input_tokens_seen": 174260484, "step": 1507 }, { "epoch": 8.102150537634408, "loss": 0.7072972059249878, "loss_ce": 2.1820898837177083e-05, "loss_iou": 0.306640625, "loss_num": 0.0189208984375, "loss_xval": 0.70703125, "num_input_tokens_seen": 174260484, "step": 1507 }, { "epoch": 8.10752688172043, "grad_norm": 25.850982666015625, "learning_rate": 5e-07, "loss": 0.8456, "num_input_tokens_seen": 174375732, "step": 1508 }, { "epoch": 8.10752688172043, "loss": 0.6777869462966919, "loss_ce": 5.25532741448842e-05, "loss_iou": 0.279296875, "loss_num": 0.02392578125, "loss_xval": 0.6796875, "num_input_tokens_seen": 174375732, "step": 1508 }, { "epoch": 8.112903225806452, "grad_norm": 22.212289810180664, "learning_rate": 5e-07, "loss": 0.6564, "num_input_tokens_seen": 174494080, "step": 1509 }, { "epoch": 8.112903225806452, "loss": 0.9952346682548523, "loss_ce": 0.00011746303789550439, "loss_iou": 0.40234375, "loss_num": 0.0380859375, "loss_xval": 0.99609375, "num_input_tokens_seen": 174494080, "step": 1509 }, { "epoch": 8.118279569892474, "grad_norm": 19.60482406616211, "learning_rate": 5e-07, "loss": 0.8903, "num_input_tokens_seen": 174611156, "step": 1510 }, { "epoch": 8.118279569892474, "loss": 0.6633914709091187, "loss_ce": 6.136401498224586e-05, "loss_iou": 0.2734375, "loss_num": 0.0234375, "loss_xval": 0.6640625, "num_input_tokens_seen": 174611156, "step": 1510 }, { "epoch": 8.123655913978494, "grad_norm": 33.445037841796875, "learning_rate": 5e-07, "loss": 0.7247, "num_input_tokens_seen": 174731096, "step": 1511 }, { "epoch": 8.123655913978494, "loss": 0.9084082841873169, "loss_ce": 8.307784446515143e-05, "loss_iou": 0.400390625, "loss_num": 0.02197265625, "loss_xval": 0.91015625, "num_input_tokens_seen": 174731096, "step": 1511 }, { "epoch": 8.129032258064516, "grad_norm": 19.200895309448242, "learning_rate": 5e-07, "loss": 0.6792, "num_input_tokens_seen": 174845960, "step": 1512 }, { "epoch": 8.129032258064516, "loss": 0.6802453398704529, "loss_ce": 6.957421283004805e-05, "loss_iou": 0.279296875, "loss_num": 0.0245361328125, "loss_xval": 0.6796875, "num_input_tokens_seen": 174845960, "step": 1512 }, { "epoch": 8.134408602150538, "grad_norm": 19.612600326538086, "learning_rate": 5e-07, "loss": 0.5815, "num_input_tokens_seen": 174960276, "step": 1513 }, { "epoch": 8.134408602150538, "loss": 0.5696104764938354, "loss_ce": 6.0920854593859985e-05, "loss_iou": 0.25, "loss_num": 0.013916015625, "loss_xval": 0.5703125, "num_input_tokens_seen": 174960276, "step": 1513 }, { "epoch": 8.13978494623656, "grad_norm": 28.935606002807617, "learning_rate": 5e-07, "loss": 0.6645, "num_input_tokens_seen": 175075008, "step": 1514 }, { "epoch": 8.13978494623656, "loss": 0.48698508739471436, "loss_ce": 0.00010768522042781115, "loss_iou": 0.19140625, "loss_num": 0.020751953125, "loss_xval": 0.486328125, "num_input_tokens_seen": 175075008, "step": 1514 }, { "epoch": 8.14516129032258, "grad_norm": 17.024869918823242, "learning_rate": 5e-07, "loss": 0.7301, "num_input_tokens_seen": 175192268, "step": 1515 }, { "epoch": 8.14516129032258, "loss": 0.6755697727203369, "loss_ce": 3.269713488407433e-05, "loss_iou": 0.29296875, "loss_num": 0.0181884765625, "loss_xval": 0.67578125, "num_input_tokens_seen": 175192268, "step": 1515 }, { "epoch": 8.150537634408602, "grad_norm": 20.573514938354492, "learning_rate": 5e-07, "loss": 0.705, "num_input_tokens_seen": 175307436, "step": 1516 }, { "epoch": 8.150537634408602, "loss": 0.4973405599594116, "loss_ce": 2.6102465199073777e-05, "loss_iou": 0.21484375, "loss_num": 0.013427734375, "loss_xval": 0.498046875, "num_input_tokens_seen": 175307436, "step": 1516 }, { "epoch": 8.155913978494624, "grad_norm": 24.92240333557129, "learning_rate": 5e-07, "loss": 0.7303, "num_input_tokens_seen": 175422248, "step": 1517 }, { "epoch": 8.155913978494624, "loss": 0.7393025159835815, "loss_ce": 4.4690441427519545e-05, "loss_iou": 0.31640625, "loss_num": 0.021728515625, "loss_xval": 0.73828125, "num_input_tokens_seen": 175422248, "step": 1517 }, { "epoch": 8.161290322580646, "grad_norm": 16.98579978942871, "learning_rate": 5e-07, "loss": 0.8048, "num_input_tokens_seen": 175539704, "step": 1518 }, { "epoch": 8.161290322580646, "loss": 0.906112015247345, "loss_ce": 0.00010615561041049659, "loss_iou": 0.369140625, "loss_num": 0.03369140625, "loss_xval": 0.90625, "num_input_tokens_seen": 175539704, "step": 1518 }, { "epoch": 8.166666666666666, "grad_norm": 15.111401557922363, "learning_rate": 5e-07, "loss": 0.9182, "num_input_tokens_seen": 175654048, "step": 1519 }, { "epoch": 8.166666666666666, "loss": 1.032954454421997, "loss_ce": 0.00023959160898812115, "loss_iou": 0.44921875, "loss_num": 0.026611328125, "loss_xval": 1.03125, "num_input_tokens_seen": 175654048, "step": 1519 }, { "epoch": 8.172043010752688, "grad_norm": 22.015283584594727, "learning_rate": 5e-07, "loss": 0.6573, "num_input_tokens_seen": 175772612, "step": 1520 }, { "epoch": 8.172043010752688, "loss": 0.7466458678245544, "loss_ce": 9.435589163331315e-05, "loss_iou": 0.326171875, "loss_num": 0.0185546875, "loss_xval": 0.74609375, "num_input_tokens_seen": 175772612, "step": 1520 }, { "epoch": 8.17741935483871, "grad_norm": 24.9875431060791, "learning_rate": 5e-07, "loss": 0.7276, "num_input_tokens_seen": 175886296, "step": 1521 }, { "epoch": 8.17741935483871, "loss": 0.6734791994094849, "loss_ce": 1.7287715309066698e-05, "loss_iou": 0.28515625, "loss_num": 0.020751953125, "loss_xval": 0.671875, "num_input_tokens_seen": 175886296, "step": 1521 }, { "epoch": 8.182795698924732, "grad_norm": 15.919234275817871, "learning_rate": 5e-07, "loss": 0.6942, "num_input_tokens_seen": 176001704, "step": 1522 }, { "epoch": 8.182795698924732, "loss": 0.4686688184738159, "loss_ce": 4.087084380444139e-05, "loss_iou": 0.20703125, "loss_num": 0.01080322265625, "loss_xval": 0.46875, "num_input_tokens_seen": 176001704, "step": 1522 }, { "epoch": 8.188172043010752, "grad_norm": 20.271041870117188, "learning_rate": 5e-07, "loss": 0.6559, "num_input_tokens_seen": 176115192, "step": 1523 }, { "epoch": 8.188172043010752, "loss": 0.5960561037063599, "loss_ce": 0.00010886906238738447, "loss_iou": 0.251953125, "loss_num": 0.0184326171875, "loss_xval": 0.59765625, "num_input_tokens_seen": 176115192, "step": 1523 }, { "epoch": 8.193548387096774, "grad_norm": 24.632036209106445, "learning_rate": 5e-07, "loss": 0.7243, "num_input_tokens_seen": 176227504, "step": 1524 }, { "epoch": 8.193548387096774, "loss": 0.6485110521316528, "loss_ce": 7.354228000622243e-05, "loss_iou": 0.291015625, "loss_num": 0.013427734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 176227504, "step": 1524 }, { "epoch": 8.198924731182796, "grad_norm": 17.99993133544922, "learning_rate": 5e-07, "loss": 0.8495, "num_input_tokens_seen": 176344848, "step": 1525 }, { "epoch": 8.198924731182796, "loss": 0.7898455262184143, "loss_ce": 5.062051786808297e-05, "loss_iou": 0.357421875, "loss_num": 0.01513671875, "loss_xval": 0.7890625, "num_input_tokens_seen": 176344848, "step": 1525 }, { "epoch": 8.204301075268818, "grad_norm": 17.87275505065918, "learning_rate": 5e-07, "loss": 0.827, "num_input_tokens_seen": 176458320, "step": 1526 }, { "epoch": 8.204301075268818, "loss": 1.0266810655593872, "loss_ce": 6.971010589040816e-05, "loss_iou": 0.43359375, "loss_num": 0.031982421875, "loss_xval": 1.0234375, "num_input_tokens_seen": 176458320, "step": 1526 }, { "epoch": 8.209677419354838, "grad_norm": 19.56378746032715, "learning_rate": 5e-07, "loss": 0.8604, "num_input_tokens_seen": 176574136, "step": 1527 }, { "epoch": 8.209677419354838, "loss": 0.7720144391059875, "loss_ce": 4.176070069661364e-05, "loss_iou": 0.337890625, "loss_num": 0.019287109375, "loss_xval": 0.7734375, "num_input_tokens_seen": 176574136, "step": 1527 }, { "epoch": 8.21505376344086, "grad_norm": 19.94392204284668, "learning_rate": 5e-07, "loss": 0.7297, "num_input_tokens_seen": 176689208, "step": 1528 }, { "epoch": 8.21505376344086, "loss": 0.9536460638046265, "loss_ce": 3.281892713857815e-05, "loss_iou": 0.39453125, "loss_num": 0.033447265625, "loss_xval": 0.953125, "num_input_tokens_seen": 176689208, "step": 1528 }, { "epoch": 8.220430107526882, "grad_norm": 17.123069763183594, "learning_rate": 5e-07, "loss": 0.7377, "num_input_tokens_seen": 176805140, "step": 1529 }, { "epoch": 8.220430107526882, "loss": 0.4386531710624695, "loss_ce": 5.4537656978936866e-05, "loss_iou": 0.1787109375, "loss_num": 0.0162353515625, "loss_xval": 0.439453125, "num_input_tokens_seen": 176805140, "step": 1529 }, { "epoch": 8.225806451612904, "grad_norm": 17.513612747192383, "learning_rate": 5e-07, "loss": 0.7432, "num_input_tokens_seen": 176921936, "step": 1530 }, { "epoch": 8.225806451612904, "loss": 0.49662670493125916, "loss_ce": 4.464598896447569e-05, "loss_iou": 0.2138671875, "loss_num": 0.01373291015625, "loss_xval": 0.49609375, "num_input_tokens_seen": 176921936, "step": 1530 }, { "epoch": 8.231182795698924, "grad_norm": 27.563243865966797, "learning_rate": 5e-07, "loss": 0.6939, "num_input_tokens_seen": 177037696, "step": 1531 }, { "epoch": 8.231182795698924, "loss": 0.5323061943054199, "loss_ce": 7.958101923577487e-05, "loss_iou": 0.220703125, "loss_num": 0.018310546875, "loss_xval": 0.53125, "num_input_tokens_seen": 177037696, "step": 1531 }, { "epoch": 8.236559139784946, "grad_norm": 21.24750518798828, "learning_rate": 5e-07, "loss": 0.6924, "num_input_tokens_seen": 177154216, "step": 1532 }, { "epoch": 8.236559139784946, "loss": 1.055281639099121, "loss_ce": 0.00010593100159894675, "loss_iou": 0.453125, "loss_num": 0.030029296875, "loss_xval": 1.0546875, "num_input_tokens_seen": 177154216, "step": 1532 }, { "epoch": 8.241935483870968, "grad_norm": 21.524099349975586, "learning_rate": 5e-07, "loss": 0.8295, "num_input_tokens_seen": 177270936, "step": 1533 }, { "epoch": 8.241935483870968, "loss": 0.8540337681770325, "loss_ce": 2.9872506274841726e-05, "loss_iou": 0.361328125, "loss_num": 0.0262451171875, "loss_xval": 0.85546875, "num_input_tokens_seen": 177270936, "step": 1533 }, { "epoch": 8.24731182795699, "grad_norm": 17.272424697875977, "learning_rate": 5e-07, "loss": 0.861, "num_input_tokens_seen": 177382824, "step": 1534 }, { "epoch": 8.24731182795699, "loss": 0.848956286907196, "loss_ce": 7.93756335042417e-05, "loss_iou": 0.376953125, "loss_num": 0.0191650390625, "loss_xval": 0.84765625, "num_input_tokens_seen": 177382824, "step": 1534 }, { "epoch": 8.25268817204301, "grad_norm": 20.062950134277344, "learning_rate": 5e-07, "loss": 0.9114, "num_input_tokens_seen": 177498328, "step": 1535 }, { "epoch": 8.25268817204301, "loss": 1.0815931558609009, "loss_ce": 0.0007826213841326535, "loss_iou": 0.4765625, "loss_num": 0.025146484375, "loss_xval": 1.078125, "num_input_tokens_seen": 177498328, "step": 1535 }, { "epoch": 8.258064516129032, "grad_norm": 20.277626037597656, "learning_rate": 5e-07, "loss": 0.6258, "num_input_tokens_seen": 177614284, "step": 1536 }, { "epoch": 8.258064516129032, "loss": 0.8633373975753784, "loss_ce": 5.618414070340805e-05, "loss_iou": 0.376953125, "loss_num": 0.021728515625, "loss_xval": 0.86328125, "num_input_tokens_seen": 177614284, "step": 1536 }, { "epoch": 8.263440860215054, "grad_norm": 22.768409729003906, "learning_rate": 5e-07, "loss": 0.8187, "num_input_tokens_seen": 177729128, "step": 1537 }, { "epoch": 8.263440860215054, "loss": 1.159942626953125, "loss_ce": 3.0466815587715246e-05, "loss_iou": 0.5, "loss_num": 0.032470703125, "loss_xval": 1.15625, "num_input_tokens_seen": 177729128, "step": 1537 }, { "epoch": 8.268817204301076, "grad_norm": 21.909780502319336, "learning_rate": 5e-07, "loss": 0.6771, "num_input_tokens_seen": 177843448, "step": 1538 }, { "epoch": 8.268817204301076, "loss": 0.7996078133583069, "loss_ce": 4.727629493572749e-05, "loss_iou": 0.34375, "loss_num": 0.0224609375, "loss_xval": 0.80078125, "num_input_tokens_seen": 177843448, "step": 1538 }, { "epoch": 8.274193548387096, "grad_norm": 20.994741439819336, "learning_rate": 5e-07, "loss": 0.8632, "num_input_tokens_seen": 177960020, "step": 1539 }, { "epoch": 8.274193548387096, "loss": 1.0769339799880981, "loss_ce": 2.975489951495547e-05, "loss_iou": 0.44921875, "loss_num": 0.036376953125, "loss_xval": 1.078125, "num_input_tokens_seen": 177960020, "step": 1539 }, { "epoch": 8.279569892473118, "grad_norm": 22.914276123046875, "learning_rate": 5e-07, "loss": 0.9859, "num_input_tokens_seen": 178074724, "step": 1540 }, { "epoch": 8.279569892473118, "loss": 0.8462464809417725, "loss_ce": 5.512790085049346e-05, "loss_iou": 0.3515625, "loss_num": 0.0283203125, "loss_xval": 0.84765625, "num_input_tokens_seen": 178074724, "step": 1540 }, { "epoch": 8.28494623655914, "grad_norm": 18.014738082885742, "learning_rate": 5e-07, "loss": 0.7485, "num_input_tokens_seen": 178190512, "step": 1541 }, { "epoch": 8.28494623655914, "loss": 0.7131755352020264, "loss_ce": 4.077230914845131e-05, "loss_iou": 0.3125, "loss_num": 0.0177001953125, "loss_xval": 0.71484375, "num_input_tokens_seen": 178190512, "step": 1541 }, { "epoch": 8.290322580645162, "grad_norm": 18.66390037536621, "learning_rate": 5e-07, "loss": 0.697, "num_input_tokens_seen": 178303912, "step": 1542 }, { "epoch": 8.290322580645162, "loss": 0.6074588298797607, "loss_ce": 3.696428757393733e-05, "loss_iou": 0.26953125, "loss_num": 0.013916015625, "loss_xval": 0.609375, "num_input_tokens_seen": 178303912, "step": 1542 }, { "epoch": 8.295698924731182, "grad_norm": 22.51055908203125, "learning_rate": 5e-07, "loss": 0.8111, "num_input_tokens_seen": 178419272, "step": 1543 }, { "epoch": 8.295698924731182, "loss": 0.9719637632369995, "loss_ce": 3.993526115664281e-05, "loss_iou": 0.41796875, "loss_num": 0.027099609375, "loss_xval": 0.97265625, "num_input_tokens_seen": 178419272, "step": 1543 }, { "epoch": 8.301075268817204, "grad_norm": 24.755727767944336, "learning_rate": 5e-07, "loss": 0.6879, "num_input_tokens_seen": 178535744, "step": 1544 }, { "epoch": 8.301075268817204, "loss": 0.5691360831260681, "loss_ce": 4.4305226765573025e-05, "loss_iou": 0.2314453125, "loss_num": 0.0213623046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 178535744, "step": 1544 }, { "epoch": 8.306451612903226, "grad_norm": 20.50115203857422, "learning_rate": 5e-07, "loss": 0.8853, "num_input_tokens_seen": 178652780, "step": 1545 }, { "epoch": 8.306451612903226, "loss": 0.9685502648353577, "loss_ce": 4.440762131707743e-05, "loss_iou": 0.396484375, "loss_num": 0.034912109375, "loss_xval": 0.96875, "num_input_tokens_seen": 178652780, "step": 1545 }, { "epoch": 8.311827956989248, "grad_norm": 16.643878936767578, "learning_rate": 5e-07, "loss": 0.7641, "num_input_tokens_seen": 178769696, "step": 1546 }, { "epoch": 8.311827956989248, "loss": 0.4544924795627594, "loss_ce": 2.4716175175854005e-05, "loss_iou": 0.1962890625, "loss_num": 0.01220703125, "loss_xval": 0.455078125, "num_input_tokens_seen": 178769696, "step": 1546 }, { "epoch": 8.317204301075268, "grad_norm": 18.408418655395508, "learning_rate": 5e-07, "loss": 0.6661, "num_input_tokens_seen": 178885136, "step": 1547 }, { "epoch": 8.317204301075268, "loss": 0.47541341185569763, "loss_ce": 7.160428503993899e-05, "loss_iou": 0.1943359375, "loss_num": 0.0174560546875, "loss_xval": 0.474609375, "num_input_tokens_seen": 178885136, "step": 1547 }, { "epoch": 8.32258064516129, "grad_norm": 19.463520050048828, "learning_rate": 5e-07, "loss": 0.7789, "num_input_tokens_seen": 179000736, "step": 1548 }, { "epoch": 8.32258064516129, "loss": 0.5553354620933533, "loss_ce": 3.763381391763687e-05, "loss_iou": 0.2412109375, "loss_num": 0.01470947265625, "loss_xval": 0.5546875, "num_input_tokens_seen": 179000736, "step": 1548 }, { "epoch": 8.327956989247312, "grad_norm": 18.55394172668457, "learning_rate": 5e-07, "loss": 0.7494, "num_input_tokens_seen": 179118040, "step": 1549 }, { "epoch": 8.327956989247312, "loss": 0.4209500849246979, "loss_ce": 5.164634785614908e-05, "loss_iou": 0.171875, "loss_num": 0.01513671875, "loss_xval": 0.421875, "num_input_tokens_seen": 179118040, "step": 1549 }, { "epoch": 8.333333333333334, "grad_norm": 18.037797927856445, "learning_rate": 5e-07, "loss": 0.7106, "num_input_tokens_seen": 179231328, "step": 1550 }, { "epoch": 8.333333333333334, "loss": 0.5766969919204712, "loss_ce": 3.6816309147980064e-05, "loss_iou": 0.259765625, "loss_num": 0.01165771484375, "loss_xval": 0.578125, "num_input_tokens_seen": 179231328, "step": 1550 }, { "epoch": 8.338709677419354, "grad_norm": 19.276565551757812, "learning_rate": 5e-07, "loss": 0.6507, "num_input_tokens_seen": 179348200, "step": 1551 }, { "epoch": 8.338709677419354, "loss": 0.6009751558303833, "loss_ce": 2.3022799723548815e-05, "loss_iou": 0.2578125, "loss_num": 0.016845703125, "loss_xval": 0.6015625, "num_input_tokens_seen": 179348200, "step": 1551 }, { "epoch": 8.344086021505376, "grad_norm": 17.79414939880371, "learning_rate": 5e-07, "loss": 0.756, "num_input_tokens_seen": 179462172, "step": 1552 }, { "epoch": 8.344086021505376, "loss": 0.8444945812225342, "loss_ce": 4.2685649532359093e-05, "loss_iou": 0.376953125, "loss_num": 0.0179443359375, "loss_xval": 0.84375, "num_input_tokens_seen": 179462172, "step": 1552 }, { "epoch": 8.349462365591398, "grad_norm": 26.43301773071289, "learning_rate": 5e-07, "loss": 0.6196, "num_input_tokens_seen": 179578524, "step": 1553 }, { "epoch": 8.349462365591398, "loss": 0.7595577239990234, "loss_ce": 3.624880264396779e-05, "loss_iou": 0.326171875, "loss_num": 0.02197265625, "loss_xval": 0.7578125, "num_input_tokens_seen": 179578524, "step": 1553 }, { "epoch": 8.35483870967742, "grad_norm": 18.594379425048828, "learning_rate": 5e-07, "loss": 0.7312, "num_input_tokens_seen": 179690464, "step": 1554 }, { "epoch": 8.35483870967742, "loss": 0.6899855136871338, "loss_ce": 4.405710205901414e-05, "loss_iou": 0.291015625, "loss_num": 0.0216064453125, "loss_xval": 0.69140625, "num_input_tokens_seen": 179690464, "step": 1554 }, { "epoch": 8.36021505376344, "grad_norm": 12.993745803833008, "learning_rate": 5e-07, "loss": 0.8639, "num_input_tokens_seen": 179805360, "step": 1555 }, { "epoch": 8.36021505376344, "loss": 0.7803725600242615, "loss_ce": 9.912281529977918e-05, "loss_iou": 0.3515625, "loss_num": 0.01531982421875, "loss_xval": 0.78125, "num_input_tokens_seen": 179805360, "step": 1555 }, { "epoch": 8.365591397849462, "grad_norm": 17.439428329467773, "learning_rate": 5e-07, "loss": 0.9028, "num_input_tokens_seen": 179920376, "step": 1556 }, { "epoch": 8.365591397849462, "loss": 0.7655404806137085, "loss_ce": 3.757717058761045e-05, "loss_iou": 0.34375, "loss_num": 0.0159912109375, "loss_xval": 0.765625, "num_input_tokens_seen": 179920376, "step": 1556 }, { "epoch": 8.370967741935484, "grad_norm": 26.1959285736084, "learning_rate": 5e-07, "loss": 0.8031, "num_input_tokens_seen": 180033224, "step": 1557 }, { "epoch": 8.370967741935484, "loss": 0.9797754883766174, "loss_ce": 3.914511398761533e-05, "loss_iou": 0.369140625, "loss_num": 0.048583984375, "loss_xval": 0.98046875, "num_input_tokens_seen": 180033224, "step": 1557 }, { "epoch": 8.376344086021506, "grad_norm": 19.94898223876953, "learning_rate": 5e-07, "loss": 0.8087, "num_input_tokens_seen": 180150168, "step": 1558 }, { "epoch": 8.376344086021506, "loss": 0.6460648775100708, "loss_ce": 6.878733984194696e-05, "loss_iou": 0.2451171875, "loss_num": 0.03125, "loss_xval": 0.64453125, "num_input_tokens_seen": 180150168, "step": 1558 }, { "epoch": 8.381720430107526, "grad_norm": 16.430877685546875, "learning_rate": 5e-07, "loss": 0.7056, "num_input_tokens_seen": 180261760, "step": 1559 }, { "epoch": 8.381720430107526, "loss": 0.5725534558296204, "loss_ce": 4.3715990614145994e-05, "loss_iou": 0.240234375, "loss_num": 0.018310546875, "loss_xval": 0.57421875, "num_input_tokens_seen": 180261760, "step": 1559 }, { "epoch": 8.387096774193548, "grad_norm": 17.966060638427734, "learning_rate": 5e-07, "loss": 0.9108, "num_input_tokens_seen": 180379564, "step": 1560 }, { "epoch": 8.387096774193548, "loss": 0.75124591588974, "loss_ce": 2.5210913008777425e-05, "loss_iou": 0.32421875, "loss_num": 0.0206298828125, "loss_xval": 0.75, "num_input_tokens_seen": 180379564, "step": 1560 }, { "epoch": 8.39247311827957, "grad_norm": 17.723447799682617, "learning_rate": 5e-07, "loss": 0.6629, "num_input_tokens_seen": 180496700, "step": 1561 }, { "epoch": 8.39247311827957, "loss": 0.5867116451263428, "loss_ce": 4.171633918303996e-05, "loss_iou": 0.23828125, "loss_num": 0.0220947265625, "loss_xval": 0.5859375, "num_input_tokens_seen": 180496700, "step": 1561 }, { "epoch": 8.397849462365592, "grad_norm": 17.196237564086914, "learning_rate": 5e-07, "loss": 0.604, "num_input_tokens_seen": 180612004, "step": 1562 }, { "epoch": 8.397849462365592, "loss": 0.7834773659706116, "loss_ce": 3.0130118830129504e-05, "loss_iou": 0.328125, "loss_num": 0.0255126953125, "loss_xval": 0.78515625, "num_input_tokens_seen": 180612004, "step": 1562 }, { "epoch": 8.403225806451612, "grad_norm": 19.816003799438477, "learning_rate": 5e-07, "loss": 0.8219, "num_input_tokens_seen": 180727968, "step": 1563 }, { "epoch": 8.403225806451612, "loss": 0.9631831049919128, "loss_ce": 4.8319903726223856e-05, "loss_iou": 0.41796875, "loss_num": 0.025146484375, "loss_xval": 0.96484375, "num_input_tokens_seen": 180727968, "step": 1563 }, { "epoch": 8.408602150537634, "grad_norm": 23.172990798950195, "learning_rate": 5e-07, "loss": 0.7308, "num_input_tokens_seen": 180841256, "step": 1564 }, { "epoch": 8.408602150537634, "loss": 0.7602911591529846, "loss_ce": 3.725871647475287e-05, "loss_iou": 0.3203125, "loss_num": 0.0240478515625, "loss_xval": 0.76171875, "num_input_tokens_seen": 180841256, "step": 1564 }, { "epoch": 8.413978494623656, "grad_norm": 22.587270736694336, "learning_rate": 5e-07, "loss": 0.8432, "num_input_tokens_seen": 180956052, "step": 1565 }, { "epoch": 8.413978494623656, "loss": 0.647507905960083, "loss_ce": 4.701331636169925e-05, "loss_iou": 0.263671875, "loss_num": 0.024169921875, "loss_xval": 0.6484375, "num_input_tokens_seen": 180956052, "step": 1565 }, { "epoch": 8.419354838709678, "grad_norm": 23.80035972595215, "learning_rate": 5e-07, "loss": 0.8125, "num_input_tokens_seen": 181073292, "step": 1566 }, { "epoch": 8.419354838709678, "loss": 0.8271796703338623, "loss_ce": 3.1242907425621524e-05, "loss_iou": 0.37109375, "loss_num": 0.0167236328125, "loss_xval": 0.828125, "num_input_tokens_seen": 181073292, "step": 1566 }, { "epoch": 8.424731182795698, "grad_norm": 16.23335075378418, "learning_rate": 5e-07, "loss": 0.8426, "num_input_tokens_seen": 181188408, "step": 1567 }, { "epoch": 8.424731182795698, "loss": 0.6547688245773315, "loss_ce": 4.470207204576582e-05, "loss_iou": 0.275390625, "loss_num": 0.0208740234375, "loss_xval": 0.65625, "num_input_tokens_seen": 181188408, "step": 1567 }, { "epoch": 8.43010752688172, "grad_norm": 27.001665115356445, "learning_rate": 5e-07, "loss": 0.7999, "num_input_tokens_seen": 181304628, "step": 1568 }, { "epoch": 8.43010752688172, "loss": 0.7981600165367126, "loss_ce": 6.430744542740285e-05, "loss_iou": 0.330078125, "loss_num": 0.02734375, "loss_xval": 0.796875, "num_input_tokens_seen": 181304628, "step": 1568 }, { "epoch": 8.435483870967742, "grad_norm": 13.183411598205566, "learning_rate": 5e-07, "loss": 0.7395, "num_input_tokens_seen": 181417204, "step": 1569 }, { "epoch": 8.435483870967742, "loss": 0.7229235172271729, "loss_ce": 2.3083701307768933e-05, "loss_iou": 0.3203125, "loss_num": 0.01611328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 181417204, "step": 1569 }, { "epoch": 8.440860215053764, "grad_norm": 16.9658260345459, "learning_rate": 5e-07, "loss": 0.6348, "num_input_tokens_seen": 181531832, "step": 1570 }, { "epoch": 8.440860215053764, "loss": 0.5635471343994141, "loss_ce": 7.059246127028018e-05, "loss_iou": 0.224609375, "loss_num": 0.0228271484375, "loss_xval": 0.5625, "num_input_tokens_seen": 181531832, "step": 1570 }, { "epoch": 8.446236559139784, "grad_norm": 21.632417678833008, "learning_rate": 5e-07, "loss": 0.6819, "num_input_tokens_seen": 181649292, "step": 1571 }, { "epoch": 8.446236559139784, "loss": 0.5493689775466919, "loss_ce": 5.253930430626497e-05, "loss_iou": 0.2333984375, "loss_num": 0.0164794921875, "loss_xval": 0.55078125, "num_input_tokens_seen": 181649292, "step": 1571 }, { "epoch": 8.451612903225806, "grad_norm": 28.864469528198242, "learning_rate": 5e-07, "loss": 0.8049, "num_input_tokens_seen": 181763528, "step": 1572 }, { "epoch": 8.451612903225806, "loss": 0.7877194881439209, "loss_ce": 0.00012178840552223846, "loss_iou": 0.349609375, "loss_num": 0.017578125, "loss_xval": 0.7890625, "num_input_tokens_seen": 181763528, "step": 1572 }, { "epoch": 8.456989247311828, "grad_norm": 47.69084548950195, "learning_rate": 5e-07, "loss": 0.8545, "num_input_tokens_seen": 181878432, "step": 1573 }, { "epoch": 8.456989247311828, "loss": 0.8106140494346619, "loss_ce": 6.716215284541249e-05, "loss_iou": 0.36328125, "loss_num": 0.0167236328125, "loss_xval": 0.8125, "num_input_tokens_seen": 181878432, "step": 1573 }, { "epoch": 8.46236559139785, "grad_norm": 16.568607330322266, "learning_rate": 5e-07, "loss": 0.7606, "num_input_tokens_seen": 181994464, "step": 1574 }, { "epoch": 8.46236559139785, "loss": 0.6260082721710205, "loss_ce": 3.169279807480052e-05, "loss_iou": 0.26171875, "loss_num": 0.02001953125, "loss_xval": 0.625, "num_input_tokens_seen": 181994464, "step": 1574 }, { "epoch": 8.46774193548387, "grad_norm": 23.808551788330078, "learning_rate": 5e-07, "loss": 0.7235, "num_input_tokens_seen": 182111500, "step": 1575 }, { "epoch": 8.46774193548387, "loss": 0.7710418701171875, "loss_ce": 4.5782740926370025e-05, "loss_iou": 0.333984375, "loss_num": 0.0203857421875, "loss_xval": 0.76953125, "num_input_tokens_seen": 182111500, "step": 1575 }, { "epoch": 8.473118279569892, "grad_norm": 30.81068992614746, "learning_rate": 5e-07, "loss": 0.6754, "num_input_tokens_seen": 182227548, "step": 1576 }, { "epoch": 8.473118279569892, "loss": 0.6806973218917847, "loss_ce": 3.325987927382812e-05, "loss_iou": 0.28515625, "loss_num": 0.02197265625, "loss_xval": 0.6796875, "num_input_tokens_seen": 182227548, "step": 1576 }, { "epoch": 8.478494623655914, "grad_norm": 22.174636840820312, "learning_rate": 5e-07, "loss": 0.7349, "num_input_tokens_seen": 182343996, "step": 1577 }, { "epoch": 8.478494623655914, "loss": 0.6755979061126709, "loss_ce": 6.080802631913684e-05, "loss_iou": 0.28515625, "loss_num": 0.0206298828125, "loss_xval": 0.67578125, "num_input_tokens_seen": 182343996, "step": 1577 }, { "epoch": 8.483870967741936, "grad_norm": 18.957592010498047, "learning_rate": 5e-07, "loss": 0.5981, "num_input_tokens_seen": 182458388, "step": 1578 }, { "epoch": 8.483870967741936, "loss": 0.7364403009414673, "loss_ce": 0.00011217590508749709, "loss_iou": 0.306640625, "loss_num": 0.0245361328125, "loss_xval": 0.734375, "num_input_tokens_seen": 182458388, "step": 1578 }, { "epoch": 8.489247311827956, "grad_norm": 18.9088077545166, "learning_rate": 5e-07, "loss": 0.6041, "num_input_tokens_seen": 182572092, "step": 1579 }, { "epoch": 8.489247311827956, "loss": 0.5167831182479858, "loss_ce": 8.993945812107995e-05, "loss_iou": 0.197265625, "loss_num": 0.024658203125, "loss_xval": 0.515625, "num_input_tokens_seen": 182572092, "step": 1579 }, { "epoch": 8.494623655913978, "grad_norm": 21.89832305908203, "learning_rate": 5e-07, "loss": 0.7857, "num_input_tokens_seen": 182691012, "step": 1580 }, { "epoch": 8.494623655913978, "loss": 0.5498459339141846, "loss_ce": 4.125476334593259e-05, "loss_iou": 0.236328125, "loss_num": 0.015380859375, "loss_xval": 0.55078125, "num_input_tokens_seen": 182691012, "step": 1580 }, { "epoch": 8.5, "grad_norm": 20.69843292236328, "learning_rate": 5e-07, "loss": 0.672, "num_input_tokens_seen": 182808456, "step": 1581 }, { "epoch": 8.5, "loss": 0.695598304271698, "loss_ce": 4.1699524444993585e-05, "loss_iou": 0.30859375, "loss_num": 0.0159912109375, "loss_xval": 0.6953125, "num_input_tokens_seen": 182808456, "step": 1581 }, { "epoch": 8.505376344086022, "grad_norm": 19.15953826904297, "learning_rate": 5e-07, "loss": 0.7918, "num_input_tokens_seen": 182928960, "step": 1582 }, { "epoch": 8.505376344086022, "loss": 0.5040527582168579, "loss_ce": 2.4442470021313056e-05, "loss_iou": 0.216796875, "loss_num": 0.01409912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 182928960, "step": 1582 }, { "epoch": 8.510752688172044, "grad_norm": 27.704376220703125, "learning_rate": 5e-07, "loss": 0.8627, "num_input_tokens_seen": 183044944, "step": 1583 }, { "epoch": 8.510752688172044, "loss": 1.0049076080322266, "loss_ce": 2.481247065588832e-05, "loss_iou": 0.4296875, "loss_num": 0.02880859375, "loss_xval": 1.0078125, "num_input_tokens_seen": 183044944, "step": 1583 }, { "epoch": 8.516129032258064, "grad_norm": 20.495830535888672, "learning_rate": 5e-07, "loss": 0.6827, "num_input_tokens_seen": 183161680, "step": 1584 }, { "epoch": 8.516129032258064, "loss": 0.5643725395202637, "loss_ce": 4.1522886021994054e-05, "loss_iou": 0.23046875, "loss_num": 0.020751953125, "loss_xval": 0.5625, "num_input_tokens_seen": 183161680, "step": 1584 }, { "epoch": 8.521505376344086, "grad_norm": 22.27865219116211, "learning_rate": 5e-07, "loss": 0.8313, "num_input_tokens_seen": 183277660, "step": 1585 }, { "epoch": 8.521505376344086, "loss": 0.648236870765686, "loss_ce": 4.354683915153146e-05, "loss_iou": 0.2890625, "loss_num": 0.0137939453125, "loss_xval": 0.6484375, "num_input_tokens_seen": 183277660, "step": 1585 }, { "epoch": 8.526881720430108, "grad_norm": 22.497060775756836, "learning_rate": 5e-07, "loss": 0.7871, "num_input_tokens_seen": 183392228, "step": 1586 }, { "epoch": 8.526881720430108, "loss": 0.8545287847518921, "loss_ce": 3.6653327697422355e-05, "loss_iou": 0.37109375, "loss_num": 0.022216796875, "loss_xval": 0.85546875, "num_input_tokens_seen": 183392228, "step": 1586 }, { "epoch": 8.532258064516128, "grad_norm": 37.85683822631836, "learning_rate": 5e-07, "loss": 0.7905, "num_input_tokens_seen": 183504448, "step": 1587 }, { "epoch": 8.532258064516128, "loss": 0.9647043347358704, "loss_ce": 0.00010472737631062046, "loss_iou": 0.396484375, "loss_num": 0.0341796875, "loss_xval": 0.96484375, "num_input_tokens_seen": 183504448, "step": 1587 }, { "epoch": 8.53763440860215, "grad_norm": 34.656639099121094, "learning_rate": 5e-07, "loss": 0.6887, "num_input_tokens_seen": 183622052, "step": 1588 }, { "epoch": 8.53763440860215, "loss": 0.5645397901535034, "loss_ce": 8.668671944178641e-05, "loss_iou": 0.240234375, "loss_num": 0.016845703125, "loss_xval": 0.5625, "num_input_tokens_seen": 183622052, "step": 1588 }, { "epoch": 8.543010752688172, "grad_norm": 22.534616470336914, "learning_rate": 5e-07, "loss": 0.7823, "num_input_tokens_seen": 183736804, "step": 1589 }, { "epoch": 8.543010752688172, "loss": 0.8628352880477905, "loss_ce": 0.0006526829674839973, "loss_iou": 0.38671875, "loss_num": 0.0177001953125, "loss_xval": 0.86328125, "num_input_tokens_seen": 183736804, "step": 1589 }, { "epoch": 8.548387096774194, "grad_norm": 21.619129180908203, "learning_rate": 5e-07, "loss": 0.6897, "num_input_tokens_seen": 183853912, "step": 1590 }, { "epoch": 8.548387096774194, "loss": 0.5685114860534668, "loss_ce": 3.0083769161137752e-05, "loss_iou": 0.251953125, "loss_num": 0.0128173828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 183853912, "step": 1590 }, { "epoch": 8.553763440860216, "grad_norm": 25.05208396911621, "learning_rate": 5e-07, "loss": 0.8216, "num_input_tokens_seen": 183968220, "step": 1591 }, { "epoch": 8.553763440860216, "loss": 0.9712700843811035, "loss_ce": 7.870045374147594e-05, "loss_iou": 0.4140625, "loss_num": 0.0291748046875, "loss_xval": 0.97265625, "num_input_tokens_seen": 183968220, "step": 1591 }, { "epoch": 8.559139784946236, "grad_norm": 15.499307632446289, "learning_rate": 5e-07, "loss": 0.5306, "num_input_tokens_seen": 184082548, "step": 1592 }, { "epoch": 8.559139784946236, "loss": 0.4505838453769684, "loss_ce": 2.2325621102936566e-05, "loss_iou": 0.1865234375, "loss_num": 0.01544189453125, "loss_xval": 0.451171875, "num_input_tokens_seen": 184082548, "step": 1592 }, { "epoch": 8.564516129032258, "grad_norm": 17.703920364379883, "learning_rate": 5e-07, "loss": 0.8703, "num_input_tokens_seen": 184197360, "step": 1593 }, { "epoch": 8.564516129032258, "loss": 0.5976877212524414, "loss_ce": 3.146951348753646e-05, "loss_iou": 0.26171875, "loss_num": 0.01507568359375, "loss_xval": 0.59765625, "num_input_tokens_seen": 184197360, "step": 1593 }, { "epoch": 8.56989247311828, "grad_norm": 20.164819717407227, "learning_rate": 5e-07, "loss": 0.9053, "num_input_tokens_seen": 184311876, "step": 1594 }, { "epoch": 8.56989247311828, "loss": 0.8323131799697876, "loss_ce": 3.778679820243269e-05, "loss_iou": 0.35546875, "loss_num": 0.024169921875, "loss_xval": 0.83203125, "num_input_tokens_seen": 184311876, "step": 1594 }, { "epoch": 8.575268817204302, "grad_norm": 24.769132614135742, "learning_rate": 5e-07, "loss": 0.7881, "num_input_tokens_seen": 184426544, "step": 1595 }, { "epoch": 8.575268817204302, "loss": 1.0820608139038086, "loss_ce": 2.9571718187071383e-05, "loss_iou": 0.44921875, "loss_num": 0.036376953125, "loss_xval": 1.078125, "num_input_tokens_seen": 184426544, "step": 1595 }, { "epoch": 8.580645161290322, "grad_norm": 27.914411544799805, "learning_rate": 5e-07, "loss": 0.7307, "num_input_tokens_seen": 184544200, "step": 1596 }, { "epoch": 8.580645161290322, "loss": 0.6727204322814941, "loss_ce": 5.1984257879666984e-05, "loss_iou": 0.29296875, "loss_num": 0.0174560546875, "loss_xval": 0.671875, "num_input_tokens_seen": 184544200, "step": 1596 }, { "epoch": 8.586021505376344, "grad_norm": 19.955942153930664, "learning_rate": 5e-07, "loss": 0.786, "num_input_tokens_seen": 184658780, "step": 1597 }, { "epoch": 8.586021505376344, "loss": 0.7114702463150024, "loss_ce": 4.4468695705290884e-05, "loss_iou": 0.3046875, "loss_num": 0.0208740234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 184658780, "step": 1597 }, { "epoch": 8.591397849462366, "grad_norm": 16.494590759277344, "learning_rate": 5e-07, "loss": 0.6493, "num_input_tokens_seen": 184774212, "step": 1598 }, { "epoch": 8.591397849462366, "loss": 0.6609781980514526, "loss_ce": 2.8494156140368432e-05, "loss_iou": 0.26953125, "loss_num": 0.024169921875, "loss_xval": 0.66015625, "num_input_tokens_seen": 184774212, "step": 1598 }, { "epoch": 8.596774193548388, "grad_norm": 33.688568115234375, "learning_rate": 5e-07, "loss": 0.7278, "num_input_tokens_seen": 184890388, "step": 1599 }, { "epoch": 8.596774193548388, "loss": 0.7623294591903687, "loss_ce": 6.142030179034919e-05, "loss_iou": 0.330078125, "loss_num": 0.020263671875, "loss_xval": 0.76171875, "num_input_tokens_seen": 184890388, "step": 1599 }, { "epoch": 8.602150537634408, "grad_norm": 30.894920349121094, "learning_rate": 5e-07, "loss": 0.9103, "num_input_tokens_seen": 185007580, "step": 1600 }, { "epoch": 8.602150537634408, "loss": 0.9048370122909546, "loss_ce": 5.184837937122211e-05, "loss_iou": 0.41015625, "loss_num": 0.01708984375, "loss_xval": 0.90625, "num_input_tokens_seen": 185007580, "step": 1600 }, { "epoch": 8.60752688172043, "grad_norm": 26.215618133544922, "learning_rate": 5e-07, "loss": 0.7142, "num_input_tokens_seen": 185125764, "step": 1601 }, { "epoch": 8.60752688172043, "loss": 0.790633499622345, "loss_ce": 0.00010612802725518122, "loss_iou": 0.35546875, "loss_num": 0.0159912109375, "loss_xval": 0.7890625, "num_input_tokens_seen": 185125764, "step": 1601 }, { "epoch": 8.612903225806452, "grad_norm": 41.207149505615234, "learning_rate": 5e-07, "loss": 0.7695, "num_input_tokens_seen": 185239140, "step": 1602 }, { "epoch": 8.612903225806452, "loss": 0.81528639793396, "loss_ce": 0.00016189597954507917, "loss_iou": 0.341796875, "loss_num": 0.026123046875, "loss_xval": 0.81640625, "num_input_tokens_seen": 185239140, "step": 1602 }, { "epoch": 8.618279569892474, "grad_norm": 19.43071174621582, "learning_rate": 5e-07, "loss": 0.7356, "num_input_tokens_seen": 185356088, "step": 1603 }, { "epoch": 8.618279569892474, "loss": 0.8206015825271606, "loss_ce": 4.4973770854994655e-05, "loss_iou": 0.345703125, "loss_num": 0.02587890625, "loss_xval": 0.8203125, "num_input_tokens_seen": 185356088, "step": 1603 }, { "epoch": 8.623655913978494, "grad_norm": 18.197999954223633, "learning_rate": 5e-07, "loss": 0.6643, "num_input_tokens_seen": 185473356, "step": 1604 }, { "epoch": 8.623655913978494, "loss": 0.7484918236732483, "loss_ce": 7.874222501413897e-05, "loss_iou": 0.328125, "loss_num": 0.0181884765625, "loss_xval": 0.75, "num_input_tokens_seen": 185473356, "step": 1604 }, { "epoch": 8.629032258064516, "grad_norm": 20.936079025268555, "learning_rate": 5e-07, "loss": 0.66, "num_input_tokens_seen": 185590032, "step": 1605 }, { "epoch": 8.629032258064516, "loss": 0.7647342681884766, "loss_ce": 8.585224713897333e-05, "loss_iou": 0.337890625, "loss_num": 0.0181884765625, "loss_xval": 0.765625, "num_input_tokens_seen": 185590032, "step": 1605 }, { "epoch": 8.634408602150538, "grad_norm": 20.28717803955078, "learning_rate": 5e-07, "loss": 0.6897, "num_input_tokens_seen": 185704672, "step": 1606 }, { "epoch": 8.634408602150538, "loss": 0.7477697134017944, "loss_ce": 8.906557195587084e-05, "loss_iou": 0.330078125, "loss_num": 0.0179443359375, "loss_xval": 0.74609375, "num_input_tokens_seen": 185704672, "step": 1606 }, { "epoch": 8.63978494623656, "grad_norm": 14.134358406066895, "learning_rate": 5e-07, "loss": 0.6169, "num_input_tokens_seen": 185819360, "step": 1607 }, { "epoch": 8.63978494623656, "loss": 0.5460940599441528, "loss_ce": 0.00019562870147638023, "loss_iou": 0.234375, "loss_num": 0.015380859375, "loss_xval": 0.546875, "num_input_tokens_seen": 185819360, "step": 1607 }, { "epoch": 8.64516129032258, "grad_norm": 15.729669570922852, "learning_rate": 5e-07, "loss": 0.5945, "num_input_tokens_seen": 185932376, "step": 1608 }, { "epoch": 8.64516129032258, "loss": 0.6707966327667236, "loss_ce": 2.0291336113587022e-05, "loss_iou": 0.283203125, "loss_num": 0.0213623046875, "loss_xval": 0.671875, "num_input_tokens_seen": 185932376, "step": 1608 }, { "epoch": 8.650537634408602, "grad_norm": 19.13863754272461, "learning_rate": 5e-07, "loss": 0.6958, "num_input_tokens_seen": 186045828, "step": 1609 }, { "epoch": 8.650537634408602, "loss": 0.6516423225402832, "loss_ce": 3.102991104242392e-05, "loss_iou": 0.291015625, "loss_num": 0.01385498046875, "loss_xval": 0.65234375, "num_input_tokens_seen": 186045828, "step": 1609 }, { "epoch": 8.655913978494624, "grad_norm": 18.549779891967773, "learning_rate": 5e-07, "loss": 0.6055, "num_input_tokens_seen": 186160436, "step": 1610 }, { "epoch": 8.655913978494624, "loss": 0.6440750956535339, "loss_ce": 3.214886964997277e-05, "loss_iou": 0.271484375, "loss_num": 0.0203857421875, "loss_xval": 0.64453125, "num_input_tokens_seen": 186160436, "step": 1610 }, { "epoch": 8.661290322580646, "grad_norm": 15.715423583984375, "learning_rate": 5e-07, "loss": 0.7839, "num_input_tokens_seen": 186273744, "step": 1611 }, { "epoch": 8.661290322580646, "loss": 0.9094655513763428, "loss_ce": 4.174207424512133e-05, "loss_iou": 0.3984375, "loss_num": 0.02294921875, "loss_xval": 0.91015625, "num_input_tokens_seen": 186273744, "step": 1611 }, { "epoch": 8.666666666666666, "grad_norm": 22.517486572265625, "learning_rate": 5e-07, "loss": 0.7779, "num_input_tokens_seen": 186389876, "step": 1612 }, { "epoch": 8.666666666666666, "loss": 0.5576691031455994, "loss_ce": 5.192997195990756e-05, "loss_iou": 0.2333984375, "loss_num": 0.0181884765625, "loss_xval": 0.55859375, "num_input_tokens_seen": 186389876, "step": 1612 }, { "epoch": 8.672043010752688, "grad_norm": 15.311644554138184, "learning_rate": 5e-07, "loss": 0.6824, "num_input_tokens_seen": 186506596, "step": 1613 }, { "epoch": 8.672043010752688, "loss": 0.853675127029419, "loss_ce": 3.744632704183459e-05, "loss_iou": 0.3828125, "loss_num": 0.01708984375, "loss_xval": 0.85546875, "num_input_tokens_seen": 186506596, "step": 1613 }, { "epoch": 8.67741935483871, "grad_norm": 18.362815856933594, "learning_rate": 5e-07, "loss": 0.7832, "num_input_tokens_seen": 186618600, "step": 1614 }, { "epoch": 8.67741935483871, "loss": 0.6895904541015625, "loss_ce": 1.5283583707059734e-05, "loss_iou": 0.296875, "loss_num": 0.01904296875, "loss_xval": 0.69140625, "num_input_tokens_seen": 186618600, "step": 1614 }, { "epoch": 8.682795698924732, "grad_norm": 21.643627166748047, "learning_rate": 5e-07, "loss": 0.8899, "num_input_tokens_seen": 186729700, "step": 1615 }, { "epoch": 8.682795698924732, "loss": 0.8328003287315369, "loss_ce": 3.6614161217585206e-05, "loss_iou": 0.32421875, "loss_num": 0.03662109375, "loss_xval": 0.83203125, "num_input_tokens_seen": 186729700, "step": 1615 }, { "epoch": 8.688172043010752, "grad_norm": 19.32761573791504, "learning_rate": 5e-07, "loss": 0.7464, "num_input_tokens_seen": 186843552, "step": 1616 }, { "epoch": 8.688172043010752, "loss": 0.7651619911193848, "loss_ce": 2.529137782403268e-05, "loss_iou": 0.314453125, "loss_num": 0.0269775390625, "loss_xval": 0.765625, "num_input_tokens_seen": 186843552, "step": 1616 }, { "epoch": 8.693548387096774, "grad_norm": 18.36901092529297, "learning_rate": 5e-07, "loss": 0.8275, "num_input_tokens_seen": 186958232, "step": 1617 }, { "epoch": 8.693548387096774, "loss": 1.0115447044372559, "loss_ce": 7.010511762928218e-05, "loss_iou": 0.3984375, "loss_num": 0.042724609375, "loss_xval": 1.0078125, "num_input_tokens_seen": 186958232, "step": 1617 }, { "epoch": 8.698924731182796, "grad_norm": 20.231544494628906, "learning_rate": 5e-07, "loss": 0.7639, "num_input_tokens_seen": 187072768, "step": 1618 }, { "epoch": 8.698924731182796, "loss": 0.9571723341941833, "loss_ce": 0.00038517595385201275, "loss_iou": 0.40625, "loss_num": 0.0286865234375, "loss_xval": 0.95703125, "num_input_tokens_seen": 187072768, "step": 1618 }, { "epoch": 8.704301075268818, "grad_norm": 20.15364646911621, "learning_rate": 5e-07, "loss": 0.6848, "num_input_tokens_seen": 187186360, "step": 1619 }, { "epoch": 8.704301075268818, "loss": 0.594879150390625, "loss_ce": 3.0493085432681255e-05, "loss_iou": 0.263671875, "loss_num": 0.013916015625, "loss_xval": 0.59375, "num_input_tokens_seen": 187186360, "step": 1619 }, { "epoch": 8.709677419354838, "grad_norm": 21.21925926208496, "learning_rate": 5e-07, "loss": 0.7709, "num_input_tokens_seen": 187305204, "step": 1620 }, { "epoch": 8.709677419354838, "loss": 0.6187736392021179, "loss_ce": 0.00012131292896810919, "loss_iou": 0.26171875, "loss_num": 0.0189208984375, "loss_xval": 0.6171875, "num_input_tokens_seen": 187305204, "step": 1620 }, { "epoch": 8.71505376344086, "grad_norm": 19.243061065673828, "learning_rate": 5e-07, "loss": 0.7095, "num_input_tokens_seen": 187423692, "step": 1621 }, { "epoch": 8.71505376344086, "loss": 0.6424041390419006, "loss_ce": 7.013901631580666e-05, "loss_iou": 0.27734375, "loss_num": 0.0177001953125, "loss_xval": 0.640625, "num_input_tokens_seen": 187423692, "step": 1621 }, { "epoch": 8.720430107526882, "grad_norm": 15.03866195678711, "learning_rate": 5e-07, "loss": 0.7701, "num_input_tokens_seen": 187537424, "step": 1622 }, { "epoch": 8.720430107526882, "loss": 0.7839815616607666, "loss_ce": 4.597877341439016e-05, "loss_iou": 0.330078125, "loss_num": 0.0242919921875, "loss_xval": 0.78515625, "num_input_tokens_seen": 187537424, "step": 1622 }, { "epoch": 8.725806451612904, "grad_norm": 23.335552215576172, "learning_rate": 5e-07, "loss": 0.6742, "num_input_tokens_seen": 187649704, "step": 1623 }, { "epoch": 8.725806451612904, "loss": 0.5933016538619995, "loss_ce": 3.990047116531059e-05, "loss_iou": 0.232421875, "loss_num": 0.0257568359375, "loss_xval": 0.59375, "num_input_tokens_seen": 187649704, "step": 1623 }, { "epoch": 8.731182795698924, "grad_norm": 23.570114135742188, "learning_rate": 5e-07, "loss": 0.6266, "num_input_tokens_seen": 187764576, "step": 1624 }, { "epoch": 8.731182795698924, "loss": 0.5336074829101562, "loss_ce": 3.8159261748660356e-05, "loss_iou": 0.2265625, "loss_num": 0.01611328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 187764576, "step": 1624 }, { "epoch": 8.736559139784946, "grad_norm": 17.37296485900879, "learning_rate": 5e-07, "loss": 0.6598, "num_input_tokens_seen": 187882416, "step": 1625 }, { "epoch": 8.736559139784946, "loss": 0.5735180974006653, "loss_ce": 3.1748495530337095e-05, "loss_iou": 0.26171875, "loss_num": 0.01031494140625, "loss_xval": 0.57421875, "num_input_tokens_seen": 187882416, "step": 1625 }, { "epoch": 8.741935483870968, "grad_norm": 21.918975830078125, "learning_rate": 5e-07, "loss": 0.6101, "num_input_tokens_seen": 187997764, "step": 1626 }, { "epoch": 8.741935483870968, "loss": 0.6731256246566772, "loss_ce": 2.9869182981201448e-05, "loss_iou": 0.294921875, "loss_num": 0.0166015625, "loss_xval": 0.671875, "num_input_tokens_seen": 187997764, "step": 1626 }, { "epoch": 8.74731182795699, "grad_norm": 26.071125030517578, "learning_rate": 5e-07, "loss": 0.8413, "num_input_tokens_seen": 188111288, "step": 1627 }, { "epoch": 8.74731182795699, "loss": 1.0286104679107666, "loss_ce": 4.597895895130932e-05, "loss_iou": 0.431640625, "loss_num": 0.033203125, "loss_xval": 1.03125, "num_input_tokens_seen": 188111288, "step": 1627 }, { "epoch": 8.75268817204301, "grad_norm": 24.962961196899414, "learning_rate": 5e-07, "loss": 0.6249, "num_input_tokens_seen": 188228228, "step": 1628 }, { "epoch": 8.75268817204301, "loss": 0.7009886503219604, "loss_ce": 6.086681969463825e-05, "loss_iou": 0.2890625, "loss_num": 0.0245361328125, "loss_xval": 0.69921875, "num_input_tokens_seen": 188228228, "step": 1628 }, { "epoch": 8.758064516129032, "grad_norm": 26.17108917236328, "learning_rate": 5e-07, "loss": 0.7779, "num_input_tokens_seen": 188347572, "step": 1629 }, { "epoch": 8.758064516129032, "loss": 0.7080503702163696, "loss_ce": 4.252848884789273e-05, "loss_iou": 0.3125, "loss_num": 0.016845703125, "loss_xval": 0.70703125, "num_input_tokens_seen": 188347572, "step": 1629 }, { "epoch": 8.763440860215054, "grad_norm": 17.22964096069336, "learning_rate": 5e-07, "loss": 0.7014, "num_input_tokens_seen": 188462696, "step": 1630 }, { "epoch": 8.763440860215054, "loss": 0.6819679141044617, "loss_ce": 8.313589205499738e-05, "loss_iou": 0.29296875, "loss_num": 0.0194091796875, "loss_xval": 0.68359375, "num_input_tokens_seen": 188462696, "step": 1630 }, { "epoch": 8.768817204301076, "grad_norm": 19.615312576293945, "learning_rate": 5e-07, "loss": 0.8678, "num_input_tokens_seen": 188577080, "step": 1631 }, { "epoch": 8.768817204301076, "loss": 0.64580899477005, "loss_ce": 5.7051431213039905e-05, "loss_iou": 0.2490234375, "loss_num": 0.029541015625, "loss_xval": 0.64453125, "num_input_tokens_seen": 188577080, "step": 1631 }, { "epoch": 8.774193548387096, "grad_norm": 13.890069961547852, "learning_rate": 5e-07, "loss": 0.6351, "num_input_tokens_seen": 188694808, "step": 1632 }, { "epoch": 8.774193548387096, "loss": 0.6743530035018921, "loss_ce": 3.661491791717708e-05, "loss_iou": 0.2890625, "loss_num": 0.01904296875, "loss_xval": 0.67578125, "num_input_tokens_seen": 188694808, "step": 1632 }, { "epoch": 8.779569892473118, "grad_norm": 19.893871307373047, "learning_rate": 5e-07, "loss": 0.7769, "num_input_tokens_seen": 188811940, "step": 1633 }, { "epoch": 8.779569892473118, "loss": 0.6261507272720337, "loss_ce": 5.21396505064331e-05, "loss_iou": 0.26953125, "loss_num": 0.0172119140625, "loss_xval": 0.625, "num_input_tokens_seen": 188811940, "step": 1633 }, { "epoch": 8.78494623655914, "grad_norm": 30.163183212280273, "learning_rate": 5e-07, "loss": 0.6559, "num_input_tokens_seen": 188929888, "step": 1634 }, { "epoch": 8.78494623655914, "loss": 0.6579877138137817, "loss_ce": 2.873286030080635e-05, "loss_iou": 0.296875, "loss_num": 0.01324462890625, "loss_xval": 0.65625, "num_input_tokens_seen": 188929888, "step": 1634 }, { "epoch": 8.790322580645162, "grad_norm": 15.925314903259277, "learning_rate": 5e-07, "loss": 0.8493, "num_input_tokens_seen": 189044296, "step": 1635 }, { "epoch": 8.790322580645162, "loss": 0.6963387131690979, "loss_ce": 4.9650763685349375e-05, "loss_iou": 0.294921875, "loss_num": 0.021484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 189044296, "step": 1635 }, { "epoch": 8.795698924731182, "grad_norm": 20.735950469970703, "learning_rate": 5e-07, "loss": 0.77, "num_input_tokens_seen": 189161016, "step": 1636 }, { "epoch": 8.795698924731182, "loss": 0.6316110491752625, "loss_ce": 1.9287030227133073e-05, "loss_iou": 0.2578125, "loss_num": 0.0234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 189161016, "step": 1636 }, { "epoch": 8.801075268817204, "grad_norm": 18.284467697143555, "learning_rate": 5e-07, "loss": 0.814, "num_input_tokens_seen": 189276272, "step": 1637 }, { "epoch": 8.801075268817204, "loss": 1.2897617816925049, "loss_ce": 2.7927248083869927e-05, "loss_iou": 0.57421875, "loss_num": 0.0277099609375, "loss_xval": 1.2890625, "num_input_tokens_seen": 189276272, "step": 1637 }, { "epoch": 8.806451612903226, "grad_norm": 22.177425384521484, "learning_rate": 5e-07, "loss": 0.6581, "num_input_tokens_seen": 189392576, "step": 1638 }, { "epoch": 8.806451612903226, "loss": 0.5185879468917847, "loss_ce": 3.330258186906576e-05, "loss_iou": 0.1962890625, "loss_num": 0.0252685546875, "loss_xval": 0.51953125, "num_input_tokens_seen": 189392576, "step": 1638 }, { "epoch": 8.811827956989248, "grad_norm": 35.1341667175293, "learning_rate": 5e-07, "loss": 0.6287, "num_input_tokens_seen": 189509380, "step": 1639 }, { "epoch": 8.811827956989248, "loss": 0.6357773542404175, "loss_ce": 3.5142365959472954e-05, "loss_iou": 0.275390625, "loss_num": 0.016357421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 189509380, "step": 1639 }, { "epoch": 8.817204301075268, "grad_norm": 25.741365432739258, "learning_rate": 5e-07, "loss": 0.6283, "num_input_tokens_seen": 189626292, "step": 1640 }, { "epoch": 8.817204301075268, "loss": 0.44021356105804443, "loss_ce": 2.8041351470164955e-05, "loss_iou": 0.1796875, "loss_num": 0.01611328125, "loss_xval": 0.439453125, "num_input_tokens_seen": 189626292, "step": 1640 }, { "epoch": 8.82258064516129, "grad_norm": 25.963998794555664, "learning_rate": 5e-07, "loss": 0.8393, "num_input_tokens_seen": 189743820, "step": 1641 }, { "epoch": 8.82258064516129, "loss": 0.9295954704284668, "loss_ce": 3.0042905564187095e-05, "loss_iou": 0.421875, "loss_num": 0.0172119140625, "loss_xval": 0.9296875, "num_input_tokens_seen": 189743820, "step": 1641 }, { "epoch": 8.827956989247312, "grad_norm": 21.51838493347168, "learning_rate": 5e-07, "loss": 0.7917, "num_input_tokens_seen": 189856512, "step": 1642 }, { "epoch": 8.827956989247312, "loss": 0.6826404333114624, "loss_ce": 2.3268286895472556e-05, "loss_iou": 0.296875, "loss_num": 0.0179443359375, "loss_xval": 0.68359375, "num_input_tokens_seen": 189856512, "step": 1642 }, { "epoch": 8.833333333333334, "grad_norm": 15.930901527404785, "learning_rate": 5e-07, "loss": 0.6326, "num_input_tokens_seen": 189971248, "step": 1643 }, { "epoch": 8.833333333333334, "loss": 0.6668481230735779, "loss_ce": 0.00010009075776906684, "loss_iou": 0.271484375, "loss_num": 0.024169921875, "loss_xval": 0.66796875, "num_input_tokens_seen": 189971248, "step": 1643 }, { "epoch": 8.838709677419354, "grad_norm": 14.563186645507812, "learning_rate": 5e-07, "loss": 0.8529, "num_input_tokens_seen": 190088152, "step": 1644 }, { "epoch": 8.838709677419354, "loss": 0.5691507458686829, "loss_ce": 5.892894114367664e-05, "loss_iou": 0.232421875, "loss_num": 0.02099609375, "loss_xval": 0.5703125, "num_input_tokens_seen": 190088152, "step": 1644 }, { "epoch": 8.844086021505376, "grad_norm": 34.49042510986328, "learning_rate": 5e-07, "loss": 0.702, "num_input_tokens_seen": 190204948, "step": 1645 }, { "epoch": 8.844086021505376, "loss": 0.9111895561218262, "loss_ce": 5.672477709595114e-05, "loss_iou": 0.37109375, "loss_num": 0.03369140625, "loss_xval": 0.91015625, "num_input_tokens_seen": 190204948, "step": 1645 }, { "epoch": 8.849462365591398, "grad_norm": 31.920320510864258, "learning_rate": 5e-07, "loss": 0.7023, "num_input_tokens_seen": 190323680, "step": 1646 }, { "epoch": 8.849462365591398, "loss": 0.6855825185775757, "loss_ce": 3.563353675417602e-05, "loss_iou": 0.2890625, "loss_num": 0.0211181640625, "loss_xval": 0.6875, "num_input_tokens_seen": 190323680, "step": 1646 }, { "epoch": 8.85483870967742, "grad_norm": 24.758821487426758, "learning_rate": 5e-07, "loss": 0.6328, "num_input_tokens_seen": 190436468, "step": 1647 }, { "epoch": 8.85483870967742, "loss": 0.5951235294342041, "loss_ce": 3.072473919019103e-05, "loss_iou": 0.248046875, "loss_num": 0.0196533203125, "loss_xval": 0.59375, "num_input_tokens_seen": 190436468, "step": 1647 }, { "epoch": 8.86021505376344, "grad_norm": 25.955726623535156, "learning_rate": 5e-07, "loss": 0.6671, "num_input_tokens_seen": 190549532, "step": 1648 }, { "epoch": 8.86021505376344, "loss": 0.6697551012039185, "loss_ce": 1.6320645954692736e-05, "loss_iou": 0.255859375, "loss_num": 0.03125, "loss_xval": 0.66796875, "num_input_tokens_seen": 190549532, "step": 1648 }, { "epoch": 8.865591397849462, "grad_norm": 29.515430450439453, "learning_rate": 5e-07, "loss": 0.7664, "num_input_tokens_seen": 190664596, "step": 1649 }, { "epoch": 8.865591397849462, "loss": 0.6854609251022339, "loss_ce": 3.613729859353043e-05, "loss_iou": 0.29296875, "loss_num": 0.0196533203125, "loss_xval": 0.68359375, "num_input_tokens_seen": 190664596, "step": 1649 }, { "epoch": 8.870967741935484, "grad_norm": 23.329418182373047, "learning_rate": 5e-07, "loss": 0.9241, "num_input_tokens_seen": 190779284, "step": 1650 }, { "epoch": 8.870967741935484, "loss": 0.978064775466919, "loss_ce": 0.0024788451846688986, "loss_iou": 0.4140625, "loss_num": 0.0291748046875, "loss_xval": 0.9765625, "num_input_tokens_seen": 190779284, "step": 1650 }, { "epoch": 8.876344086021506, "grad_norm": 27.253625869750977, "learning_rate": 5e-07, "loss": 0.7881, "num_input_tokens_seen": 190895640, "step": 1651 }, { "epoch": 8.876344086021506, "loss": 0.8227850198745728, "loss_ce": 3.110793841187842e-05, "loss_iou": 0.359375, "loss_num": 0.0211181640625, "loss_xval": 0.82421875, "num_input_tokens_seen": 190895640, "step": 1651 }, { "epoch": 8.881720430107526, "grad_norm": 15.038797378540039, "learning_rate": 5e-07, "loss": 0.8024, "num_input_tokens_seen": 191009168, "step": 1652 }, { "epoch": 8.881720430107526, "loss": 0.5299376249313354, "loss_ce": 3.040922456420958e-05, "loss_iou": 0.224609375, "loss_num": 0.01611328125, "loss_xval": 0.53125, "num_input_tokens_seen": 191009168, "step": 1652 }, { "epoch": 8.887096774193548, "grad_norm": 18.588443756103516, "learning_rate": 5e-07, "loss": 0.5989, "num_input_tokens_seen": 191123880, "step": 1653 }, { "epoch": 8.887096774193548, "loss": 0.49705392122268677, "loss_ce": 4.46340418420732e-05, "loss_iou": 0.2216796875, "loss_num": 0.0107421875, "loss_xval": 0.49609375, "num_input_tokens_seen": 191123880, "step": 1653 }, { "epoch": 8.89247311827957, "grad_norm": 21.265125274658203, "learning_rate": 5e-07, "loss": 0.6986, "num_input_tokens_seen": 191241896, "step": 1654 }, { "epoch": 8.89247311827957, "loss": 0.6829081773757935, "loss_ce": 4.688360058935359e-05, "loss_iou": 0.291015625, "loss_num": 0.0203857421875, "loss_xval": 0.68359375, "num_input_tokens_seen": 191241896, "step": 1654 }, { "epoch": 8.897849462365592, "grad_norm": 20.955787658691406, "learning_rate": 5e-07, "loss": 0.6106, "num_input_tokens_seen": 191357776, "step": 1655 }, { "epoch": 8.897849462365592, "loss": 0.7465279698371887, "loss_ce": 6.798475806135684e-05, "loss_iou": 0.314453125, "loss_num": 0.0235595703125, "loss_xval": 0.74609375, "num_input_tokens_seen": 191357776, "step": 1655 }, { "epoch": 8.903225806451612, "grad_norm": 20.404123306274414, "learning_rate": 5e-07, "loss": 0.6614, "num_input_tokens_seen": 191472208, "step": 1656 }, { "epoch": 8.903225806451612, "loss": 0.8742263317108154, "loss_ce": 8.088341564871371e-05, "loss_iou": 0.341796875, "loss_num": 0.037841796875, "loss_xval": 0.875, "num_input_tokens_seen": 191472208, "step": 1656 }, { "epoch": 8.908602150537634, "grad_norm": 22.004413604736328, "learning_rate": 5e-07, "loss": 0.7427, "num_input_tokens_seen": 191589612, "step": 1657 }, { "epoch": 8.908602150537634, "loss": 0.4612804651260376, "loss_ce": 3.777341771638021e-05, "loss_iou": 0.1923828125, "loss_num": 0.01544189453125, "loss_xval": 0.4609375, "num_input_tokens_seen": 191589612, "step": 1657 }, { "epoch": 8.913978494623656, "grad_norm": 22.762081146240234, "learning_rate": 5e-07, "loss": 0.661, "num_input_tokens_seen": 191706032, "step": 1658 }, { "epoch": 8.913978494623656, "loss": 0.6597043871879578, "loss_ce": 3.6423924029804766e-05, "loss_iou": 0.26953125, "loss_num": 0.0244140625, "loss_xval": 0.66015625, "num_input_tokens_seen": 191706032, "step": 1658 }, { "epoch": 8.919354838709678, "grad_norm": 33.23845291137695, "learning_rate": 5e-07, "loss": 0.8834, "num_input_tokens_seen": 191822184, "step": 1659 }, { "epoch": 8.919354838709678, "loss": 0.8633137941360474, "loss_ce": 3.252038004575297e-05, "loss_iou": 0.396484375, "loss_num": 0.0137939453125, "loss_xval": 0.86328125, "num_input_tokens_seen": 191822184, "step": 1659 }, { "epoch": 8.924731182795698, "grad_norm": 20.51439094543457, "learning_rate": 5e-07, "loss": 0.7658, "num_input_tokens_seen": 191940364, "step": 1660 }, { "epoch": 8.924731182795698, "loss": 0.605007529258728, "loss_ce": 2.701011544559151e-05, "loss_iou": 0.267578125, "loss_num": 0.0137939453125, "loss_xval": 0.60546875, "num_input_tokens_seen": 191940364, "step": 1660 }, { "epoch": 8.93010752688172, "grad_norm": 19.452951431274414, "learning_rate": 5e-07, "loss": 0.8416, "num_input_tokens_seen": 192055776, "step": 1661 }, { "epoch": 8.93010752688172, "loss": 1.1896127462387085, "loss_ce": 0.00015958823496475816, "loss_iou": 0.5390625, "loss_num": 0.02197265625, "loss_xval": 1.1875, "num_input_tokens_seen": 192055776, "step": 1661 }, { "epoch": 8.935483870967742, "grad_norm": 25.42995262145996, "learning_rate": 5e-07, "loss": 0.7911, "num_input_tokens_seen": 192171900, "step": 1662 }, { "epoch": 8.935483870967742, "loss": 0.9475448131561279, "loss_ce": 3.5034110624110326e-05, "loss_iou": 0.40234375, "loss_num": 0.0289306640625, "loss_xval": 0.94921875, "num_input_tokens_seen": 192171900, "step": 1662 }, { "epoch": 8.940860215053764, "grad_norm": 18.72953987121582, "learning_rate": 5e-07, "loss": 0.6659, "num_input_tokens_seen": 192289456, "step": 1663 }, { "epoch": 8.940860215053764, "loss": 0.4601210951805115, "loss_ce": 3.807728353422135e-05, "loss_iou": 0.20703125, "loss_num": 0.00897216796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 192289456, "step": 1663 }, { "epoch": 8.946236559139784, "grad_norm": 41.106903076171875, "learning_rate": 5e-07, "loss": 0.7077, "num_input_tokens_seen": 192405044, "step": 1664 }, { "epoch": 8.946236559139784, "loss": 0.5493535399436951, "loss_ce": 3.7152713048271835e-05, "loss_iou": 0.248046875, "loss_num": 0.0108642578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 192405044, "step": 1664 }, { "epoch": 8.951612903225806, "grad_norm": 54.170318603515625, "learning_rate": 5e-07, "loss": 0.7751, "num_input_tokens_seen": 192520300, "step": 1665 }, { "epoch": 8.951612903225806, "loss": 0.8289144039154053, "loss_ce": 5.699708344764076e-05, "loss_iou": 0.33984375, "loss_num": 0.030029296875, "loss_xval": 0.828125, "num_input_tokens_seen": 192520300, "step": 1665 }, { "epoch": 8.956989247311828, "grad_norm": 29.44109344482422, "learning_rate": 5e-07, "loss": 0.5948, "num_input_tokens_seen": 192636780, "step": 1666 }, { "epoch": 8.956989247311828, "loss": 0.5898789763450623, "loss_ce": 3.522849874570966e-05, "loss_iou": 0.259765625, "loss_num": 0.01385498046875, "loss_xval": 0.58984375, "num_input_tokens_seen": 192636780, "step": 1666 }, { "epoch": 8.96236559139785, "grad_norm": 27.281248092651367, "learning_rate": 5e-07, "loss": 0.8909, "num_input_tokens_seen": 192749012, "step": 1667 }, { "epoch": 8.96236559139785, "loss": 0.7342853546142578, "loss_ce": 3.243536775698885e-05, "loss_iou": 0.30078125, "loss_num": 0.0269775390625, "loss_xval": 0.734375, "num_input_tokens_seen": 192749012, "step": 1667 }, { "epoch": 8.967741935483872, "grad_norm": 20.4422664642334, "learning_rate": 5e-07, "loss": 0.8993, "num_input_tokens_seen": 192864920, "step": 1668 }, { "epoch": 8.967741935483872, "loss": 0.7597986459732056, "loss_ce": 3.304886195110157e-05, "loss_iou": 0.310546875, "loss_num": 0.0279541015625, "loss_xval": 0.7578125, "num_input_tokens_seen": 192864920, "step": 1668 }, { "epoch": 8.973118279569892, "grad_norm": 21.299232482910156, "learning_rate": 5e-07, "loss": 0.6479, "num_input_tokens_seen": 192979284, "step": 1669 }, { "epoch": 8.973118279569892, "loss": 0.49821382761001587, "loss_ce": 4.487136902753264e-05, "loss_iou": 0.2216796875, "loss_num": 0.010986328125, "loss_xval": 0.498046875, "num_input_tokens_seen": 192979284, "step": 1669 }, { "epoch": 8.978494623655914, "grad_norm": 24.996564865112305, "learning_rate": 5e-07, "loss": 0.7336, "num_input_tokens_seen": 193095036, "step": 1670 }, { "epoch": 8.978494623655914, "loss": 0.4734308421611786, "loss_ce": 4.217123205307871e-05, "loss_iou": 0.2099609375, "loss_num": 0.0107421875, "loss_xval": 0.47265625, "num_input_tokens_seen": 193095036, "step": 1670 }, { "epoch": 8.983870967741936, "grad_norm": 33.08991241455078, "learning_rate": 5e-07, "loss": 0.8417, "num_input_tokens_seen": 193211496, "step": 1671 }, { "epoch": 8.983870967741936, "loss": 0.49881646037101746, "loss_ce": 3.71818860003259e-05, "loss_iou": 0.2236328125, "loss_num": 0.01043701171875, "loss_xval": 0.498046875, "num_input_tokens_seen": 193211496, "step": 1671 }, { "epoch": 8.989247311827956, "grad_norm": 19.941011428833008, "learning_rate": 5e-07, "loss": 0.729, "num_input_tokens_seen": 193326124, "step": 1672 }, { "epoch": 8.989247311827956, "loss": 0.6372395157814026, "loss_ce": 3.250567533541471e-05, "loss_iou": 0.267578125, "loss_num": 0.0203857421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 193326124, "step": 1672 }, { "epoch": 8.994623655913978, "grad_norm": 29.111604690551758, "learning_rate": 5e-07, "loss": 0.7909, "num_input_tokens_seen": 193442008, "step": 1673 }, { "epoch": 8.994623655913978, "loss": 0.7865480780601501, "loss_ce": 4.902888395008631e-05, "loss_iou": 0.35546875, "loss_num": 0.0147705078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 193442008, "step": 1673 }, { "epoch": 9.0, "grad_norm": 36.217201232910156, "learning_rate": 5e-07, "loss": 0.6514, "num_input_tokens_seen": 193558940, "step": 1674 }, { "epoch": 9.0, "loss": 0.5762596130371094, "loss_ce": 8.77438360475935e-05, "loss_iou": 0.234375, "loss_num": 0.021484375, "loss_xval": 0.578125, "num_input_tokens_seen": 193558940, "step": 1674 }, { "epoch": 9.005376344086022, "grad_norm": 20.87369728088379, "learning_rate": 5e-07, "loss": 0.816, "num_input_tokens_seen": 193673472, "step": 1675 }, { "epoch": 9.005376344086022, "loss": 0.8481744527816772, "loss_ce": 2.992961890413426e-05, "loss_iou": 0.359375, "loss_num": 0.025634765625, "loss_xval": 0.84765625, "num_input_tokens_seen": 193673472, "step": 1675 }, { "epoch": 9.010752688172044, "grad_norm": 25.208099365234375, "learning_rate": 5e-07, "loss": 0.7492, "num_input_tokens_seen": 193791256, "step": 1676 }, { "epoch": 9.010752688172044, "loss": 0.7887576222419739, "loss_ce": 0.00018337959772907197, "loss_iou": 0.3515625, "loss_num": 0.0174560546875, "loss_xval": 0.7890625, "num_input_tokens_seen": 193791256, "step": 1676 }, { "epoch": 9.016129032258064, "grad_norm": 15.699356079101562, "learning_rate": 5e-07, "loss": 0.695, "num_input_tokens_seen": 193908916, "step": 1677 }, { "epoch": 9.016129032258064, "loss": 0.4110974073410034, "loss_ce": 2.5621837266953662e-05, "loss_iou": 0.1767578125, "loss_num": 0.011474609375, "loss_xval": 0.41015625, "num_input_tokens_seen": 193908916, "step": 1677 }, { "epoch": 9.021505376344086, "grad_norm": 20.428524017333984, "learning_rate": 5e-07, "loss": 0.5824, "num_input_tokens_seen": 194024756, "step": 1678 }, { "epoch": 9.021505376344086, "loss": 0.6255307197570801, "loss_ce": 4.2442898120498285e-05, "loss_iou": 0.232421875, "loss_num": 0.0322265625, "loss_xval": 0.625, "num_input_tokens_seen": 194024756, "step": 1678 }, { "epoch": 9.026881720430108, "grad_norm": 22.427597045898438, "learning_rate": 5e-07, "loss": 0.7752, "num_input_tokens_seen": 194138124, "step": 1679 }, { "epoch": 9.026881720430108, "loss": 0.7175496816635132, "loss_ce": 2.0377246983116493e-05, "loss_iou": 0.302734375, "loss_num": 0.0223388671875, "loss_xval": 0.71875, "num_input_tokens_seen": 194138124, "step": 1679 }, { "epoch": 9.03225806451613, "grad_norm": 16.430543899536133, "learning_rate": 5e-07, "loss": 0.6721, "num_input_tokens_seen": 194256864, "step": 1680 }, { "epoch": 9.03225806451613, "loss": 0.6135544776916504, "loss_ce": 2.9129612812539563e-05, "loss_iou": 0.25, "loss_num": 0.0223388671875, "loss_xval": 0.61328125, "num_input_tokens_seen": 194256864, "step": 1680 }, { "epoch": 9.03763440860215, "grad_norm": 16.981979370117188, "learning_rate": 5e-07, "loss": 0.6364, "num_input_tokens_seen": 194368792, "step": 1681 }, { "epoch": 9.03763440860215, "loss": 0.7438467741012573, "loss_ce": 7.234550139401108e-05, "loss_iou": 0.302734375, "loss_num": 0.0272216796875, "loss_xval": 0.7421875, "num_input_tokens_seen": 194368792, "step": 1681 }, { "epoch": 9.043010752688172, "grad_norm": 18.201852798461914, "learning_rate": 5e-07, "loss": 0.9528, "num_input_tokens_seen": 194485796, "step": 1682 }, { "epoch": 9.043010752688172, "loss": 0.764204740524292, "loss_ce": 4.4583364797290415e-05, "loss_iou": 0.341796875, "loss_num": 0.01611328125, "loss_xval": 0.765625, "num_input_tokens_seen": 194485796, "step": 1682 }, { "epoch": 9.048387096774194, "grad_norm": 25.9494686126709, "learning_rate": 5e-07, "loss": 0.7556, "num_input_tokens_seen": 194598516, "step": 1683 }, { "epoch": 9.048387096774194, "loss": 0.6019806265830994, "loss_ce": 0.00017402175581082702, "loss_iou": 0.267578125, "loss_num": 0.012939453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 194598516, "step": 1683 }, { "epoch": 9.053763440860216, "grad_norm": 30.395278930664062, "learning_rate": 5e-07, "loss": 0.7001, "num_input_tokens_seen": 194712640, "step": 1684 }, { "epoch": 9.053763440860216, "loss": 0.7398973703384399, "loss_ce": 0.00015126662037800997, "loss_iou": 0.32421875, "loss_num": 0.0184326171875, "loss_xval": 0.73828125, "num_input_tokens_seen": 194712640, "step": 1684 }, { "epoch": 9.059139784946236, "grad_norm": 18.781078338623047, "learning_rate": 5e-07, "loss": 0.8004, "num_input_tokens_seen": 194827224, "step": 1685 }, { "epoch": 9.059139784946236, "loss": 0.555191695690155, "loss_ce": 1.588532904861495e-05, "loss_iou": 0.2333984375, "loss_num": 0.0177001953125, "loss_xval": 0.5546875, "num_input_tokens_seen": 194827224, "step": 1685 }, { "epoch": 9.064516129032258, "grad_norm": 27.97327423095703, "learning_rate": 5e-07, "loss": 0.9001, "num_input_tokens_seen": 194939856, "step": 1686 }, { "epoch": 9.064516129032258, "loss": 0.5935271978378296, "loss_ce": 2.1304258552845567e-05, "loss_iou": 0.2451171875, "loss_num": 0.0206298828125, "loss_xval": 0.59375, "num_input_tokens_seen": 194939856, "step": 1686 }, { "epoch": 9.06989247311828, "grad_norm": 19.95378875732422, "learning_rate": 5e-07, "loss": 0.7004, "num_input_tokens_seen": 195055720, "step": 1687 }, { "epoch": 9.06989247311828, "loss": 0.5991703271865845, "loss_ce": 4.920691208099015e-05, "loss_iou": 0.25390625, "loss_num": 0.0181884765625, "loss_xval": 0.59765625, "num_input_tokens_seen": 195055720, "step": 1687 }, { "epoch": 9.075268817204302, "grad_norm": 19.33224105834961, "learning_rate": 5e-07, "loss": 0.7409, "num_input_tokens_seen": 195170788, "step": 1688 }, { "epoch": 9.075268817204302, "loss": 0.7670369744300842, "loss_ce": 0.00019125927065033466, "loss_iou": 0.33203125, "loss_num": 0.0203857421875, "loss_xval": 0.765625, "num_input_tokens_seen": 195170788, "step": 1688 }, { "epoch": 9.080645161290322, "grad_norm": 27.869009017944336, "learning_rate": 5e-07, "loss": 0.6058, "num_input_tokens_seen": 195289144, "step": 1689 }, { "epoch": 9.080645161290322, "loss": 0.5156453847885132, "loss_ce": 2.0396473701111972e-05, "loss_iou": 0.1962890625, "loss_num": 0.0245361328125, "loss_xval": 0.515625, "num_input_tokens_seen": 195289144, "step": 1689 }, { "epoch": 9.086021505376344, "grad_norm": 18.155763626098633, "learning_rate": 5e-07, "loss": 0.651, "num_input_tokens_seen": 195404712, "step": 1690 }, { "epoch": 9.086021505376344, "loss": 0.8140188455581665, "loss_ce": 5.398334178607911e-05, "loss_iou": 0.369140625, "loss_num": 0.01531982421875, "loss_xval": 0.8125, "num_input_tokens_seen": 195404712, "step": 1690 }, { "epoch": 9.091397849462366, "grad_norm": 19.449237823486328, "learning_rate": 5e-07, "loss": 0.6032, "num_input_tokens_seen": 195522268, "step": 1691 }, { "epoch": 9.091397849462366, "loss": 0.5915759801864624, "loss_ce": 2.326588764844928e-05, "loss_iou": 0.267578125, "loss_num": 0.0113525390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 195522268, "step": 1691 }, { "epoch": 9.096774193548388, "grad_norm": 22.277788162231445, "learning_rate": 5e-07, "loss": 0.7333, "num_input_tokens_seen": 195636268, "step": 1692 }, { "epoch": 9.096774193548388, "loss": 0.9105081558227539, "loss_ce": 0.00010776495037134737, "loss_iou": 0.390625, "loss_num": 0.025634765625, "loss_xval": 0.91015625, "num_input_tokens_seen": 195636268, "step": 1692 }, { "epoch": 9.102150537634408, "grad_norm": 25.335182189941406, "learning_rate": 5e-07, "loss": 0.8605, "num_input_tokens_seen": 195752440, "step": 1693 }, { "epoch": 9.102150537634408, "loss": 1.1924066543579102, "loss_ce": 0.0002679286990314722, "loss_iou": 0.5, "loss_num": 0.0380859375, "loss_xval": 1.1953125, "num_input_tokens_seen": 195752440, "step": 1693 }, { "epoch": 9.10752688172043, "grad_norm": 17.747371673583984, "learning_rate": 5e-07, "loss": 0.6586, "num_input_tokens_seen": 195866880, "step": 1694 }, { "epoch": 9.10752688172043, "loss": 0.6928597092628479, "loss_ce": 0.00011070512846345082, "loss_iou": 0.306640625, "loss_num": 0.0159912109375, "loss_xval": 0.69140625, "num_input_tokens_seen": 195866880, "step": 1694 }, { "epoch": 9.112903225806452, "grad_norm": 15.393328666687012, "learning_rate": 5e-07, "loss": 0.7979, "num_input_tokens_seen": 195979248, "step": 1695 }, { "epoch": 9.112903225806452, "loss": 1.0048460960388184, "loss_ce": 0.00020737886370625347, "loss_iou": 0.44921875, "loss_num": 0.0211181640625, "loss_xval": 1.0078125, "num_input_tokens_seen": 195979248, "step": 1695 }, { "epoch": 9.118279569892474, "grad_norm": 24.635778427124023, "learning_rate": 5e-07, "loss": 0.5742, "num_input_tokens_seen": 196097300, "step": 1696 }, { "epoch": 9.118279569892474, "loss": 0.4553663730621338, "loss_ce": 4.412252746988088e-05, "loss_iou": 0.169921875, "loss_num": 0.02294921875, "loss_xval": 0.455078125, "num_input_tokens_seen": 196097300, "step": 1696 }, { "epoch": 9.123655913978494, "grad_norm": 47.084712982177734, "learning_rate": 5e-07, "loss": 0.7585, "num_input_tokens_seen": 196214068, "step": 1697 }, { "epoch": 9.123655913978494, "loss": 1.1284472942352295, "loss_ce": 2.9388706025201827e-05, "loss_iou": 0.47265625, "loss_num": 0.03662109375, "loss_xval": 1.125, "num_input_tokens_seen": 196214068, "step": 1697 }, { "epoch": 9.129032258064516, "grad_norm": 34.54342269897461, "learning_rate": 5e-07, "loss": 0.5961, "num_input_tokens_seen": 196331460, "step": 1698 }, { "epoch": 9.129032258064516, "loss": 0.5359612703323364, "loss_ce": 7.257235120050609e-05, "loss_iou": 0.2216796875, "loss_num": 0.0185546875, "loss_xval": 0.53515625, "num_input_tokens_seen": 196331460, "step": 1698 }, { "epoch": 9.134408602150538, "grad_norm": 16.610933303833008, "learning_rate": 5e-07, "loss": 0.6928, "num_input_tokens_seen": 196446056, "step": 1699 }, { "epoch": 9.134408602150538, "loss": 1.1934020519256592, "loss_ce": 4.268635530024767e-05, "loss_iou": 0.53125, "loss_num": 0.025634765625, "loss_xval": 1.1953125, "num_input_tokens_seen": 196446056, "step": 1699 }, { "epoch": 9.13978494623656, "grad_norm": 18.77288818359375, "learning_rate": 5e-07, "loss": 0.705, "num_input_tokens_seen": 196563176, "step": 1700 }, { "epoch": 9.13978494623656, "loss": 0.7200653553009033, "loss_ce": 9.462417801842093e-05, "loss_iou": 0.322265625, "loss_num": 0.0150146484375, "loss_xval": 0.71875, "num_input_tokens_seen": 196563176, "step": 1700 }, { "epoch": 9.14516129032258, "grad_norm": 20.60765266418457, "learning_rate": 5e-07, "loss": 0.6971, "num_input_tokens_seen": 196676372, "step": 1701 }, { "epoch": 9.14516129032258, "loss": 0.6359335780143738, "loss_ce": 6.932146061444655e-05, "loss_iou": 0.255859375, "loss_num": 0.02490234375, "loss_xval": 0.63671875, "num_input_tokens_seen": 196676372, "step": 1701 }, { "epoch": 9.150537634408602, "grad_norm": 24.00194549560547, "learning_rate": 5e-07, "loss": 0.6509, "num_input_tokens_seen": 196790288, "step": 1702 }, { "epoch": 9.150537634408602, "loss": 0.5408654808998108, "loss_ce": 3.2954063499346375e-05, "loss_iou": 0.21484375, "loss_num": 0.02197265625, "loss_xval": 0.5390625, "num_input_tokens_seen": 196790288, "step": 1702 }, { "epoch": 9.155913978494624, "grad_norm": 20.554914474487305, "learning_rate": 5e-07, "loss": 0.6981, "num_input_tokens_seen": 196902884, "step": 1703 }, { "epoch": 9.155913978494624, "loss": 0.6190937757492065, "loss_ce": 7.525453111156821e-05, "loss_iou": 0.26171875, "loss_num": 0.019287109375, "loss_xval": 0.6171875, "num_input_tokens_seen": 196902884, "step": 1703 }, { "epoch": 9.161290322580646, "grad_norm": 23.096113204956055, "learning_rate": 5e-07, "loss": 0.6995, "num_input_tokens_seen": 197017024, "step": 1704 }, { "epoch": 9.161290322580646, "loss": 0.4273935556411743, "loss_ce": 2.5396660930709913e-05, "loss_iou": 0.1904296875, "loss_num": 0.00927734375, "loss_xval": 0.427734375, "num_input_tokens_seen": 197017024, "step": 1704 }, { "epoch": 9.166666666666666, "grad_norm": 15.794148445129395, "learning_rate": 5e-07, "loss": 0.6736, "num_input_tokens_seen": 197135212, "step": 1705 }, { "epoch": 9.166666666666666, "loss": 0.5744283199310303, "loss_ce": 2.644519554451108e-05, "loss_iou": 0.255859375, "loss_num": 0.01251220703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 197135212, "step": 1705 }, { "epoch": 9.172043010752688, "grad_norm": 19.662717819213867, "learning_rate": 5e-07, "loss": 0.6222, "num_input_tokens_seen": 197250160, "step": 1706 }, { "epoch": 9.172043010752688, "loss": 0.8552947640419006, "loss_ce": 7.017506140982732e-05, "loss_iou": 0.375, "loss_num": 0.020751953125, "loss_xval": 0.85546875, "num_input_tokens_seen": 197250160, "step": 1706 }, { "epoch": 9.17741935483871, "grad_norm": 18.88068962097168, "learning_rate": 5e-07, "loss": 0.8017, "num_input_tokens_seen": 197363436, "step": 1707 }, { "epoch": 9.17741935483871, "loss": 0.6799569725990295, "loss_ce": 2.5310146156698465e-05, "loss_iou": 0.287109375, "loss_num": 0.0213623046875, "loss_xval": 0.6796875, "num_input_tokens_seen": 197363436, "step": 1707 }, { "epoch": 9.182795698924732, "grad_norm": 20.93270492553711, "learning_rate": 5e-07, "loss": 0.7436, "num_input_tokens_seen": 197480308, "step": 1708 }, { "epoch": 9.182795698924732, "loss": 0.4403403699398041, "loss_ce": 3.275552444392815e-05, "loss_iou": 0.1865234375, "loss_num": 0.01373291015625, "loss_xval": 0.439453125, "num_input_tokens_seen": 197480308, "step": 1708 }, { "epoch": 9.188172043010752, "grad_norm": 23.72980308532715, "learning_rate": 5e-07, "loss": 0.7502, "num_input_tokens_seen": 197597152, "step": 1709 }, { "epoch": 9.188172043010752, "loss": 0.6161544919013977, "loss_ce": 0.0001876984169939533, "loss_iou": 0.2578125, "loss_num": 0.0203857421875, "loss_xval": 0.6171875, "num_input_tokens_seen": 197597152, "step": 1709 }, { "epoch": 9.193548387096774, "grad_norm": 22.428016662597656, "learning_rate": 5e-07, "loss": 0.7015, "num_input_tokens_seen": 197716132, "step": 1710 }, { "epoch": 9.193548387096774, "loss": 1.1191767454147339, "loss_ce": 3.620148345362395e-05, "loss_iou": 0.478515625, "loss_num": 0.03271484375, "loss_xval": 1.1171875, "num_input_tokens_seen": 197716132, "step": 1710 }, { "epoch": 9.198924731182796, "grad_norm": 25.586143493652344, "learning_rate": 5e-07, "loss": 0.6056, "num_input_tokens_seen": 197831296, "step": 1711 }, { "epoch": 9.198924731182796, "loss": 0.5753275752067566, "loss_ce": 7.124105468392372e-05, "loss_iou": 0.2578125, "loss_num": 0.01214599609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 197831296, "step": 1711 }, { "epoch": 9.204301075268818, "grad_norm": 28.12302017211914, "learning_rate": 5e-07, "loss": 0.729, "num_input_tokens_seen": 197948260, "step": 1712 }, { "epoch": 9.204301075268818, "loss": 0.8391552567481995, "loss_ce": 4.396955409902148e-05, "loss_iou": 0.380859375, "loss_num": 0.015625, "loss_xval": 0.83984375, "num_input_tokens_seen": 197948260, "step": 1712 }, { "epoch": 9.209677419354838, "grad_norm": 18.55975914001465, "learning_rate": 5e-07, "loss": 0.6519, "num_input_tokens_seen": 198067392, "step": 1713 }, { "epoch": 9.209677419354838, "loss": 0.5829274654388428, "loss_ce": 4.173385968897492e-05, "loss_iou": 0.2421875, "loss_num": 0.019775390625, "loss_xval": 0.58203125, "num_input_tokens_seen": 198067392, "step": 1713 }, { "epoch": 9.21505376344086, "grad_norm": 16.30130386352539, "learning_rate": 5e-07, "loss": 0.7625, "num_input_tokens_seen": 198181736, "step": 1714 }, { "epoch": 9.21505376344086, "loss": 0.9862513542175293, "loss_ce": 4.5297165343072265e-05, "loss_iou": 0.4375, "loss_num": 0.0225830078125, "loss_xval": 0.984375, "num_input_tokens_seen": 198181736, "step": 1714 }, { "epoch": 9.220430107526882, "grad_norm": 21.02399253845215, "learning_rate": 5e-07, "loss": 0.7554, "num_input_tokens_seen": 198294824, "step": 1715 }, { "epoch": 9.220430107526882, "loss": 0.6197391748428345, "loss_ce": 0.00011026044376194477, "loss_iou": 0.25390625, "loss_num": 0.0220947265625, "loss_xval": 0.62109375, "num_input_tokens_seen": 198294824, "step": 1715 }, { "epoch": 9.225806451612904, "grad_norm": 35.9827766418457, "learning_rate": 5e-07, "loss": 0.6375, "num_input_tokens_seen": 198410344, "step": 1716 }, { "epoch": 9.225806451612904, "loss": 0.5951204299926758, "loss_ce": 2.7692596631823108e-05, "loss_iou": 0.251953125, "loss_num": 0.0181884765625, "loss_xval": 0.59375, "num_input_tokens_seen": 198410344, "step": 1716 }, { "epoch": 9.231182795698924, "grad_norm": 43.52107620239258, "learning_rate": 5e-07, "loss": 0.6972, "num_input_tokens_seen": 198526844, "step": 1717 }, { "epoch": 9.231182795698924, "loss": 0.7209798097610474, "loss_ce": 3.257171920267865e-05, "loss_iou": 0.318359375, "loss_num": 0.0164794921875, "loss_xval": 0.72265625, "num_input_tokens_seen": 198526844, "step": 1717 }, { "epoch": 9.236559139784946, "grad_norm": 27.21805763244629, "learning_rate": 5e-07, "loss": 0.8104, "num_input_tokens_seen": 198641912, "step": 1718 }, { "epoch": 9.236559139784946, "loss": 0.4695081114768982, "loss_ce": 2.5681689294287935e-05, "loss_iou": 0.19921875, "loss_num": 0.01416015625, "loss_xval": 0.46875, "num_input_tokens_seen": 198641912, "step": 1718 }, { "epoch": 9.241935483870968, "grad_norm": 32.581661224365234, "learning_rate": 5e-07, "loss": 0.9035, "num_input_tokens_seen": 198761264, "step": 1719 }, { "epoch": 9.241935483870968, "loss": 0.8271937370300293, "loss_ce": 4.5259577746037394e-05, "loss_iou": 0.357421875, "loss_num": 0.022216796875, "loss_xval": 0.828125, "num_input_tokens_seen": 198761264, "step": 1719 }, { "epoch": 9.24731182795699, "grad_norm": 48.16014862060547, "learning_rate": 5e-07, "loss": 0.658, "num_input_tokens_seen": 198879560, "step": 1720 }, { "epoch": 9.24731182795699, "loss": 0.7776162624359131, "loss_ce": 2.8396452762535773e-05, "loss_iou": 0.353515625, "loss_num": 0.0142822265625, "loss_xval": 0.77734375, "num_input_tokens_seen": 198879560, "step": 1720 }, { "epoch": 9.25268817204301, "grad_norm": 27.91717529296875, "learning_rate": 5e-07, "loss": 0.6761, "num_input_tokens_seen": 198995844, "step": 1721 }, { "epoch": 9.25268817204301, "loss": 0.6416402459144592, "loss_ce": 3.8700691220583394e-05, "loss_iou": 0.2890625, "loss_num": 0.0123291015625, "loss_xval": 0.640625, "num_input_tokens_seen": 198995844, "step": 1721 }, { "epoch": 9.258064516129032, "grad_norm": 25.39971923828125, "learning_rate": 5e-07, "loss": 0.7104, "num_input_tokens_seen": 199109224, "step": 1722 }, { "epoch": 9.258064516129032, "loss": 0.5908567905426025, "loss_ce": 3.6527198972180486e-05, "loss_iou": 0.2490234375, "loss_num": 0.0185546875, "loss_xval": 0.58984375, "num_input_tokens_seen": 199109224, "step": 1722 }, { "epoch": 9.263440860215054, "grad_norm": 29.494972229003906, "learning_rate": 5e-07, "loss": 0.8546, "num_input_tokens_seen": 199221116, "step": 1723 }, { "epoch": 9.263440860215054, "loss": 0.9172873497009277, "loss_ce": 5.1038514357060194e-05, "loss_iou": 0.408203125, "loss_num": 0.02001953125, "loss_xval": 0.91796875, "num_input_tokens_seen": 199221116, "step": 1723 }, { "epoch": 9.268817204301076, "grad_norm": 31.176647186279297, "learning_rate": 5e-07, "loss": 0.8355, "num_input_tokens_seen": 199334868, "step": 1724 }, { "epoch": 9.268817204301076, "loss": 0.7984998226165771, "loss_ce": 3.789117545238696e-05, "loss_iou": 0.341796875, "loss_num": 0.023193359375, "loss_xval": 0.796875, "num_input_tokens_seen": 199334868, "step": 1724 }, { "epoch": 9.274193548387096, "grad_norm": 15.52731990814209, "learning_rate": 5e-07, "loss": 0.5972, "num_input_tokens_seen": 199452036, "step": 1725 }, { "epoch": 9.274193548387096, "loss": 0.41957518458366394, "loss_ce": 1.9543229427654296e-05, "loss_iou": 0.177734375, "loss_num": 0.01251220703125, "loss_xval": 0.419921875, "num_input_tokens_seen": 199452036, "step": 1725 }, { "epoch": 9.279569892473118, "grad_norm": 12.8770751953125, "learning_rate": 5e-07, "loss": 0.7592, "num_input_tokens_seen": 199570480, "step": 1726 }, { "epoch": 9.279569892473118, "loss": 0.5461998581886292, "loss_ce": 5.7305602240376174e-05, "loss_iou": 0.2451171875, "loss_num": 0.01116943359375, "loss_xval": 0.546875, "num_input_tokens_seen": 199570480, "step": 1726 }, { "epoch": 9.28494623655914, "grad_norm": 19.1672420501709, "learning_rate": 5e-07, "loss": 0.6884, "num_input_tokens_seen": 199685012, "step": 1727 }, { "epoch": 9.28494623655914, "loss": 0.6804364323616028, "loss_ce": 1.6497986507602036e-05, "loss_iou": 0.2578125, "loss_num": 0.033203125, "loss_xval": 0.6796875, "num_input_tokens_seen": 199685012, "step": 1727 }, { "epoch": 9.290322580645162, "grad_norm": 20.01900863647461, "learning_rate": 5e-07, "loss": 0.7401, "num_input_tokens_seen": 199798716, "step": 1728 }, { "epoch": 9.290322580645162, "loss": 0.5878193378448486, "loss_ce": 0.00017284427303820848, "loss_iou": 0.255859375, "loss_num": 0.015380859375, "loss_xval": 0.5859375, "num_input_tokens_seen": 199798716, "step": 1728 }, { "epoch": 9.295698924731182, "grad_norm": 14.252970695495605, "learning_rate": 5e-07, "loss": 0.7345, "num_input_tokens_seen": 199909452, "step": 1729 }, { "epoch": 9.295698924731182, "loss": 0.5481565594673157, "loss_ce": 6.085797940613702e-05, "loss_iou": 0.224609375, "loss_num": 0.02001953125, "loss_xval": 0.546875, "num_input_tokens_seen": 199909452, "step": 1729 }, { "epoch": 9.301075268817204, "grad_norm": 21.49510383605957, "learning_rate": 5e-07, "loss": 0.6888, "num_input_tokens_seen": 200025132, "step": 1730 }, { "epoch": 9.301075268817204, "loss": 0.5635266304016113, "loss_ce": 5.0083701353287324e-05, "loss_iou": 0.2470703125, "loss_num": 0.0137939453125, "loss_xval": 0.5625, "num_input_tokens_seen": 200025132, "step": 1730 }, { "epoch": 9.306451612903226, "grad_norm": 21.9280948638916, "learning_rate": 5e-07, "loss": 0.7756, "num_input_tokens_seen": 200140700, "step": 1731 }, { "epoch": 9.306451612903226, "loss": 0.7195219993591309, "loss_ce": 3.952731276513077e-05, "loss_iou": 0.314453125, "loss_num": 0.0185546875, "loss_xval": 0.71875, "num_input_tokens_seen": 200140700, "step": 1731 }, { "epoch": 9.311827956989248, "grad_norm": 23.98203468322754, "learning_rate": 5e-07, "loss": 0.7242, "num_input_tokens_seen": 200254160, "step": 1732 }, { "epoch": 9.311827956989248, "loss": 0.5937619209289551, "loss_ce": 4.241861461196095e-05, "loss_iou": 0.248046875, "loss_num": 0.0196533203125, "loss_xval": 0.59375, "num_input_tokens_seen": 200254160, "step": 1732 }, { "epoch": 9.317204301075268, "grad_norm": 22.614639282226562, "learning_rate": 5e-07, "loss": 0.8431, "num_input_tokens_seen": 200365600, "step": 1733 }, { "epoch": 9.317204301075268, "loss": 0.8899338245391846, "loss_ce": 4.125326086068526e-05, "loss_iou": 0.361328125, "loss_num": 0.032958984375, "loss_xval": 0.890625, "num_input_tokens_seen": 200365600, "step": 1733 }, { "epoch": 9.32258064516129, "grad_norm": 20.031801223754883, "learning_rate": 5e-07, "loss": 0.7142, "num_input_tokens_seen": 200479992, "step": 1734 }, { "epoch": 9.32258064516129, "loss": 0.7967898845672607, "loss_ce": 3.693025792017579e-05, "loss_iou": 0.353515625, "loss_num": 0.0181884765625, "loss_xval": 0.796875, "num_input_tokens_seen": 200479992, "step": 1734 }, { "epoch": 9.327956989247312, "grad_norm": 20.56460952758789, "learning_rate": 5e-07, "loss": 0.7018, "num_input_tokens_seen": 200596128, "step": 1735 }, { "epoch": 9.327956989247312, "loss": 0.6361386179924011, "loss_ce": 3.021462725882884e-05, "loss_iou": 0.275390625, "loss_num": 0.017333984375, "loss_xval": 0.63671875, "num_input_tokens_seen": 200596128, "step": 1735 }, { "epoch": 9.333333333333334, "grad_norm": 17.113428115844727, "learning_rate": 5e-07, "loss": 0.9139, "num_input_tokens_seen": 200709696, "step": 1736 }, { "epoch": 9.333333333333334, "loss": 0.6101819276809692, "loss_ce": 7.45209981687367e-05, "loss_iou": 0.267578125, "loss_num": 0.01531982421875, "loss_xval": 0.609375, "num_input_tokens_seen": 200709696, "step": 1736 }, { "epoch": 9.338709677419354, "grad_norm": 26.724163055419922, "learning_rate": 5e-07, "loss": 0.6677, "num_input_tokens_seen": 200827352, "step": 1737 }, { "epoch": 9.338709677419354, "loss": 0.7173166275024414, "loss_ce": 0.0002755833265837282, "loss_iou": 0.30859375, "loss_num": 0.01953125, "loss_xval": 0.71875, "num_input_tokens_seen": 200827352, "step": 1737 }, { "epoch": 9.344086021505376, "grad_norm": 34.92605209350586, "learning_rate": 5e-07, "loss": 0.7885, "num_input_tokens_seen": 200941624, "step": 1738 }, { "epoch": 9.344086021505376, "loss": 0.8823854923248291, "loss_ce": 6.123741331975907e-05, "loss_iou": 0.380859375, "loss_num": 0.0240478515625, "loss_xval": 0.8828125, "num_input_tokens_seen": 200941624, "step": 1738 }, { "epoch": 9.349462365591398, "grad_norm": 36.24028396606445, "learning_rate": 5e-07, "loss": 0.6652, "num_input_tokens_seen": 201058272, "step": 1739 }, { "epoch": 9.349462365591398, "loss": 0.7151107788085938, "loss_ce": 2.288995892740786e-05, "loss_iou": 0.302734375, "loss_num": 0.021728515625, "loss_xval": 0.71484375, "num_input_tokens_seen": 201058272, "step": 1739 }, { "epoch": 9.35483870967742, "grad_norm": 25.32924461364746, "learning_rate": 5e-07, "loss": 0.8222, "num_input_tokens_seen": 201171740, "step": 1740 }, { "epoch": 9.35483870967742, "loss": 0.6956021189689636, "loss_ce": 4.5495507947634906e-05, "loss_iou": 0.306640625, "loss_num": 0.0162353515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 201171740, "step": 1740 }, { "epoch": 9.36021505376344, "grad_norm": 16.50229263305664, "learning_rate": 5e-07, "loss": 0.7882, "num_input_tokens_seen": 201287296, "step": 1741 }, { "epoch": 9.36021505376344, "loss": 0.7253275513648987, "loss_ce": 0.00047405078657902777, "loss_iou": 0.302734375, "loss_num": 0.024169921875, "loss_xval": 0.7265625, "num_input_tokens_seen": 201287296, "step": 1741 }, { "epoch": 9.365591397849462, "grad_norm": 14.039196968078613, "learning_rate": 5e-07, "loss": 0.6764, "num_input_tokens_seen": 201403768, "step": 1742 }, { "epoch": 9.365591397849462, "loss": 0.8577147722244263, "loss_ce": 4.873454599874094e-05, "loss_iou": 0.357421875, "loss_num": 0.0283203125, "loss_xval": 0.859375, "num_input_tokens_seen": 201403768, "step": 1742 }, { "epoch": 9.370967741935484, "grad_norm": 16.690031051635742, "learning_rate": 5e-07, "loss": 0.7073, "num_input_tokens_seen": 201517108, "step": 1743 }, { "epoch": 9.370967741935484, "loss": 0.8696600794792175, "loss_ce": 3.11631774820853e-05, "loss_iou": 0.3828125, "loss_num": 0.0205078125, "loss_xval": 0.87109375, "num_input_tokens_seen": 201517108, "step": 1743 }, { "epoch": 9.376344086021506, "grad_norm": 28.28016471862793, "learning_rate": 5e-07, "loss": 0.7077, "num_input_tokens_seen": 201633856, "step": 1744 }, { "epoch": 9.376344086021506, "loss": 0.5027105808258057, "loss_ce": 2.503802716091741e-05, "loss_iou": 0.2138671875, "loss_num": 0.0150146484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 201633856, "step": 1744 }, { "epoch": 9.381720430107526, "grad_norm": 23.457122802734375, "learning_rate": 5e-07, "loss": 0.8653, "num_input_tokens_seen": 201750888, "step": 1745 }, { "epoch": 9.381720430107526, "loss": 0.8531222939491272, "loss_ce": 9.493873949395493e-05, "loss_iou": 0.369140625, "loss_num": 0.02294921875, "loss_xval": 0.8515625, "num_input_tokens_seen": 201750888, "step": 1745 }, { "epoch": 9.387096774193548, "grad_norm": 18.067670822143555, "learning_rate": 5e-07, "loss": 0.7376, "num_input_tokens_seen": 201866640, "step": 1746 }, { "epoch": 9.387096774193548, "loss": 0.41818422079086304, "loss_ce": 3.2369156542699784e-05, "loss_iou": 0.18359375, "loss_num": 0.0101318359375, "loss_xval": 0.41796875, "num_input_tokens_seen": 201866640, "step": 1746 }, { "epoch": 9.39247311827957, "grad_norm": 18.511932373046875, "learning_rate": 5e-07, "loss": 0.6727, "num_input_tokens_seen": 201982420, "step": 1747 }, { "epoch": 9.39247311827957, "loss": 0.48916444182395935, "loss_ce": 2.8733651561196893e-05, "loss_iou": 0.208984375, "loss_num": 0.01434326171875, "loss_xval": 0.48828125, "num_input_tokens_seen": 201982420, "step": 1747 }, { "epoch": 9.397849462365592, "grad_norm": 42.01295471191406, "learning_rate": 5e-07, "loss": 0.6755, "num_input_tokens_seen": 202094340, "step": 1748 }, { "epoch": 9.397849462365592, "loss": 0.58304363489151, "loss_ce": 3.580759948818013e-05, "loss_iou": 0.25, "loss_num": 0.0166015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 202094340, "step": 1748 }, { "epoch": 9.403225806451612, "grad_norm": 24.725040435791016, "learning_rate": 5e-07, "loss": 0.8203, "num_input_tokens_seen": 202211104, "step": 1749 }, { "epoch": 9.403225806451612, "loss": 0.6460771560668945, "loss_ce": 8.103578875306994e-05, "loss_iou": 0.267578125, "loss_num": 0.0225830078125, "loss_xval": 0.64453125, "num_input_tokens_seen": 202211104, "step": 1749 }, { "epoch": 9.408602150537634, "grad_norm": 17.8070125579834, "learning_rate": 5e-07, "loss": 0.724, "num_input_tokens_seen": 202326196, "step": 1750 }, { "epoch": 9.408602150537634, "eval_icons_CIoU": 0.1468050628900528, "eval_icons_GIoU": 0.11823820695281029, "eval_icons_IoU": 0.30518609285354614, "eval_icons_MAE_all": 0.033520122058689594, "eval_icons_MAE_h": 0.0385954100638628, "eval_icons_MAE_w": 0.05618929676711559, "eval_icons_MAE_x_boxes": 0.05293003097176552, "eval_icons_MAE_y_boxes": 0.03413102217018604, "eval_icons_NUM_probability": 0.9985907971858978, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.9302623271942139, "eval_icons_loss_ce": 0.0002261853078380227, "eval_icons_loss_iou": 0.874755859375, "eval_icons_loss_num": 0.03406524658203125, "eval_icons_loss_xval": 1.919921875, "eval_icons_runtime": 48.5025, "eval_icons_samples_per_second": 1.031, "eval_icons_steps_per_second": 0.041, "num_input_tokens_seen": 202326196, "step": 1750 }, { "epoch": 9.408602150537634, "eval_screenspot_CIoU": 0.330404390891393, "eval_screenspot_GIoU": 0.3218938857316971, "eval_screenspot_IoU": 0.42006094257036847, "eval_screenspot_MAE_all": 0.05906902998685837, "eval_screenspot_MAE_h": 0.049581896513700485, "eval_screenspot_MAE_w": 0.0733050766090552, "eval_screenspot_MAE_x_boxes": 0.0786740096906821, "eval_screenspot_MAE_y_boxes": 0.03991607390344143, "eval_screenspot_NUM_probability": 0.9997089306513468, "eval_screenspot_inside_bbox": 0.725000003973643, "eval_screenspot_loss": 1.714653491973877, "eval_screenspot_loss_ce": 0.00011143586501323928, "eval_screenspot_loss_iou": 0.7272135416666666, "eval_screenspot_loss_num": 0.06791941324869792, "eval_screenspot_loss_xval": 1.7941080729166667, "eval_screenspot_runtime": 93.3483, "eval_screenspot_samples_per_second": 0.953, "eval_screenspot_steps_per_second": 0.032, "num_input_tokens_seen": 202326196, "step": 1750 }, { "epoch": 9.408602150537634, "loss": 1.6343345642089844, "loss_ce": 5.723972572013736e-05, "loss_iou": 0.68359375, "loss_num": 0.053955078125, "loss_xval": 1.6328125, "num_input_tokens_seen": 202326196, "step": 1750 }, { "epoch": 9.413978494623656, "grad_norm": 16.415428161621094, "learning_rate": 5e-07, "loss": 0.8597, "num_input_tokens_seen": 202442316, "step": 1751 }, { "epoch": 9.413978494623656, "loss": 0.8402742147445679, "loss_ce": 6.426897016353905e-05, "loss_iou": 0.361328125, "loss_num": 0.0238037109375, "loss_xval": 0.83984375, "num_input_tokens_seen": 202442316, "step": 1751 }, { "epoch": 9.419354838709678, "grad_norm": 17.216638565063477, "learning_rate": 5e-07, "loss": 0.7569, "num_input_tokens_seen": 202558380, "step": 1752 }, { "epoch": 9.419354838709678, "loss": 0.5594696998596191, "loss_ce": 2.1453670342452824e-05, "loss_iou": 0.2421875, "loss_num": 0.01495361328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 202558380, "step": 1752 }, { "epoch": 9.424731182795698, "grad_norm": 29.91708755493164, "learning_rate": 5e-07, "loss": 0.6961, "num_input_tokens_seen": 202673612, "step": 1753 }, { "epoch": 9.424731182795698, "loss": 0.9297168254852295, "loss_ce": 2.9278566216817126e-05, "loss_iou": 0.4140625, "loss_num": 0.020263671875, "loss_xval": 0.9296875, "num_input_tokens_seen": 202673612, "step": 1753 }, { "epoch": 9.43010752688172, "grad_norm": 16.383594512939453, "learning_rate": 5e-07, "loss": 0.8673, "num_input_tokens_seen": 202790676, "step": 1754 }, { "epoch": 9.43010752688172, "loss": 1.1724050045013428, "loss_ce": 4.179740426479839e-05, "loss_iou": 0.5234375, "loss_num": 0.024658203125, "loss_xval": 1.171875, "num_input_tokens_seen": 202790676, "step": 1754 }, { "epoch": 9.435483870967742, "grad_norm": 19.439363479614258, "learning_rate": 5e-07, "loss": 0.7588, "num_input_tokens_seen": 202906716, "step": 1755 }, { "epoch": 9.435483870967742, "loss": 0.684952974319458, "loss_ce": 1.643780524318572e-05, "loss_iou": 0.267578125, "loss_num": 0.02978515625, "loss_xval": 0.68359375, "num_input_tokens_seen": 202906716, "step": 1755 }, { "epoch": 9.440860215053764, "grad_norm": 71.22770690917969, "learning_rate": 5e-07, "loss": 0.6691, "num_input_tokens_seen": 203023212, "step": 1756 }, { "epoch": 9.440860215053764, "loss": 0.8885040879249573, "loss_ce": 7.637500675627962e-05, "loss_iou": 0.400390625, "loss_num": 0.0172119140625, "loss_xval": 0.88671875, "num_input_tokens_seen": 203023212, "step": 1756 }, { "epoch": 9.446236559139784, "grad_norm": 14.608715057373047, "learning_rate": 5e-07, "loss": 0.7255, "num_input_tokens_seen": 203138328, "step": 1757 }, { "epoch": 9.446236559139784, "loss": 0.38216647505760193, "loss_ce": 2.537262480473146e-05, "loss_iou": 0.1494140625, "loss_num": 0.0166015625, "loss_xval": 0.3828125, "num_input_tokens_seen": 203138328, "step": 1757 }, { "epoch": 9.451612903225806, "grad_norm": 18.45246696472168, "learning_rate": 5e-07, "loss": 0.6622, "num_input_tokens_seen": 203251616, "step": 1758 }, { "epoch": 9.451612903225806, "loss": 0.5588716864585876, "loss_ce": 3.375855158083141e-05, "loss_iou": 0.2451171875, "loss_num": 0.01348876953125, "loss_xval": 0.55859375, "num_input_tokens_seen": 203251616, "step": 1758 }, { "epoch": 9.456989247311828, "grad_norm": 20.15816879272461, "learning_rate": 5e-07, "loss": 0.8607, "num_input_tokens_seen": 203367496, "step": 1759 }, { "epoch": 9.456989247311828, "loss": 0.8769841194152832, "loss_ce": 3.097803346463479e-05, "loss_iou": 0.375, "loss_num": 0.025146484375, "loss_xval": 0.875, "num_input_tokens_seen": 203367496, "step": 1759 }, { "epoch": 9.46236559139785, "grad_norm": 33.92463684082031, "learning_rate": 5e-07, "loss": 0.9689, "num_input_tokens_seen": 203483652, "step": 1760 }, { "epoch": 9.46236559139785, "loss": 0.7332661747932434, "loss_ce": 0.00011188896314706653, "loss_iou": 0.32421875, "loss_num": 0.016845703125, "loss_xval": 0.734375, "num_input_tokens_seen": 203483652, "step": 1760 }, { "epoch": 9.46774193548387, "grad_norm": 18.559572219848633, "learning_rate": 5e-07, "loss": 0.6006, "num_input_tokens_seen": 203601940, "step": 1761 }, { "epoch": 9.46774193548387, "loss": 0.5524182319641113, "loss_ce": 5.008331572753377e-05, "loss_iou": 0.2392578125, "loss_num": 0.0146484375, "loss_xval": 0.55078125, "num_input_tokens_seen": 203601940, "step": 1761 }, { "epoch": 9.473118279569892, "grad_norm": 17.240217208862305, "learning_rate": 5e-07, "loss": 0.6898, "num_input_tokens_seen": 203719828, "step": 1762 }, { "epoch": 9.473118279569892, "loss": 0.8218164443969727, "loss_ce": 3.9154143451014534e-05, "loss_iou": 0.37109375, "loss_num": 0.0162353515625, "loss_xval": 0.8203125, "num_input_tokens_seen": 203719828, "step": 1762 }, { "epoch": 9.478494623655914, "grad_norm": 27.036792755126953, "learning_rate": 5e-07, "loss": 0.6186, "num_input_tokens_seen": 203836848, "step": 1763 }, { "epoch": 9.478494623655914, "loss": 0.6112650036811829, "loss_ce": 5.894729110877961e-05, "loss_iou": 0.259765625, "loss_num": 0.0186767578125, "loss_xval": 0.609375, "num_input_tokens_seen": 203836848, "step": 1763 }, { "epoch": 9.483870967741936, "grad_norm": 16.811595916748047, "learning_rate": 5e-07, "loss": 0.786, "num_input_tokens_seen": 203949952, "step": 1764 }, { "epoch": 9.483870967741936, "loss": 0.8923641443252563, "loss_ce": 3.0184517527231947e-05, "loss_iou": 0.376953125, "loss_num": 0.0274658203125, "loss_xval": 0.890625, "num_input_tokens_seen": 203949952, "step": 1764 }, { "epoch": 9.489247311827956, "grad_norm": 23.26786231994629, "learning_rate": 5e-07, "loss": 0.5454, "num_input_tokens_seen": 204069824, "step": 1765 }, { "epoch": 9.489247311827956, "loss": 0.5807316899299622, "loss_ce": 4.318911669543013e-05, "loss_iou": 0.263671875, "loss_num": 0.01080322265625, "loss_xval": 0.58203125, "num_input_tokens_seen": 204069824, "step": 1765 }, { "epoch": 9.494623655913978, "grad_norm": 25.141874313354492, "learning_rate": 5e-07, "loss": 0.8044, "num_input_tokens_seen": 204186356, "step": 1766 }, { "epoch": 9.494623655913978, "loss": 0.7813140153884888, "loss_ce": 6.398840196197852e-05, "loss_iou": 0.326171875, "loss_num": 0.0255126953125, "loss_xval": 0.78125, "num_input_tokens_seen": 204186356, "step": 1766 }, { "epoch": 9.5, "grad_norm": 18.02626609802246, "learning_rate": 5e-07, "loss": 0.6632, "num_input_tokens_seen": 204303712, "step": 1767 }, { "epoch": 9.5, "loss": 0.5955603122711182, "loss_ce": 0.0001013473083730787, "loss_iou": 0.244140625, "loss_num": 0.021484375, "loss_xval": 0.59375, "num_input_tokens_seen": 204303712, "step": 1767 }, { "epoch": 9.505376344086022, "grad_norm": 24.132802963256836, "learning_rate": 5e-07, "loss": 0.7085, "num_input_tokens_seen": 204418656, "step": 1768 }, { "epoch": 9.505376344086022, "loss": 0.6815395355224609, "loss_ce": 2.096297248499468e-05, "loss_iou": 0.2890625, "loss_num": 0.0208740234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 204418656, "step": 1768 }, { "epoch": 9.510752688172044, "grad_norm": 39.178192138671875, "learning_rate": 5e-07, "loss": 0.769, "num_input_tokens_seen": 204534616, "step": 1769 }, { "epoch": 9.510752688172044, "loss": 0.8260005712509155, "loss_ce": 7.289783388841897e-05, "loss_iou": 0.349609375, "loss_num": 0.025390625, "loss_xval": 0.82421875, "num_input_tokens_seen": 204534616, "step": 1769 }, { "epoch": 9.516129032258064, "grad_norm": 35.33113479614258, "learning_rate": 5e-07, "loss": 0.7442, "num_input_tokens_seen": 204654332, "step": 1770 }, { "epoch": 9.516129032258064, "loss": 0.7734983563423157, "loss_ce": 6.084796041250229e-05, "loss_iou": 0.333984375, "loss_num": 0.021484375, "loss_xval": 0.7734375, "num_input_tokens_seen": 204654332, "step": 1770 }, { "epoch": 9.521505376344086, "grad_norm": 21.631694793701172, "learning_rate": 5e-07, "loss": 0.7792, "num_input_tokens_seen": 204767872, "step": 1771 }, { "epoch": 9.521505376344086, "loss": 0.7607682943344116, "loss_ce": 2.60865745076444e-05, "loss_iou": 0.34375, "loss_num": 0.01458740234375, "loss_xval": 0.76171875, "num_input_tokens_seen": 204767872, "step": 1771 }, { "epoch": 9.526881720430108, "grad_norm": 24.481121063232422, "learning_rate": 5e-07, "loss": 0.7302, "num_input_tokens_seen": 204883280, "step": 1772 }, { "epoch": 9.526881720430108, "loss": 0.8819103837013245, "loss_ce": 0.00031863455660641193, "loss_iou": 0.3828125, "loss_num": 0.022705078125, "loss_xval": 0.8828125, "num_input_tokens_seen": 204883280, "step": 1772 }, { "epoch": 9.532258064516128, "grad_norm": 18.60175895690918, "learning_rate": 5e-07, "loss": 0.7452, "num_input_tokens_seen": 204997852, "step": 1773 }, { "epoch": 9.532258064516128, "loss": 0.853789746761322, "loss_ce": 2.998855234181974e-05, "loss_iou": 0.359375, "loss_num": 0.02734375, "loss_xval": 0.85546875, "num_input_tokens_seen": 204997852, "step": 1773 }, { "epoch": 9.53763440860215, "grad_norm": 17.94915008544922, "learning_rate": 5e-07, "loss": 0.7887, "num_input_tokens_seen": 205112744, "step": 1774 }, { "epoch": 9.53763440860215, "loss": 0.9327678084373474, "loss_ce": 0.00015063512546475977, "loss_iou": 0.40625, "loss_num": 0.0242919921875, "loss_xval": 0.93359375, "num_input_tokens_seen": 205112744, "step": 1774 }, { "epoch": 9.543010752688172, "grad_norm": 18.954181671142578, "learning_rate": 5e-07, "loss": 0.694, "num_input_tokens_seen": 205228848, "step": 1775 }, { "epoch": 9.543010752688172, "loss": 0.4932008385658264, "loss_ce": 3.6788474972127005e-05, "loss_iou": 0.2236328125, "loss_num": 0.009033203125, "loss_xval": 0.4921875, "num_input_tokens_seen": 205228848, "step": 1775 }, { "epoch": 9.548387096774194, "grad_norm": 25.44583511352539, "learning_rate": 5e-07, "loss": 0.6376, "num_input_tokens_seen": 205344628, "step": 1776 }, { "epoch": 9.548387096774194, "loss": 0.4596296548843384, "loss_ce": 3.4946999221574515e-05, "loss_iou": 0.19140625, "loss_num": 0.015380859375, "loss_xval": 0.458984375, "num_input_tokens_seen": 205344628, "step": 1776 }, { "epoch": 9.553763440860216, "grad_norm": 28.034547805786133, "learning_rate": 5e-07, "loss": 0.715, "num_input_tokens_seen": 205462372, "step": 1777 }, { "epoch": 9.553763440860216, "loss": 0.7390671372413635, "loss_ce": 5.3486815886572e-05, "loss_iou": 0.318359375, "loss_num": 0.0203857421875, "loss_xval": 0.73828125, "num_input_tokens_seen": 205462372, "step": 1777 }, { "epoch": 9.559139784946236, "grad_norm": 17.4012508392334, "learning_rate": 5e-07, "loss": 0.6623, "num_input_tokens_seen": 205579640, "step": 1778 }, { "epoch": 9.559139784946236, "loss": 0.8562407493591309, "loss_ce": 3.951852340833284e-05, "loss_iou": 0.3671875, "loss_num": 0.0245361328125, "loss_xval": 0.85546875, "num_input_tokens_seen": 205579640, "step": 1778 }, { "epoch": 9.564516129032258, "grad_norm": 22.516372680664062, "learning_rate": 5e-07, "loss": 0.8461, "num_input_tokens_seen": 205694620, "step": 1779 }, { "epoch": 9.564516129032258, "loss": 0.7468622922897339, "loss_ce": 3.60917802026961e-05, "loss_iou": 0.29296875, "loss_num": 0.0322265625, "loss_xval": 0.74609375, "num_input_tokens_seen": 205694620, "step": 1779 }, { "epoch": 9.56989247311828, "grad_norm": 25.071901321411133, "learning_rate": 5e-07, "loss": 0.6954, "num_input_tokens_seen": 205812892, "step": 1780 }, { "epoch": 9.56989247311828, "loss": 0.6439237594604492, "loss_ce": 0.0001860108895925805, "loss_iou": 0.27734375, "loss_num": 0.017822265625, "loss_xval": 0.64453125, "num_input_tokens_seen": 205812892, "step": 1780 }, { "epoch": 9.575268817204302, "grad_norm": 36.15744400024414, "learning_rate": 5e-07, "loss": 0.7125, "num_input_tokens_seen": 205928540, "step": 1781 }, { "epoch": 9.575268817204302, "loss": 0.7014525532722473, "loss_ce": 3.654891770565882e-05, "loss_iou": 0.31640625, "loss_num": 0.0137939453125, "loss_xval": 0.703125, "num_input_tokens_seen": 205928540, "step": 1781 }, { "epoch": 9.580645161290322, "grad_norm": 26.154476165771484, "learning_rate": 5e-07, "loss": 0.8067, "num_input_tokens_seen": 206037820, "step": 1782 }, { "epoch": 9.580645161290322, "loss": 0.7862246036529541, "loss_ce": 3.0739807698410004e-05, "loss_iou": 0.33203125, "loss_num": 0.0244140625, "loss_xval": 0.78515625, "num_input_tokens_seen": 206037820, "step": 1782 }, { "epoch": 9.586021505376344, "grad_norm": 22.497264862060547, "learning_rate": 5e-07, "loss": 0.7209, "num_input_tokens_seen": 206155900, "step": 1783 }, { "epoch": 9.586021505376344, "loss": 0.7198106050491333, "loss_ce": 8.407532004639506e-05, "loss_iou": 0.298828125, "loss_num": 0.0244140625, "loss_xval": 0.71875, "num_input_tokens_seen": 206155900, "step": 1783 }, { "epoch": 9.591397849462366, "grad_norm": 21.41866111755371, "learning_rate": 5e-07, "loss": 0.8055, "num_input_tokens_seen": 206273608, "step": 1784 }, { "epoch": 9.591397849462366, "loss": 0.8639695644378662, "loss_ce": 0.00013897934695705771, "loss_iou": 0.361328125, "loss_num": 0.0281982421875, "loss_xval": 0.86328125, "num_input_tokens_seen": 206273608, "step": 1784 }, { "epoch": 9.596774193548388, "grad_norm": 16.221393585205078, "learning_rate": 5e-07, "loss": 0.6379, "num_input_tokens_seen": 206388868, "step": 1785 }, { "epoch": 9.596774193548388, "loss": 0.5111451148986816, "loss_ce": 3.675553307402879e-05, "loss_iou": 0.2099609375, "loss_num": 0.0181884765625, "loss_xval": 0.51171875, "num_input_tokens_seen": 206388868, "step": 1785 }, { "epoch": 9.602150537634408, "grad_norm": 30.591203689575195, "learning_rate": 5e-07, "loss": 0.7188, "num_input_tokens_seen": 206502448, "step": 1786 }, { "epoch": 9.602150537634408, "loss": 0.8372337818145752, "loss_ce": 7.55598593968898e-05, "loss_iou": 0.37109375, "loss_num": 0.018798828125, "loss_xval": 0.8359375, "num_input_tokens_seen": 206502448, "step": 1786 }, { "epoch": 9.60752688172043, "grad_norm": 28.425065994262695, "learning_rate": 5e-07, "loss": 0.7878, "num_input_tokens_seen": 206616896, "step": 1787 }, { "epoch": 9.60752688172043, "loss": 0.6870846748352051, "loss_ce": 7.298692071344703e-05, "loss_iou": 0.294921875, "loss_num": 0.01953125, "loss_xval": 0.6875, "num_input_tokens_seen": 206616896, "step": 1787 }, { "epoch": 9.612903225806452, "grad_norm": 26.228240966796875, "learning_rate": 5e-07, "loss": 0.6047, "num_input_tokens_seen": 206732044, "step": 1788 }, { "epoch": 9.612903225806452, "loss": 0.8948377966880798, "loss_ce": 6.24402891844511e-05, "loss_iou": 0.38671875, "loss_num": 0.0240478515625, "loss_xval": 0.89453125, "num_input_tokens_seen": 206732044, "step": 1788 }, { "epoch": 9.618279569892474, "grad_norm": 17.696332931518555, "learning_rate": 5e-07, "loss": 0.6792, "num_input_tokens_seen": 206847272, "step": 1789 }, { "epoch": 9.618279569892474, "loss": 0.6391757726669312, "loss_ce": 1.5614066796842963e-05, "loss_iou": 0.255859375, "loss_num": 0.0257568359375, "loss_xval": 0.640625, "num_input_tokens_seen": 206847272, "step": 1789 }, { "epoch": 9.623655913978494, "grad_norm": 22.390979766845703, "learning_rate": 5e-07, "loss": 0.5846, "num_input_tokens_seen": 206964320, "step": 1790 }, { "epoch": 9.623655913978494, "loss": 0.5881819725036621, "loss_ce": 4.723166421172209e-05, "loss_iou": 0.26171875, "loss_num": 0.012939453125, "loss_xval": 0.58984375, "num_input_tokens_seen": 206964320, "step": 1790 }, { "epoch": 9.629032258064516, "grad_norm": 18.41312026977539, "learning_rate": 5e-07, "loss": 0.7653, "num_input_tokens_seen": 207080924, "step": 1791 }, { "epoch": 9.629032258064516, "loss": 0.6558467745780945, "loss_ce": 8.504107972839847e-05, "loss_iou": 0.279296875, "loss_num": 0.01904296875, "loss_xval": 0.65625, "num_input_tokens_seen": 207080924, "step": 1791 }, { "epoch": 9.634408602150538, "grad_norm": 19.16942596435547, "learning_rate": 5e-07, "loss": 0.6824, "num_input_tokens_seen": 207199304, "step": 1792 }, { "epoch": 9.634408602150538, "loss": 0.5505685806274414, "loss_ce": 3.142319474136457e-05, "loss_iou": 0.2109375, "loss_num": 0.025634765625, "loss_xval": 0.55078125, "num_input_tokens_seen": 207199304, "step": 1792 }, { "epoch": 9.63978494623656, "grad_norm": 25.754955291748047, "learning_rate": 5e-07, "loss": 0.6583, "num_input_tokens_seen": 207314668, "step": 1793 }, { "epoch": 9.63978494623656, "loss": 0.40432286262512207, "loss_ce": 2.6002773665823042e-05, "loss_iou": 0.158203125, "loss_num": 0.017333984375, "loss_xval": 0.404296875, "num_input_tokens_seen": 207314668, "step": 1793 }, { "epoch": 9.64516129032258, "grad_norm": 18.973388671875, "learning_rate": 5e-07, "loss": 0.5805, "num_input_tokens_seen": 207432664, "step": 1794 }, { "epoch": 9.64516129032258, "loss": 0.5096685886383057, "loss_ce": 2.5019849999807775e-05, "loss_iou": 0.2138671875, "loss_num": 0.0166015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 207432664, "step": 1794 }, { "epoch": 9.650537634408602, "grad_norm": 17.42803192138672, "learning_rate": 5e-07, "loss": 0.6937, "num_input_tokens_seen": 207550812, "step": 1795 }, { "epoch": 9.650537634408602, "loss": 0.7082812786102295, "loss_ce": 2.9372335120569915e-05, "loss_iou": 0.298828125, "loss_num": 0.022216796875, "loss_xval": 0.70703125, "num_input_tokens_seen": 207550812, "step": 1795 }, { "epoch": 9.655913978494624, "grad_norm": 17.089496612548828, "learning_rate": 5e-07, "loss": 0.7132, "num_input_tokens_seen": 207663700, "step": 1796 }, { "epoch": 9.655913978494624, "loss": 0.3962608575820923, "loss_ce": 2.06495460588485e-05, "loss_iou": 0.1669921875, "loss_num": 0.0125732421875, "loss_xval": 0.396484375, "num_input_tokens_seen": 207663700, "step": 1796 }, { "epoch": 9.661290322580646, "grad_norm": 19.841135025024414, "learning_rate": 5e-07, "loss": 0.8144, "num_input_tokens_seen": 207776112, "step": 1797 }, { "epoch": 9.661290322580646, "loss": 0.6856029033660889, "loss_ce": 5.600329313892871e-05, "loss_iou": 0.283203125, "loss_num": 0.0235595703125, "loss_xval": 0.6875, "num_input_tokens_seen": 207776112, "step": 1797 }, { "epoch": 9.666666666666666, "grad_norm": 19.107160568237305, "learning_rate": 5e-07, "loss": 0.5538, "num_input_tokens_seen": 207894844, "step": 1798 }, { "epoch": 9.666666666666666, "loss": 0.49881893396377563, "loss_ce": 3.963748167734593e-05, "loss_iou": 0.1923828125, "loss_num": 0.0230712890625, "loss_xval": 0.498046875, "num_input_tokens_seen": 207894844, "step": 1798 }, { "epoch": 9.672043010752688, "grad_norm": 46.6480712890625, "learning_rate": 5e-07, "loss": 0.6238, "num_input_tokens_seen": 208012536, "step": 1799 }, { "epoch": 9.672043010752688, "loss": 0.6235830187797546, "loss_ce": 0.0001699195127002895, "loss_iou": 0.26953125, "loss_num": 0.0166015625, "loss_xval": 0.625, "num_input_tokens_seen": 208012536, "step": 1799 }, { "epoch": 9.67741935483871, "grad_norm": 37.24729919433594, "learning_rate": 5e-07, "loss": 0.5663, "num_input_tokens_seen": 208127744, "step": 1800 }, { "epoch": 9.67741935483871, "loss": 0.7988624572753906, "loss_ce": 3.432736775721423e-05, "loss_iou": 0.35546875, "loss_num": 0.0177001953125, "loss_xval": 0.796875, "num_input_tokens_seen": 208127744, "step": 1800 }, { "epoch": 9.682795698924732, "grad_norm": 21.15680503845215, "learning_rate": 5e-07, "loss": 0.5282, "num_input_tokens_seen": 208244704, "step": 1801 }, { "epoch": 9.682795698924732, "loss": 0.4299580454826355, "loss_ce": 2.6417033950565383e-05, "loss_iou": 0.185546875, "loss_num": 0.01190185546875, "loss_xval": 0.4296875, "num_input_tokens_seen": 208244704, "step": 1801 }, { "epoch": 9.688172043010752, "grad_norm": 19.51079750061035, "learning_rate": 5e-07, "loss": 0.7223, "num_input_tokens_seen": 208360592, "step": 1802 }, { "epoch": 9.688172043010752, "loss": 0.7803502678871155, "loss_ce": 7.68632089602761e-05, "loss_iou": 0.333984375, "loss_num": 0.0224609375, "loss_xval": 0.78125, "num_input_tokens_seen": 208360592, "step": 1802 }, { "epoch": 9.693548387096774, "grad_norm": 25.64137077331543, "learning_rate": 5e-07, "loss": 0.7141, "num_input_tokens_seen": 208477060, "step": 1803 }, { "epoch": 9.693548387096774, "loss": 0.8124284148216248, "loss_ce": 5.051448897575028e-05, "loss_iou": 0.35546875, "loss_num": 0.02001953125, "loss_xval": 0.8125, "num_input_tokens_seen": 208477060, "step": 1803 }, { "epoch": 9.698924731182796, "grad_norm": 29.159154891967773, "learning_rate": 5e-07, "loss": 0.6022, "num_input_tokens_seen": 208593516, "step": 1804 }, { "epoch": 9.698924731182796, "loss": 0.6018303632736206, "loss_ce": 2.3713819246040657e-05, "loss_iou": 0.2578125, "loss_num": 0.017578125, "loss_xval": 0.6015625, "num_input_tokens_seen": 208593516, "step": 1804 }, { "epoch": 9.704301075268818, "grad_norm": 32.80813217163086, "learning_rate": 5e-07, "loss": 0.7897, "num_input_tokens_seen": 208705064, "step": 1805 }, { "epoch": 9.704301075268818, "loss": 0.921383261680603, "loss_ce": 0.00011864943371620029, "loss_iou": 0.408203125, "loss_num": 0.0208740234375, "loss_xval": 0.921875, "num_input_tokens_seen": 208705064, "step": 1805 }, { "epoch": 9.709677419354838, "grad_norm": 18.804208755493164, "learning_rate": 5e-07, "loss": 0.7797, "num_input_tokens_seen": 208822396, "step": 1806 }, { "epoch": 9.709677419354838, "loss": 0.6809337139129639, "loss_ce": 2.5516517780488357e-05, "loss_iou": 0.298828125, "loss_num": 0.016845703125, "loss_xval": 0.6796875, "num_input_tokens_seen": 208822396, "step": 1806 }, { "epoch": 9.71505376344086, "grad_norm": 20.126352310180664, "learning_rate": 5e-07, "loss": 0.8008, "num_input_tokens_seen": 208938976, "step": 1807 }, { "epoch": 9.71505376344086, "loss": 0.7778638601303101, "loss_ce": 3.179962368449196e-05, "loss_iou": 0.32421875, "loss_num": 0.0257568359375, "loss_xval": 0.77734375, "num_input_tokens_seen": 208938976, "step": 1807 }, { "epoch": 9.720430107526882, "grad_norm": 31.35825538635254, "learning_rate": 5e-07, "loss": 0.8568, "num_input_tokens_seen": 209053940, "step": 1808 }, { "epoch": 9.720430107526882, "loss": 0.7242845296859741, "loss_ce": 4.1338451410410926e-05, "loss_iou": 0.298828125, "loss_num": 0.0252685546875, "loss_xval": 0.72265625, "num_input_tokens_seen": 209053940, "step": 1808 }, { "epoch": 9.725806451612904, "grad_norm": 24.176780700683594, "learning_rate": 5e-07, "loss": 0.9464, "num_input_tokens_seen": 209171612, "step": 1809 }, { "epoch": 9.725806451612904, "loss": 0.8170616626739502, "loss_ce": 0.005782371386885643, "loss_iou": 0.287109375, "loss_num": 0.0478515625, "loss_xval": 0.8125, "num_input_tokens_seen": 209171612, "step": 1809 }, { "epoch": 9.731182795698924, "grad_norm": 35.67753982543945, "learning_rate": 5e-07, "loss": 0.6965, "num_input_tokens_seen": 209288164, "step": 1810 }, { "epoch": 9.731182795698924, "loss": 0.5010082721710205, "loss_ce": 3.1678308005211875e-05, "loss_iou": 0.205078125, "loss_num": 0.01806640625, "loss_xval": 0.5, "num_input_tokens_seen": 209288164, "step": 1810 }, { "epoch": 9.736559139784946, "grad_norm": 19.507835388183594, "learning_rate": 5e-07, "loss": 0.7736, "num_input_tokens_seen": 209404292, "step": 1811 }, { "epoch": 9.736559139784946, "loss": 0.8507736921310425, "loss_ce": 6.56606353004463e-05, "loss_iou": 0.357421875, "loss_num": 0.027099609375, "loss_xval": 0.8515625, "num_input_tokens_seen": 209404292, "step": 1811 }, { "epoch": 9.741935483870968, "grad_norm": 22.262283325195312, "learning_rate": 5e-07, "loss": 0.6899, "num_input_tokens_seen": 209521084, "step": 1812 }, { "epoch": 9.741935483870968, "loss": 0.7263466715812683, "loss_ce": 2.8312317226664163e-05, "loss_iou": 0.322265625, "loss_num": 0.0159912109375, "loss_xval": 0.7265625, "num_input_tokens_seen": 209521084, "step": 1812 }, { "epoch": 9.74731182795699, "grad_norm": 20.94075584411621, "learning_rate": 5e-07, "loss": 0.6744, "num_input_tokens_seen": 209638016, "step": 1813 }, { "epoch": 9.74731182795699, "loss": 0.8130288124084473, "loss_ce": 4.056117541040294e-05, "loss_iou": 0.3671875, "loss_num": 0.015869140625, "loss_xval": 0.8125, "num_input_tokens_seen": 209638016, "step": 1813 }, { "epoch": 9.75268817204301, "grad_norm": 15.201274871826172, "learning_rate": 5e-07, "loss": 0.5999, "num_input_tokens_seen": 209754268, "step": 1814 }, { "epoch": 9.75268817204301, "loss": 0.5386006832122803, "loss_ce": 2.6471592718735337e-05, "loss_iou": 0.234375, "loss_num": 0.01373291015625, "loss_xval": 0.5390625, "num_input_tokens_seen": 209754268, "step": 1814 }, { "epoch": 9.758064516129032, "grad_norm": 23.398941040039062, "learning_rate": 5e-07, "loss": 0.6757, "num_input_tokens_seen": 209868640, "step": 1815 }, { "epoch": 9.758064516129032, "loss": 0.6326144337654114, "loss_ce": 4.609348252415657e-05, "loss_iou": 0.28515625, "loss_num": 0.0123291015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 209868640, "step": 1815 }, { "epoch": 9.763440860215054, "grad_norm": 26.242095947265625, "learning_rate": 5e-07, "loss": 0.8222, "num_input_tokens_seen": 209984116, "step": 1816 }, { "epoch": 9.763440860215054, "loss": 0.809475839138031, "loss_ce": 2.7641110136755742e-05, "loss_iou": 0.34765625, "loss_num": 0.0225830078125, "loss_xval": 0.80859375, "num_input_tokens_seen": 209984116, "step": 1816 }, { "epoch": 9.768817204301076, "grad_norm": 24.08068084716797, "learning_rate": 5e-07, "loss": 0.6368, "num_input_tokens_seen": 210098312, "step": 1817 }, { "epoch": 9.768817204301076, "loss": 0.5973328351974487, "loss_ce": 4.282273584976792e-05, "loss_iou": 0.28125, "loss_num": 0.00732421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 210098312, "step": 1817 }, { "epoch": 9.774193548387096, "grad_norm": 20.037803649902344, "learning_rate": 5e-07, "loss": 0.8115, "num_input_tokens_seen": 210216012, "step": 1818 }, { "epoch": 9.774193548387096, "loss": 0.7029204368591309, "loss_ce": 3.9556958654429764e-05, "loss_iou": 0.287109375, "loss_num": 0.0260009765625, "loss_xval": 0.703125, "num_input_tokens_seen": 210216012, "step": 1818 }, { "epoch": 9.779569892473118, "grad_norm": 16.716564178466797, "learning_rate": 5e-07, "loss": 0.6658, "num_input_tokens_seen": 210332568, "step": 1819 }, { "epoch": 9.779569892473118, "loss": 0.5584330558776855, "loss_ce": 8.3452308899723e-05, "loss_iou": 0.25390625, "loss_num": 0.01031494140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 210332568, "step": 1819 }, { "epoch": 9.78494623655914, "grad_norm": 16.907302856445312, "learning_rate": 5e-07, "loss": 0.6045, "num_input_tokens_seen": 210449852, "step": 1820 }, { "epoch": 9.78494623655914, "loss": 0.6078206300735474, "loss_ce": 3.2514672057004645e-05, "loss_iou": 0.265625, "loss_num": 0.0150146484375, "loss_xval": 0.609375, "num_input_tokens_seen": 210449852, "step": 1820 }, { "epoch": 9.790322580645162, "grad_norm": 16.64137077331543, "learning_rate": 5e-07, "loss": 0.6937, "num_input_tokens_seen": 210566400, "step": 1821 }, { "epoch": 9.790322580645162, "loss": 0.5063825845718384, "loss_ce": 3.492691757855937e-05, "loss_iou": 0.2080078125, "loss_num": 0.01806640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 210566400, "step": 1821 }, { "epoch": 9.795698924731182, "grad_norm": 26.168149948120117, "learning_rate": 5e-07, "loss": 0.6436, "num_input_tokens_seen": 210683408, "step": 1822 }, { "epoch": 9.795698924731182, "loss": 0.5433574914932251, "loss_ce": 2.249604949611239e-05, "loss_iou": 0.236328125, "loss_num": 0.01416015625, "loss_xval": 0.54296875, "num_input_tokens_seen": 210683408, "step": 1822 }, { "epoch": 9.801075268817204, "grad_norm": 30.06972312927246, "learning_rate": 5e-07, "loss": 0.7212, "num_input_tokens_seen": 210801056, "step": 1823 }, { "epoch": 9.801075268817204, "loss": 0.8624190092086792, "loss_ce": 0.00011431932944105938, "loss_iou": 0.375, "loss_num": 0.0225830078125, "loss_xval": 0.86328125, "num_input_tokens_seen": 210801056, "step": 1823 }, { "epoch": 9.806451612903226, "grad_norm": 21.705533981323242, "learning_rate": 5e-07, "loss": 0.6988, "num_input_tokens_seen": 210917956, "step": 1824 }, { "epoch": 9.806451612903226, "loss": 0.6699524521827698, "loss_ce": 3.059268419747241e-05, "loss_iou": 0.294921875, "loss_num": 0.0164794921875, "loss_xval": 0.671875, "num_input_tokens_seen": 210917956, "step": 1824 }, { "epoch": 9.811827956989248, "grad_norm": 24.3620548248291, "learning_rate": 5e-07, "loss": 0.6464, "num_input_tokens_seen": 211033428, "step": 1825 }, { "epoch": 9.811827956989248, "loss": 0.6941226124763489, "loss_ce": 3.0804403650108725e-05, "loss_iou": 0.296875, "loss_num": 0.019775390625, "loss_xval": 0.6953125, "num_input_tokens_seen": 211033428, "step": 1825 }, { "epoch": 9.817204301075268, "grad_norm": 33.77761459350586, "learning_rate": 5e-07, "loss": 0.8382, "num_input_tokens_seen": 211151164, "step": 1826 }, { "epoch": 9.817204301075268, "loss": 0.8469821214675903, "loss_ce": 5.828523717354983e-05, "loss_iou": 0.388671875, "loss_num": 0.01422119140625, "loss_xval": 0.84765625, "num_input_tokens_seen": 211151164, "step": 1826 }, { "epoch": 9.82258064516129, "grad_norm": 22.932327270507812, "learning_rate": 5e-07, "loss": 0.7157, "num_input_tokens_seen": 211265800, "step": 1827 }, { "epoch": 9.82258064516129, "loss": 0.7678418159484863, "loss_ce": 1.9557694031391293e-05, "loss_iou": 0.314453125, "loss_num": 0.0283203125, "loss_xval": 0.76953125, "num_input_tokens_seen": 211265800, "step": 1827 }, { "epoch": 9.827956989247312, "grad_norm": 25.721012115478516, "learning_rate": 5e-07, "loss": 0.7896, "num_input_tokens_seen": 211380536, "step": 1828 }, { "epoch": 9.827956989247312, "loss": 0.8445563316345215, "loss_ce": 7.39145398256369e-05, "loss_iou": 0.365234375, "loss_num": 0.0225830078125, "loss_xval": 0.84375, "num_input_tokens_seen": 211380536, "step": 1828 }, { "epoch": 9.833333333333334, "grad_norm": 15.6292085647583, "learning_rate": 5e-07, "loss": 0.6095, "num_input_tokens_seen": 211496828, "step": 1829 }, { "epoch": 9.833333333333334, "loss": 0.5761967301368713, "loss_ce": 2.4851351554389112e-05, "loss_iou": 0.259765625, "loss_num": 0.01165771484375, "loss_xval": 0.578125, "num_input_tokens_seen": 211496828, "step": 1829 }, { "epoch": 9.838709677419354, "grad_norm": 30.185609817504883, "learning_rate": 5e-07, "loss": 0.9006, "num_input_tokens_seen": 211612900, "step": 1830 }, { "epoch": 9.838709677419354, "loss": 0.7971644401550293, "loss_ce": 4.531151716946624e-05, "loss_iou": 0.345703125, "loss_num": 0.02099609375, "loss_xval": 0.796875, "num_input_tokens_seen": 211612900, "step": 1830 }, { "epoch": 9.844086021505376, "grad_norm": 17.052413940429688, "learning_rate": 5e-07, "loss": 0.5743, "num_input_tokens_seen": 211727624, "step": 1831 }, { "epoch": 9.844086021505376, "loss": 0.6325892210006714, "loss_ce": 2.0870571461273357e-05, "loss_iou": 0.2734375, "loss_num": 0.0169677734375, "loss_xval": 0.6328125, "num_input_tokens_seen": 211727624, "step": 1831 }, { "epoch": 9.849462365591398, "grad_norm": 26.279386520385742, "learning_rate": 5e-07, "loss": 0.7294, "num_input_tokens_seen": 211838968, "step": 1832 }, { "epoch": 9.849462365591398, "loss": 0.7368790507316589, "loss_ce": 6.265839328989387e-05, "loss_iou": 0.310546875, "loss_num": 0.023193359375, "loss_xval": 0.73828125, "num_input_tokens_seen": 211838968, "step": 1832 }, { "epoch": 9.85483870967742, "grad_norm": 27.526395797729492, "learning_rate": 5e-07, "loss": 0.7193, "num_input_tokens_seen": 211954224, "step": 1833 }, { "epoch": 9.85483870967742, "loss": 0.83111572265625, "loss_ce": 6.102719635237008e-05, "loss_iou": 0.3671875, "loss_num": 0.01953125, "loss_xval": 0.83203125, "num_input_tokens_seen": 211954224, "step": 1833 }, { "epoch": 9.86021505376344, "grad_norm": 24.116535186767578, "learning_rate": 5e-07, "loss": 0.7015, "num_input_tokens_seen": 212070144, "step": 1834 }, { "epoch": 9.86021505376344, "loss": 0.676213800907135, "loss_ce": 6.636679609073326e-05, "loss_iou": 0.3046875, "loss_num": 0.01318359375, "loss_xval": 0.67578125, "num_input_tokens_seen": 212070144, "step": 1834 }, { "epoch": 9.865591397849462, "grad_norm": 19.85767364501953, "learning_rate": 5e-07, "loss": 0.635, "num_input_tokens_seen": 212188196, "step": 1835 }, { "epoch": 9.865591397849462, "loss": 0.9040833115577698, "loss_ce": 3.063316398765892e-05, "loss_iou": 0.408203125, "loss_num": 0.017578125, "loss_xval": 0.90234375, "num_input_tokens_seen": 212188196, "step": 1835 }, { "epoch": 9.870967741935484, "grad_norm": 22.787141799926758, "learning_rate": 5e-07, "loss": 0.6551, "num_input_tokens_seen": 212305724, "step": 1836 }, { "epoch": 9.870967741935484, "loss": 0.9270365834236145, "loss_ce": 3.4640830563148484e-05, "loss_iou": 0.412109375, "loss_num": 0.020751953125, "loss_xval": 0.92578125, "num_input_tokens_seen": 212305724, "step": 1836 }, { "epoch": 9.876344086021506, "grad_norm": 26.256162643432617, "learning_rate": 5e-07, "loss": 0.6805, "num_input_tokens_seen": 212422228, "step": 1837 }, { "epoch": 9.876344086021506, "loss": 0.7782435417175293, "loss_ce": 4.53617685707286e-05, "loss_iou": 0.314453125, "loss_num": 0.0294189453125, "loss_xval": 0.77734375, "num_input_tokens_seen": 212422228, "step": 1837 }, { "epoch": 9.881720430107526, "grad_norm": 25.861570358276367, "learning_rate": 5e-07, "loss": 0.6671, "num_input_tokens_seen": 212538228, "step": 1838 }, { "epoch": 9.881720430107526, "loss": 0.6638485193252563, "loss_ce": 3.0128463549772277e-05, "loss_iou": 0.28125, "loss_num": 0.0201416015625, "loss_xval": 0.6640625, "num_input_tokens_seen": 212538228, "step": 1838 }, { "epoch": 9.887096774193548, "grad_norm": 17.846294403076172, "learning_rate": 5e-07, "loss": 0.607, "num_input_tokens_seen": 212652568, "step": 1839 }, { "epoch": 9.887096774193548, "loss": 0.5295761227607727, "loss_ce": 3.511327668093145e-05, "loss_iou": 0.21875, "loss_num": 0.0184326171875, "loss_xval": 0.53125, "num_input_tokens_seen": 212652568, "step": 1839 }, { "epoch": 9.89247311827957, "grad_norm": 34.0125617980957, "learning_rate": 5e-07, "loss": 0.7483, "num_input_tokens_seen": 212765008, "step": 1840 }, { "epoch": 9.89247311827957, "loss": 0.8148450255393982, "loss_ce": 0.0001477553159929812, "loss_iou": 0.357421875, "loss_num": 0.0205078125, "loss_xval": 0.81640625, "num_input_tokens_seen": 212765008, "step": 1840 }, { "epoch": 9.897849462365592, "grad_norm": 25.041610717773438, "learning_rate": 5e-07, "loss": 0.6941, "num_input_tokens_seen": 212881160, "step": 1841 }, { "epoch": 9.897849462365592, "loss": 0.5521730184555054, "loss_ce": 0.00017106790619436651, "loss_iou": 0.228515625, "loss_num": 0.0189208984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 212881160, "step": 1841 }, { "epoch": 9.903225806451612, "grad_norm": 27.2064266204834, "learning_rate": 5e-07, "loss": 0.7189, "num_input_tokens_seen": 212998988, "step": 1842 }, { "epoch": 9.903225806451612, "loss": 0.7231823205947876, "loss_ce": 3.7730002077296376e-05, "loss_iou": 0.33203125, "loss_num": 0.011474609375, "loss_xval": 0.72265625, "num_input_tokens_seen": 212998988, "step": 1842 }, { "epoch": 9.908602150537634, "grad_norm": 21.187335968017578, "learning_rate": 5e-07, "loss": 0.8444, "num_input_tokens_seen": 213113308, "step": 1843 }, { "epoch": 9.908602150537634, "loss": 0.9086203575134277, "loss_ce": 5.104906449560076e-05, "loss_iou": 0.412109375, "loss_num": 0.01708984375, "loss_xval": 0.91015625, "num_input_tokens_seen": 213113308, "step": 1843 }, { "epoch": 9.913978494623656, "grad_norm": 19.37000274658203, "learning_rate": 5e-07, "loss": 0.7443, "num_input_tokens_seen": 213231208, "step": 1844 }, { "epoch": 9.913978494623656, "loss": 0.8931277394294739, "loss_ce": 0.0007937345653772354, "loss_iou": 0.37890625, "loss_num": 0.0269775390625, "loss_xval": 0.890625, "num_input_tokens_seen": 213231208, "step": 1844 }, { "epoch": 9.919354838709678, "grad_norm": 22.89154052734375, "learning_rate": 5e-07, "loss": 0.5993, "num_input_tokens_seen": 213351236, "step": 1845 }, { "epoch": 9.919354838709678, "loss": 0.710755467414856, "loss_ce": 6.21688668616116e-05, "loss_iou": 0.30078125, "loss_num": 0.0220947265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 213351236, "step": 1845 }, { "epoch": 9.924731182795698, "grad_norm": 21.003421783447266, "learning_rate": 5e-07, "loss": 0.7227, "num_input_tokens_seen": 213467608, "step": 1846 }, { "epoch": 9.924731182795698, "loss": 0.7896431684494019, "loss_ce": 9.23793122638017e-05, "loss_iou": 0.3359375, "loss_num": 0.02392578125, "loss_xval": 0.7890625, "num_input_tokens_seen": 213467608, "step": 1846 }, { "epoch": 9.93010752688172, "grad_norm": 17.243350982666016, "learning_rate": 5e-07, "loss": 0.8225, "num_input_tokens_seen": 213583440, "step": 1847 }, { "epoch": 9.93010752688172, "loss": 0.9859059453010559, "loss_ce": 6.611731078010052e-05, "loss_iou": 0.421875, "loss_num": 0.028564453125, "loss_xval": 0.984375, "num_input_tokens_seen": 213583440, "step": 1847 }, { "epoch": 9.935483870967742, "grad_norm": 37.58351516723633, "learning_rate": 5e-07, "loss": 0.7574, "num_input_tokens_seen": 213693880, "step": 1848 }, { "epoch": 9.935483870967742, "loss": 0.8786110877990723, "loss_ce": 7.104520045686513e-05, "loss_iou": 0.392578125, "loss_num": 0.018798828125, "loss_xval": 0.87890625, "num_input_tokens_seen": 213693880, "step": 1848 }, { "epoch": 9.940860215053764, "grad_norm": 14.304203987121582, "learning_rate": 5e-07, "loss": 0.8351, "num_input_tokens_seen": 213807376, "step": 1849 }, { "epoch": 9.940860215053764, "loss": 0.8723170161247253, "loss_ce": 6.356539233820513e-05, "loss_iou": 0.380859375, "loss_num": 0.0220947265625, "loss_xval": 0.87109375, "num_input_tokens_seen": 213807376, "step": 1849 }, { "epoch": 9.946236559139784, "grad_norm": 38.971351623535156, "learning_rate": 5e-07, "loss": 0.7412, "num_input_tokens_seen": 213925608, "step": 1850 }, { "epoch": 9.946236559139784, "loss": 0.888611912727356, "loss_ce": 6.20905339019373e-05, "loss_iou": 0.373046875, "loss_num": 0.028076171875, "loss_xval": 0.88671875, "num_input_tokens_seen": 213925608, "step": 1850 }, { "epoch": 9.951612903225806, "grad_norm": 23.12494659423828, "learning_rate": 5e-07, "loss": 0.7455, "num_input_tokens_seen": 214039196, "step": 1851 }, { "epoch": 9.951612903225806, "loss": 0.6650861501693726, "loss_ce": 4.703110971604474e-05, "loss_iou": 0.29296875, "loss_num": 0.0159912109375, "loss_xval": 0.6640625, "num_input_tokens_seen": 214039196, "step": 1851 }, { "epoch": 9.956989247311828, "grad_norm": 23.14842414855957, "learning_rate": 5e-07, "loss": 0.6946, "num_input_tokens_seen": 214153712, "step": 1852 }, { "epoch": 9.956989247311828, "loss": 0.8891783952713013, "loss_ce": 1.827147207222879e-05, "loss_iou": 0.392578125, "loss_num": 0.020751953125, "loss_xval": 0.890625, "num_input_tokens_seen": 214153712, "step": 1852 }, { "epoch": 9.96236559139785, "grad_norm": 23.997886657714844, "learning_rate": 5e-07, "loss": 0.7291, "num_input_tokens_seen": 214271896, "step": 1853 }, { "epoch": 9.96236559139785, "loss": 0.5760875940322876, "loss_ce": 3.7777437682962045e-05, "loss_iou": 0.244140625, "loss_num": 0.0177001953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 214271896, "step": 1853 }, { "epoch": 9.967741935483872, "grad_norm": 26.62306785583496, "learning_rate": 5e-07, "loss": 0.7172, "num_input_tokens_seen": 214386768, "step": 1854 }, { "epoch": 9.967741935483872, "loss": 0.6132495403289795, "loss_ce": 9.035359835252166e-05, "loss_iou": 0.265625, "loss_num": 0.0167236328125, "loss_xval": 0.61328125, "num_input_tokens_seen": 214386768, "step": 1854 }, { "epoch": 9.973118279569892, "grad_norm": 25.650270462036133, "learning_rate": 5e-07, "loss": 0.6164, "num_input_tokens_seen": 214503716, "step": 1855 }, { "epoch": 9.973118279569892, "loss": 0.48954689502716064, "loss_ce": 4.49433209723793e-05, "loss_iou": 0.21484375, "loss_num": 0.01214599609375, "loss_xval": 0.490234375, "num_input_tokens_seen": 214503716, "step": 1855 }, { "epoch": 9.978494623655914, "grad_norm": 28.200958251953125, "learning_rate": 5e-07, "loss": 0.6418, "num_input_tokens_seen": 214615352, "step": 1856 }, { "epoch": 9.978494623655914, "loss": 0.6105678081512451, "loss_ce": 3.3178861485794187e-05, "loss_iou": 0.26953125, "loss_num": 0.014404296875, "loss_xval": 0.609375, "num_input_tokens_seen": 214615352, "step": 1856 }, { "epoch": 9.983870967741936, "grad_norm": 17.08054542541504, "learning_rate": 5e-07, "loss": 0.7275, "num_input_tokens_seen": 214731920, "step": 1857 }, { "epoch": 9.983870967741936, "loss": 0.7700457572937012, "loss_ce": 2.623637737997342e-05, "loss_iou": 0.34375, "loss_num": 0.0167236328125, "loss_xval": 0.76953125, "num_input_tokens_seen": 214731920, "step": 1857 }, { "epoch": 9.989247311827956, "grad_norm": 18.1452579498291, "learning_rate": 5e-07, "loss": 0.6864, "num_input_tokens_seen": 214846408, "step": 1858 }, { "epoch": 9.989247311827956, "loss": 0.8489249348640442, "loss_ce": 4.7987177822506055e-05, "loss_iou": 0.373046875, "loss_num": 0.0206298828125, "loss_xval": 0.84765625, "num_input_tokens_seen": 214846408, "step": 1858 }, { "epoch": 9.994623655913978, "grad_norm": 26.315765380859375, "learning_rate": 5e-07, "loss": 0.9118, "num_input_tokens_seen": 214958520, "step": 1859 }, { "epoch": 9.994623655913978, "loss": 0.7715364694595337, "loss_ce": 5.210913150222041e-05, "loss_iou": 0.3359375, "loss_num": 0.0196533203125, "loss_xval": 0.7734375, "num_input_tokens_seen": 214958520, "step": 1859 }, { "epoch": 10.0, "grad_norm": 25.740806579589844, "learning_rate": 5e-07, "loss": 0.6219, "num_input_tokens_seen": 215076468, "step": 1860 }, { "epoch": 10.0, "loss": 0.9107390642166138, "loss_ce": 9.456673433305696e-05, "loss_iou": 0.38671875, "loss_num": 0.0272216796875, "loss_xval": 0.91015625, "num_input_tokens_seen": 215076468, "step": 1860 }, { "epoch": 10.005376344086022, "grad_norm": 40.83506393432617, "learning_rate": 5e-07, "loss": 0.7538, "num_input_tokens_seen": 215192476, "step": 1861 }, { "epoch": 10.005376344086022, "loss": 0.7998454570770264, "loss_ce": 4.0736231312621385e-05, "loss_iou": 0.333984375, "loss_num": 0.0263671875, "loss_xval": 0.80078125, "num_input_tokens_seen": 215192476, "step": 1861 }, { "epoch": 10.010752688172044, "grad_norm": 22.60944366455078, "learning_rate": 5e-07, "loss": 0.4937, "num_input_tokens_seen": 215309716, "step": 1862 }, { "epoch": 10.010752688172044, "loss": 0.4569406509399414, "loss_ce": 3.148045652778819e-05, "loss_iou": 0.19921875, "loss_num": 0.01177978515625, "loss_xval": 0.45703125, "num_input_tokens_seen": 215309716, "step": 1862 }, { "epoch": 10.016129032258064, "grad_norm": 18.676965713500977, "learning_rate": 5e-07, "loss": 0.7246, "num_input_tokens_seen": 215419348, "step": 1863 }, { "epoch": 10.016129032258064, "loss": 0.8734427094459534, "loss_ce": 2.956134994747117e-05, "loss_iou": 0.34765625, "loss_num": 0.035400390625, "loss_xval": 0.875, "num_input_tokens_seen": 215419348, "step": 1863 }, { "epoch": 10.021505376344086, "grad_norm": 25.32847785949707, "learning_rate": 5e-07, "loss": 0.5707, "num_input_tokens_seen": 215535888, "step": 1864 }, { "epoch": 10.021505376344086, "loss": 0.48465538024902344, "loss_ce": 3.6235753213986754e-05, "loss_iou": 0.197265625, "loss_num": 0.01806640625, "loss_xval": 0.484375, "num_input_tokens_seen": 215535888, "step": 1864 }, { "epoch": 10.026881720430108, "grad_norm": 17.69980239868164, "learning_rate": 5e-07, "loss": 0.6731, "num_input_tokens_seen": 215652608, "step": 1865 }, { "epoch": 10.026881720430108, "loss": 0.6208676099777222, "loss_ce": 1.802987935661804e-05, "loss_iou": 0.271484375, "loss_num": 0.01544189453125, "loss_xval": 0.62109375, "num_input_tokens_seen": 215652608, "step": 1865 }, { "epoch": 10.03225806451613, "grad_norm": 25.149309158325195, "learning_rate": 5e-07, "loss": 0.8194, "num_input_tokens_seen": 215770116, "step": 1866 }, { "epoch": 10.03225806451613, "loss": 1.4314275979995728, "loss_ce": 3.115785148111172e-05, "loss_iou": 0.62890625, "loss_num": 0.03564453125, "loss_xval": 1.4296875, "num_input_tokens_seen": 215770116, "step": 1866 }, { "epoch": 10.03763440860215, "grad_norm": 22.044164657592773, "learning_rate": 5e-07, "loss": 0.6333, "num_input_tokens_seen": 215883748, "step": 1867 }, { "epoch": 10.03763440860215, "loss": 0.5240383148193359, "loss_ce": 5.1431194151518866e-05, "loss_iou": 0.232421875, "loss_num": 0.011962890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 215883748, "step": 1867 }, { "epoch": 10.043010752688172, "grad_norm": 18.840320587158203, "learning_rate": 5e-07, "loss": 0.8676, "num_input_tokens_seen": 215998624, "step": 1868 }, { "epoch": 10.043010752688172, "loss": 1.056098222732544, "loss_ce": 6.78773649269715e-05, "loss_iou": 0.45703125, "loss_num": 0.0286865234375, "loss_xval": 1.0546875, "num_input_tokens_seen": 215998624, "step": 1868 }, { "epoch": 10.048387096774194, "grad_norm": 38.26198959350586, "learning_rate": 5e-07, "loss": 0.8746, "num_input_tokens_seen": 216114516, "step": 1869 }, { "epoch": 10.048387096774194, "loss": 1.383826494216919, "loss_ce": 3.7351353967096657e-05, "loss_iou": 0.60546875, "loss_num": 0.034423828125, "loss_xval": 1.3828125, "num_input_tokens_seen": 216114516, "step": 1869 }, { "epoch": 10.053763440860216, "grad_norm": 30.922683715820312, "learning_rate": 5e-07, "loss": 0.7525, "num_input_tokens_seen": 216227048, "step": 1870 }, { "epoch": 10.053763440860216, "loss": 0.5863409042358398, "loss_ce": 3.717873914865777e-05, "loss_iou": 0.2421875, "loss_num": 0.0203857421875, "loss_xval": 0.5859375, "num_input_tokens_seen": 216227048, "step": 1870 }, { "epoch": 10.059139784946236, "grad_norm": 28.78019905090332, "learning_rate": 5e-07, "loss": 0.8245, "num_input_tokens_seen": 216339752, "step": 1871 }, { "epoch": 10.059139784946236, "loss": 0.8023484349250793, "loss_ce": 0.00022439593158196658, "loss_iou": 0.34375, "loss_num": 0.02294921875, "loss_xval": 0.80078125, "num_input_tokens_seen": 216339752, "step": 1871 }, { "epoch": 10.064516129032258, "grad_norm": 18.264842987060547, "learning_rate": 5e-07, "loss": 0.5868, "num_input_tokens_seen": 216453764, "step": 1872 }, { "epoch": 10.064516129032258, "loss": 0.6144571304321289, "loss_ce": 7.725587056484073e-05, "loss_iou": 0.28515625, "loss_num": 0.009033203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 216453764, "step": 1872 }, { "epoch": 10.06989247311828, "grad_norm": 18.549848556518555, "learning_rate": 5e-07, "loss": 0.5906, "num_input_tokens_seen": 216568488, "step": 1873 }, { "epoch": 10.06989247311828, "loss": 0.7474032044410706, "loss_ce": 8.873986371327192e-05, "loss_iou": 0.3203125, "loss_num": 0.021484375, "loss_xval": 0.74609375, "num_input_tokens_seen": 216568488, "step": 1873 }, { "epoch": 10.075268817204302, "grad_norm": 16.19817543029785, "learning_rate": 5e-07, "loss": 0.5279, "num_input_tokens_seen": 216683264, "step": 1874 }, { "epoch": 10.075268817204302, "loss": 0.5879203081130981, "loss_ce": 2.9687393180211075e-05, "loss_iou": 0.2470703125, "loss_num": 0.018798828125, "loss_xval": 0.5859375, "num_input_tokens_seen": 216683264, "step": 1874 }, { "epoch": 10.080645161290322, "grad_norm": 23.16637420654297, "learning_rate": 5e-07, "loss": 0.6882, "num_input_tokens_seen": 216794660, "step": 1875 }, { "epoch": 10.080645161290322, "loss": 0.6235613226890564, "loss_ce": 2.6143250579480082e-05, "loss_iou": 0.25390625, "loss_num": 0.0233154296875, "loss_xval": 0.625, "num_input_tokens_seen": 216794660, "step": 1875 }, { "epoch": 10.086021505376344, "grad_norm": 25.48651695251465, "learning_rate": 5e-07, "loss": 0.6012, "num_input_tokens_seen": 216913468, "step": 1876 }, { "epoch": 10.086021505376344, "loss": 0.5725507140159607, "loss_ce": 4.093777170055546e-05, "loss_iou": 0.2314453125, "loss_num": 0.0216064453125, "loss_xval": 0.57421875, "num_input_tokens_seen": 216913468, "step": 1876 }, { "epoch": 10.091397849462366, "grad_norm": 38.400474548339844, "learning_rate": 5e-07, "loss": 0.752, "num_input_tokens_seen": 217033104, "step": 1877 }, { "epoch": 10.091397849462366, "loss": 0.6550604701042175, "loss_ce": 3.1174480682238936e-05, "loss_iou": 0.279296875, "loss_num": 0.0189208984375, "loss_xval": 0.65625, "num_input_tokens_seen": 217033104, "step": 1877 }, { "epoch": 10.096774193548388, "grad_norm": 19.576595306396484, "learning_rate": 5e-07, "loss": 0.8236, "num_input_tokens_seen": 217146844, "step": 1878 }, { "epoch": 10.096774193548388, "loss": 0.6348094344139099, "loss_ce": 4.381765393191017e-05, "loss_iou": 0.26953125, "loss_num": 0.0194091796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 217146844, "step": 1878 }, { "epoch": 10.102150537634408, "grad_norm": 21.865652084350586, "learning_rate": 5e-07, "loss": 0.677, "num_input_tokens_seen": 217260968, "step": 1879 }, { "epoch": 10.102150537634408, "loss": 0.818899393081665, "loss_ce": 5.1707069360418245e-05, "loss_iou": 0.359375, "loss_num": 0.0198974609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 217260968, "step": 1879 }, { "epoch": 10.10752688172043, "grad_norm": 33.49911880493164, "learning_rate": 5e-07, "loss": 0.6184, "num_input_tokens_seen": 217380700, "step": 1880 }, { "epoch": 10.10752688172043, "loss": 0.5237311124801636, "loss_ce": 4.948008427163586e-05, "loss_iou": 0.228515625, "loss_num": 0.01348876953125, "loss_xval": 0.5234375, "num_input_tokens_seen": 217380700, "step": 1880 }, { "epoch": 10.112903225806452, "grad_norm": 24.57210922241211, "learning_rate": 5e-07, "loss": 0.6551, "num_input_tokens_seen": 217494240, "step": 1881 }, { "epoch": 10.112903225806452, "loss": 0.4902607798576355, "loss_ce": 2.639609192556236e-05, "loss_iou": 0.2109375, "loss_num": 0.01348876953125, "loss_xval": 0.490234375, "num_input_tokens_seen": 217494240, "step": 1881 }, { "epoch": 10.118279569892474, "grad_norm": 47.88642883300781, "learning_rate": 5e-07, "loss": 0.7599, "num_input_tokens_seen": 217613608, "step": 1882 }, { "epoch": 10.118279569892474, "loss": 0.5095646381378174, "loss_ce": 4.313087265472859e-05, "loss_iou": 0.21484375, "loss_num": 0.0157470703125, "loss_xval": 0.5078125, "num_input_tokens_seen": 217613608, "step": 1882 }, { "epoch": 10.123655913978494, "grad_norm": 59.1188850402832, "learning_rate": 5e-07, "loss": 0.757, "num_input_tokens_seen": 217727908, "step": 1883 }, { "epoch": 10.123655913978494, "loss": 0.5879480242729187, "loss_ce": 5.74060577491764e-05, "loss_iou": 0.23046875, "loss_num": 0.0252685546875, "loss_xval": 0.5859375, "num_input_tokens_seen": 217727908, "step": 1883 }, { "epoch": 10.129032258064516, "grad_norm": 28.011640548706055, "learning_rate": 5e-07, "loss": 0.6051, "num_input_tokens_seen": 217844928, "step": 1884 }, { "epoch": 10.129032258064516, "loss": 0.6073314547538757, "loss_ce": 3.1658280931878835e-05, "loss_iou": 0.2578125, "loss_num": 0.0185546875, "loss_xval": 0.60546875, "num_input_tokens_seen": 217844928, "step": 1884 }, { "epoch": 10.134408602150538, "grad_norm": 21.24217414855957, "learning_rate": 5e-07, "loss": 0.6466, "num_input_tokens_seen": 217957256, "step": 1885 }, { "epoch": 10.134408602150538, "loss": 0.5200709700584412, "loss_ce": 5.144379611010663e-05, "loss_iou": 0.228515625, "loss_num": 0.0123291015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 217957256, "step": 1885 }, { "epoch": 10.13978494623656, "grad_norm": 21.546873092651367, "learning_rate": 5e-07, "loss": 0.7653, "num_input_tokens_seen": 218073244, "step": 1886 }, { "epoch": 10.13978494623656, "loss": 0.7087884545326233, "loss_ce": 4.820129106519744e-05, "loss_iou": 0.296875, "loss_num": 0.0230712890625, "loss_xval": 0.70703125, "num_input_tokens_seen": 218073244, "step": 1886 }, { "epoch": 10.14516129032258, "grad_norm": 24.666749954223633, "learning_rate": 5e-07, "loss": 0.7388, "num_input_tokens_seen": 218187524, "step": 1887 }, { "epoch": 10.14516129032258, "loss": 0.5777177810668945, "loss_ce": 8.100569539237767e-05, "loss_iou": 0.2275390625, "loss_num": 0.0242919921875, "loss_xval": 0.578125, "num_input_tokens_seen": 218187524, "step": 1887 }, { "epoch": 10.150537634408602, "grad_norm": 22.818058013916016, "learning_rate": 5e-07, "loss": 0.7882, "num_input_tokens_seen": 218301816, "step": 1888 }, { "epoch": 10.150537634408602, "loss": 0.6472571492195129, "loss_ce": 4.0384991734754294e-05, "loss_iou": 0.265625, "loss_num": 0.023193359375, "loss_xval": 0.6484375, "num_input_tokens_seen": 218301816, "step": 1888 }, { "epoch": 10.155913978494624, "grad_norm": 18.267210006713867, "learning_rate": 5e-07, "loss": 0.8437, "num_input_tokens_seen": 218413492, "step": 1889 }, { "epoch": 10.155913978494624, "loss": 1.0806559324264526, "loss_ce": 8.952812640927732e-05, "loss_iou": 0.474609375, "loss_num": 0.026611328125, "loss_xval": 1.078125, "num_input_tokens_seen": 218413492, "step": 1889 }, { "epoch": 10.161290322580646, "grad_norm": 20.76621437072754, "learning_rate": 5e-07, "loss": 0.7561, "num_input_tokens_seen": 218527652, "step": 1890 }, { "epoch": 10.161290322580646, "loss": 0.8130722045898438, "loss_ce": 8.390378934564069e-05, "loss_iou": 0.34375, "loss_num": 0.025146484375, "loss_xval": 0.8125, "num_input_tokens_seen": 218527652, "step": 1890 }, { "epoch": 10.166666666666666, "grad_norm": 29.068002700805664, "learning_rate": 5e-07, "loss": 0.6692, "num_input_tokens_seen": 218641248, "step": 1891 }, { "epoch": 10.166666666666666, "loss": 0.8242570161819458, "loss_ce": 3.82544967578724e-05, "loss_iou": 0.3203125, "loss_num": 0.03662109375, "loss_xval": 0.82421875, "num_input_tokens_seen": 218641248, "step": 1891 }, { "epoch": 10.172043010752688, "grad_norm": 19.30426788330078, "learning_rate": 5e-07, "loss": 0.6642, "num_input_tokens_seen": 218761440, "step": 1892 }, { "epoch": 10.172043010752688, "loss": 0.5962185859680176, "loss_ce": 2.7157935619470663e-05, "loss_iou": 0.263671875, "loss_num": 0.01336669921875, "loss_xval": 0.59765625, "num_input_tokens_seen": 218761440, "step": 1892 }, { "epoch": 10.17741935483871, "grad_norm": 22.152374267578125, "learning_rate": 5e-07, "loss": 0.5762, "num_input_tokens_seen": 218877536, "step": 1893 }, { "epoch": 10.17741935483871, "loss": 0.5129640698432922, "loss_ce": 2.4609926185803488e-05, "loss_iou": 0.2314453125, "loss_num": 0.01007080078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 218877536, "step": 1893 }, { "epoch": 10.182795698924732, "grad_norm": 28.58057403564453, "learning_rate": 5e-07, "loss": 0.716, "num_input_tokens_seen": 218995272, "step": 1894 }, { "epoch": 10.182795698924732, "loss": 0.6516376733779907, "loss_ce": 2.6307116058887914e-05, "loss_iou": 0.28125, "loss_num": 0.017578125, "loss_xval": 0.65234375, "num_input_tokens_seen": 218995272, "step": 1894 }, { "epoch": 10.188172043010752, "grad_norm": 19.361791610717773, "learning_rate": 5e-07, "loss": 0.6706, "num_input_tokens_seen": 219110384, "step": 1895 }, { "epoch": 10.188172043010752, "loss": 0.714630663394928, "loss_ce": 3.106805161223747e-05, "loss_iou": 0.310546875, "loss_num": 0.018310546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 219110384, "step": 1895 }, { "epoch": 10.193548387096774, "grad_norm": 19.98120880126953, "learning_rate": 5e-07, "loss": 0.6956, "num_input_tokens_seen": 219224504, "step": 1896 }, { "epoch": 10.193548387096774, "loss": 0.784442663192749, "loss_ce": 1.8919970898423344e-05, "loss_iou": 0.310546875, "loss_num": 0.032958984375, "loss_xval": 0.78515625, "num_input_tokens_seen": 219224504, "step": 1896 }, { "epoch": 10.198924731182796, "grad_norm": 20.061800003051758, "learning_rate": 5e-07, "loss": 0.6834, "num_input_tokens_seen": 219340784, "step": 1897 }, { "epoch": 10.198924731182796, "loss": 0.6520618796348572, "loss_ce": 8.434454503003508e-05, "loss_iou": 0.255859375, "loss_num": 0.028076171875, "loss_xval": 0.65234375, "num_input_tokens_seen": 219340784, "step": 1897 }, { "epoch": 10.204301075268818, "grad_norm": 22.227458953857422, "learning_rate": 5e-07, "loss": 0.6431, "num_input_tokens_seen": 219454180, "step": 1898 }, { "epoch": 10.204301075268818, "loss": 0.567209005355835, "loss_ce": 7.034176815068349e-05, "loss_iou": 0.2490234375, "loss_num": 0.0137939453125, "loss_xval": 0.56640625, "num_input_tokens_seen": 219454180, "step": 1898 }, { "epoch": 10.209677419354838, "grad_norm": 30.87779998779297, "learning_rate": 5e-07, "loss": 0.6388, "num_input_tokens_seen": 219568256, "step": 1899 }, { "epoch": 10.209677419354838, "loss": 0.565259575843811, "loss_ce": 7.396285946015269e-05, "loss_iou": 0.240234375, "loss_num": 0.01708984375, "loss_xval": 0.56640625, "num_input_tokens_seen": 219568256, "step": 1899 }, { "epoch": 10.21505376344086, "grad_norm": 31.380325317382812, "learning_rate": 5e-07, "loss": 0.7194, "num_input_tokens_seen": 219680648, "step": 1900 }, { "epoch": 10.21505376344086, "loss": 0.7514981031417847, "loss_ce": 3.323986675241031e-05, "loss_iou": 0.318359375, "loss_num": 0.0228271484375, "loss_xval": 0.75, "num_input_tokens_seen": 219680648, "step": 1900 }, { "epoch": 10.220430107526882, "grad_norm": 17.871938705444336, "learning_rate": 5e-07, "loss": 0.6236, "num_input_tokens_seen": 219797768, "step": 1901 }, { "epoch": 10.220430107526882, "loss": 0.6679960489273071, "loss_ce": 2.7338252039044164e-05, "loss_iou": 0.2890625, "loss_num": 0.0181884765625, "loss_xval": 0.66796875, "num_input_tokens_seen": 219797768, "step": 1901 }, { "epoch": 10.225806451612904, "grad_norm": 42.076080322265625, "learning_rate": 5e-07, "loss": 0.6022, "num_input_tokens_seen": 219912588, "step": 1902 }, { "epoch": 10.225806451612904, "loss": 0.6137999296188354, "loss_ce": 3.0406965379370376e-05, "loss_iou": 0.267578125, "loss_num": 0.01556396484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 219912588, "step": 1902 }, { "epoch": 10.231182795698924, "grad_norm": 27.258872985839844, "learning_rate": 5e-07, "loss": 0.5942, "num_input_tokens_seen": 220031004, "step": 1903 }, { "epoch": 10.231182795698924, "loss": 0.593576192855835, "loss_ce": 7.036324677756056e-05, "loss_iou": 0.2470703125, "loss_num": 0.019775390625, "loss_xval": 0.59375, "num_input_tokens_seen": 220031004, "step": 1903 }, { "epoch": 10.236559139784946, "grad_norm": 29.752037048339844, "learning_rate": 5e-07, "loss": 0.7487, "num_input_tokens_seen": 220145272, "step": 1904 }, { "epoch": 10.236559139784946, "loss": 0.9578033685684204, "loss_ce": 3.969396493630484e-05, "loss_iou": 0.400390625, "loss_num": 0.031494140625, "loss_xval": 0.95703125, "num_input_tokens_seen": 220145272, "step": 1904 }, { "epoch": 10.241935483870968, "grad_norm": 34.94432830810547, "learning_rate": 5e-07, "loss": 0.66, "num_input_tokens_seen": 220263152, "step": 1905 }, { "epoch": 10.241935483870968, "loss": 0.8569664359092712, "loss_ce": 3.281910176156089e-05, "loss_iou": 0.3515625, "loss_num": 0.03076171875, "loss_xval": 0.85546875, "num_input_tokens_seen": 220263152, "step": 1905 }, { "epoch": 10.24731182795699, "grad_norm": 29.75526237487793, "learning_rate": 5e-07, "loss": 0.8132, "num_input_tokens_seen": 220379492, "step": 1906 }, { "epoch": 10.24731182795699, "loss": 1.063521385192871, "loss_ce": 4.48245664301794e-05, "loss_iou": 0.455078125, "loss_num": 0.03076171875, "loss_xval": 1.0625, "num_input_tokens_seen": 220379492, "step": 1906 }, { "epoch": 10.25268817204301, "grad_norm": 24.724990844726562, "learning_rate": 5e-07, "loss": 0.5989, "num_input_tokens_seen": 220494508, "step": 1907 }, { "epoch": 10.25268817204301, "loss": 0.6764446496963501, "loss_ce": 5.308113759383559e-05, "loss_iou": 0.29296875, "loss_num": 0.0181884765625, "loss_xval": 0.67578125, "num_input_tokens_seen": 220494508, "step": 1907 }, { "epoch": 10.258064516129032, "grad_norm": 29.07135581970215, "learning_rate": 5e-07, "loss": 0.5556, "num_input_tokens_seen": 220608852, "step": 1908 }, { "epoch": 10.258064516129032, "loss": 0.5201647281646729, "loss_ce": 2.316813697689213e-05, "loss_iou": 0.22265625, "loss_num": 0.0147705078125, "loss_xval": 0.51953125, "num_input_tokens_seen": 220608852, "step": 1908 }, { "epoch": 10.263440860215054, "grad_norm": 19.07078742980957, "learning_rate": 5e-07, "loss": 0.6684, "num_input_tokens_seen": 220728192, "step": 1909 }, { "epoch": 10.263440860215054, "loss": 0.4137658476829529, "loss_ce": 6.959745951462537e-05, "loss_iou": 0.1826171875, "loss_num": 0.009765625, "loss_xval": 0.4140625, "num_input_tokens_seen": 220728192, "step": 1909 }, { "epoch": 10.268817204301076, "grad_norm": 15.234830856323242, "learning_rate": 5e-07, "loss": 0.678, "num_input_tokens_seen": 220845016, "step": 1910 }, { "epoch": 10.268817204301076, "loss": 0.8214367628097534, "loss_ce": 0.00014774870942346752, "loss_iou": 0.353515625, "loss_num": 0.022705078125, "loss_xval": 0.8203125, "num_input_tokens_seen": 220845016, "step": 1910 }, { "epoch": 10.274193548387096, "grad_norm": 26.903820037841797, "learning_rate": 5e-07, "loss": 0.7144, "num_input_tokens_seen": 220960012, "step": 1911 }, { "epoch": 10.274193548387096, "loss": 0.49244850873947144, "loss_ce": 1.6867184967850335e-05, "loss_iou": 0.21875, "loss_num": 0.0111083984375, "loss_xval": 0.4921875, "num_input_tokens_seen": 220960012, "step": 1911 }, { "epoch": 10.279569892473118, "grad_norm": 20.949953079223633, "learning_rate": 5e-07, "loss": 0.6744, "num_input_tokens_seen": 221077876, "step": 1912 }, { "epoch": 10.279569892473118, "loss": 0.6560376882553101, "loss_ce": 0.0002759769558906555, "loss_iou": 0.2890625, "loss_num": 0.0157470703125, "loss_xval": 0.65625, "num_input_tokens_seen": 221077876, "step": 1912 }, { "epoch": 10.28494623655914, "grad_norm": 23.848709106445312, "learning_rate": 5e-07, "loss": 0.8804, "num_input_tokens_seen": 221192460, "step": 1913 }, { "epoch": 10.28494623655914, "loss": 0.5397170186042786, "loss_ce": 4.415677540237084e-05, "loss_iou": 0.224609375, "loss_num": 0.0179443359375, "loss_xval": 0.5390625, "num_input_tokens_seen": 221192460, "step": 1913 }, { "epoch": 10.290322580645162, "grad_norm": 22.307783126831055, "learning_rate": 5e-07, "loss": 0.7259, "num_input_tokens_seen": 221304024, "step": 1914 }, { "epoch": 10.290322580645162, "loss": 0.5163952112197876, "loss_ce": 3.780951738008298e-05, "loss_iou": 0.2314453125, "loss_num": 0.01080322265625, "loss_xval": 0.515625, "num_input_tokens_seen": 221304024, "step": 1914 }, { "epoch": 10.295698924731182, "grad_norm": 23.866254806518555, "learning_rate": 5e-07, "loss": 0.7195, "num_input_tokens_seen": 221416088, "step": 1915 }, { "epoch": 10.295698924731182, "loss": 0.5932818651199341, "loss_ce": 2.010550451814197e-05, "loss_iou": 0.2470703125, "loss_num": 0.0198974609375, "loss_xval": 0.59375, "num_input_tokens_seen": 221416088, "step": 1915 }, { "epoch": 10.301075268817204, "grad_norm": 33.37058639526367, "learning_rate": 5e-07, "loss": 0.572, "num_input_tokens_seen": 221534308, "step": 1916 }, { "epoch": 10.301075268817204, "loss": 0.6384611129760742, "loss_ce": 3.3336051274091005e-05, "loss_iou": 0.287109375, "loss_num": 0.01287841796875, "loss_xval": 0.63671875, "num_input_tokens_seen": 221534308, "step": 1916 }, { "epoch": 10.306451612903226, "grad_norm": 43.71318054199219, "learning_rate": 5e-07, "loss": 0.7255, "num_input_tokens_seen": 221650424, "step": 1917 }, { "epoch": 10.306451612903226, "loss": 0.6977024078369141, "loss_ce": 7.060285133775324e-05, "loss_iou": 0.298828125, "loss_num": 0.0203857421875, "loss_xval": 0.69921875, "num_input_tokens_seen": 221650424, "step": 1917 }, { "epoch": 10.311827956989248, "grad_norm": 32.109500885009766, "learning_rate": 5e-07, "loss": 0.6483, "num_input_tokens_seen": 221765940, "step": 1918 }, { "epoch": 10.311827956989248, "loss": 0.7071126699447632, "loss_ce": 0.0005697461892850697, "loss_iou": 0.296875, "loss_num": 0.0228271484375, "loss_xval": 0.70703125, "num_input_tokens_seen": 221765940, "step": 1918 }, { "epoch": 10.317204301075268, "grad_norm": 20.498807907104492, "learning_rate": 5e-07, "loss": 0.5867, "num_input_tokens_seen": 221879960, "step": 1919 }, { "epoch": 10.317204301075268, "loss": 0.6033371090888977, "loss_ce": 6.562424823641777e-05, "loss_iou": 0.26171875, "loss_num": 0.0159912109375, "loss_xval": 0.6015625, "num_input_tokens_seen": 221879960, "step": 1919 }, { "epoch": 10.32258064516129, "grad_norm": 31.197072982788086, "learning_rate": 5e-07, "loss": 0.646, "num_input_tokens_seen": 221996580, "step": 1920 }, { "epoch": 10.32258064516129, "loss": 0.5729218125343323, "loss_ce": 4.5822882384527475e-05, "loss_iou": 0.248046875, "loss_num": 0.0152587890625, "loss_xval": 0.57421875, "num_input_tokens_seen": 221996580, "step": 1920 }, { "epoch": 10.327956989247312, "grad_norm": 23.86638832092285, "learning_rate": 5e-07, "loss": 0.7925, "num_input_tokens_seen": 222112160, "step": 1921 }, { "epoch": 10.327956989247312, "loss": 0.7876632213592529, "loss_ce": 0.0005538196419365704, "loss_iou": 0.345703125, "loss_num": 0.01953125, "loss_xval": 0.7890625, "num_input_tokens_seen": 222112160, "step": 1921 }, { "epoch": 10.333333333333334, "grad_norm": 23.512914657592773, "learning_rate": 5e-07, "loss": 0.6486, "num_input_tokens_seen": 222228636, "step": 1922 }, { "epoch": 10.333333333333334, "loss": 0.4327634274959564, "loss_ce": 2.4182028937502764e-05, "loss_iou": 0.1796875, "loss_num": 0.014404296875, "loss_xval": 0.43359375, "num_input_tokens_seen": 222228636, "step": 1922 }, { "epoch": 10.338709677419354, "grad_norm": 19.306015014648438, "learning_rate": 5e-07, "loss": 0.5371, "num_input_tokens_seen": 222343344, "step": 1923 }, { "epoch": 10.338709677419354, "loss": 0.5716241598129272, "loss_ce": 2.9895600164309144e-05, "loss_iou": 0.248046875, "loss_num": 0.01513671875, "loss_xval": 0.5703125, "num_input_tokens_seen": 222343344, "step": 1923 }, { "epoch": 10.344086021505376, "grad_norm": 15.347968101501465, "learning_rate": 5e-07, "loss": 0.6632, "num_input_tokens_seen": 222456824, "step": 1924 }, { "epoch": 10.344086021505376, "loss": 0.6533513069152832, "loss_ce": 3.094224666710943e-05, "loss_iou": 0.279296875, "loss_num": 0.0189208984375, "loss_xval": 0.65234375, "num_input_tokens_seen": 222456824, "step": 1924 }, { "epoch": 10.349462365591398, "grad_norm": 21.21315574645996, "learning_rate": 5e-07, "loss": 0.7912, "num_input_tokens_seen": 222570604, "step": 1925 }, { "epoch": 10.349462365591398, "loss": 0.7446939945220947, "loss_ce": 6.509012018796057e-05, "loss_iou": 0.3125, "loss_num": 0.02392578125, "loss_xval": 0.74609375, "num_input_tokens_seen": 222570604, "step": 1925 }, { "epoch": 10.35483870967742, "grad_norm": 30.530797958374023, "learning_rate": 5e-07, "loss": 0.6767, "num_input_tokens_seen": 222687668, "step": 1926 }, { "epoch": 10.35483870967742, "loss": 0.559639573097229, "loss_ce": 6.929271330591291e-05, "loss_iou": 0.2490234375, "loss_num": 0.0123291015625, "loss_xval": 0.55859375, "num_input_tokens_seen": 222687668, "step": 1926 }, { "epoch": 10.36021505376344, "grad_norm": 23.50404930114746, "learning_rate": 5e-07, "loss": 0.6878, "num_input_tokens_seen": 222802800, "step": 1927 }, { "epoch": 10.36021505376344, "loss": 0.5086895823478699, "loss_ce": 2.2582586097996682e-05, "loss_iou": 0.2060546875, "loss_num": 0.0191650390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 222802800, "step": 1927 }, { "epoch": 10.365591397849462, "grad_norm": 19.453935623168945, "learning_rate": 5e-07, "loss": 0.6156, "num_input_tokens_seen": 222918728, "step": 1928 }, { "epoch": 10.365591397849462, "loss": 0.666563093662262, "loss_ce": 0.00012020458234474063, "loss_iou": 0.291015625, "loss_num": 0.0167236328125, "loss_xval": 0.66796875, "num_input_tokens_seen": 222918728, "step": 1928 }, { "epoch": 10.370967741935484, "grad_norm": 22.496471405029297, "learning_rate": 5e-07, "loss": 0.683, "num_input_tokens_seen": 223032992, "step": 1929 }, { "epoch": 10.370967741935484, "loss": 0.6679986715316772, "loss_ce": 2.9939534215372987e-05, "loss_iou": 0.283203125, "loss_num": 0.0203857421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 223032992, "step": 1929 }, { "epoch": 10.376344086021506, "grad_norm": 29.194107055664062, "learning_rate": 5e-07, "loss": 0.9221, "num_input_tokens_seen": 223148376, "step": 1930 }, { "epoch": 10.376344086021506, "loss": 1.1350593566894531, "loss_ce": 4.948674177285284e-05, "loss_iou": 0.451171875, "loss_num": 0.046875, "loss_xval": 1.1328125, "num_input_tokens_seen": 223148376, "step": 1930 }, { "epoch": 10.381720430107526, "grad_norm": 19.649272918701172, "learning_rate": 5e-07, "loss": 0.8407, "num_input_tokens_seen": 223262656, "step": 1931 }, { "epoch": 10.381720430107526, "loss": 0.45265570282936096, "loss_ce": 1.8969156371895224e-05, "loss_iou": 0.181640625, "loss_num": 0.0177001953125, "loss_xval": 0.453125, "num_input_tokens_seen": 223262656, "step": 1931 }, { "epoch": 10.387096774193548, "grad_norm": 17.37013053894043, "learning_rate": 5e-07, "loss": 0.7088, "num_input_tokens_seen": 223381576, "step": 1932 }, { "epoch": 10.387096774193548, "loss": 0.5890978574752808, "loss_ce": 0.00023062952095642686, "loss_iou": 0.263671875, "loss_num": 0.0125732421875, "loss_xval": 0.58984375, "num_input_tokens_seen": 223381576, "step": 1932 }, { "epoch": 10.39247311827957, "grad_norm": 25.799482345581055, "learning_rate": 5e-07, "loss": 0.6876, "num_input_tokens_seen": 223492860, "step": 1933 }, { "epoch": 10.39247311827957, "loss": 0.8264507055282593, "loss_ce": 0.00015679857460781932, "loss_iou": 0.37109375, "loss_num": 0.016845703125, "loss_xval": 0.828125, "num_input_tokens_seen": 223492860, "step": 1933 }, { "epoch": 10.397849462365592, "grad_norm": 17.14049530029297, "learning_rate": 5e-07, "loss": 0.7363, "num_input_tokens_seen": 223610384, "step": 1934 }, { "epoch": 10.397849462365592, "loss": 0.6620342135429382, "loss_ce": 4.690287460107356e-05, "loss_iou": 0.2578125, "loss_num": 0.0291748046875, "loss_xval": 0.66015625, "num_input_tokens_seen": 223610384, "step": 1934 }, { "epoch": 10.403225806451612, "grad_norm": 26.710826873779297, "learning_rate": 5e-07, "loss": 0.7621, "num_input_tokens_seen": 223726676, "step": 1935 }, { "epoch": 10.403225806451612, "loss": 0.625300943851471, "loss_ce": 5.681889888364822e-05, "loss_iou": 0.271484375, "loss_num": 0.0166015625, "loss_xval": 0.625, "num_input_tokens_seen": 223726676, "step": 1935 }, { "epoch": 10.408602150537634, "grad_norm": 24.71769142150879, "learning_rate": 5e-07, "loss": 0.7504, "num_input_tokens_seen": 223840204, "step": 1936 }, { "epoch": 10.408602150537634, "loss": 0.5941591262817383, "loss_ce": 4.296081169741228e-05, "loss_iou": 0.2578125, "loss_num": 0.0159912109375, "loss_xval": 0.59375, "num_input_tokens_seen": 223840204, "step": 1936 }, { "epoch": 10.413978494623656, "grad_norm": 24.873638153076172, "learning_rate": 5e-07, "loss": 0.7004, "num_input_tokens_seen": 223953236, "step": 1937 }, { "epoch": 10.413978494623656, "loss": 0.922877311706543, "loss_ce": 2.5719327823026106e-05, "loss_iou": 0.384765625, "loss_num": 0.0308837890625, "loss_xval": 0.921875, "num_input_tokens_seen": 223953236, "step": 1937 }, { "epoch": 10.419354838709678, "grad_norm": 21.278043746948242, "learning_rate": 5e-07, "loss": 0.83, "num_input_tokens_seen": 224066832, "step": 1938 }, { "epoch": 10.419354838709678, "loss": 0.9671182632446289, "loss_ce": 7.728852506261319e-05, "loss_iou": 0.4140625, "loss_num": 0.0281982421875, "loss_xval": 0.96875, "num_input_tokens_seen": 224066832, "step": 1938 }, { "epoch": 10.424731182795698, "grad_norm": 19.139127731323242, "learning_rate": 5e-07, "loss": 0.849, "num_input_tokens_seen": 224184368, "step": 1939 }, { "epoch": 10.424731182795698, "loss": 1.2620309591293335, "loss_ce": 6.803418364142999e-05, "loss_iou": 0.5546875, "loss_num": 0.0306396484375, "loss_xval": 1.265625, "num_input_tokens_seen": 224184368, "step": 1939 }, { "epoch": 10.43010752688172, "grad_norm": 21.412351608276367, "learning_rate": 5e-07, "loss": 0.7814, "num_input_tokens_seen": 224300500, "step": 1940 }, { "epoch": 10.43010752688172, "loss": 0.6455463767051697, "loss_ce": 3.85448984161485e-05, "loss_iou": 0.267578125, "loss_num": 0.022216796875, "loss_xval": 0.64453125, "num_input_tokens_seen": 224300500, "step": 1940 }, { "epoch": 10.435483870967742, "grad_norm": 18.580537796020508, "learning_rate": 5e-07, "loss": 0.7134, "num_input_tokens_seen": 224413568, "step": 1941 }, { "epoch": 10.435483870967742, "loss": 0.8115792870521545, "loss_ce": 5.582879020948894e-05, "loss_iou": 0.3671875, "loss_num": 0.01519775390625, "loss_xval": 0.8125, "num_input_tokens_seen": 224413568, "step": 1941 }, { "epoch": 10.440860215053764, "grad_norm": 39.86334228515625, "learning_rate": 5e-07, "loss": 0.6398, "num_input_tokens_seen": 224527288, "step": 1942 }, { "epoch": 10.440860215053764, "loss": 0.8122904300689697, "loss_ce": 3.452736200415529e-05, "loss_iou": 0.353515625, "loss_num": 0.020751953125, "loss_xval": 0.8125, "num_input_tokens_seen": 224527288, "step": 1942 }, { "epoch": 10.446236559139784, "grad_norm": 19.44553565979004, "learning_rate": 5e-07, "loss": 0.6758, "num_input_tokens_seen": 224643708, "step": 1943 }, { "epoch": 10.446236559139784, "loss": 0.8007032871246338, "loss_ce": 0.00016616677748970687, "loss_iou": 0.3515625, "loss_num": 0.0194091796875, "loss_xval": 0.80078125, "num_input_tokens_seen": 224643708, "step": 1943 }, { "epoch": 10.451612903225806, "grad_norm": 23.735225677490234, "learning_rate": 5e-07, "loss": 0.7679, "num_input_tokens_seen": 224758716, "step": 1944 }, { "epoch": 10.451612903225806, "loss": 0.6201485395431519, "loss_ce": 3.134114376734942e-05, "loss_iou": 0.26171875, "loss_num": 0.0194091796875, "loss_xval": 0.62109375, "num_input_tokens_seen": 224758716, "step": 1944 }, { "epoch": 10.456989247311828, "grad_norm": 19.063840866088867, "learning_rate": 5e-07, "loss": 0.7993, "num_input_tokens_seen": 224871244, "step": 1945 }, { "epoch": 10.456989247311828, "loss": 1.1473779678344727, "loss_ce": 0.00022222421830520034, "loss_iou": 0.48828125, "loss_num": 0.033935546875, "loss_xval": 1.1484375, "num_input_tokens_seen": 224871244, "step": 1945 }, { "epoch": 10.46236559139785, "grad_norm": 23.9424991607666, "learning_rate": 5e-07, "loss": 0.7931, "num_input_tokens_seen": 224986356, "step": 1946 }, { "epoch": 10.46236559139785, "loss": 0.6754788160324097, "loss_ce": 6.379086698871106e-05, "loss_iou": 0.30078125, "loss_num": 0.01446533203125, "loss_xval": 0.67578125, "num_input_tokens_seen": 224986356, "step": 1946 }, { "epoch": 10.46774193548387, "grad_norm": 29.5279598236084, "learning_rate": 5e-07, "loss": 0.6449, "num_input_tokens_seen": 225099720, "step": 1947 }, { "epoch": 10.46774193548387, "loss": 0.643833577632904, "loss_ce": 3.47529276041314e-05, "loss_iou": 0.283203125, "loss_num": 0.0152587890625, "loss_xval": 0.64453125, "num_input_tokens_seen": 225099720, "step": 1947 }, { "epoch": 10.473118279569892, "grad_norm": 39.28269958496094, "learning_rate": 5e-07, "loss": 0.7337, "num_input_tokens_seen": 225214916, "step": 1948 }, { "epoch": 10.473118279569892, "loss": 0.7967339754104614, "loss_ce": 0.00010315069084754214, "loss_iou": 0.34375, "loss_num": 0.021484375, "loss_xval": 0.796875, "num_input_tokens_seen": 225214916, "step": 1948 }, { "epoch": 10.478494623655914, "grad_norm": 27.521625518798828, "learning_rate": 5e-07, "loss": 0.7016, "num_input_tokens_seen": 225327208, "step": 1949 }, { "epoch": 10.478494623655914, "loss": 0.9945357441902161, "loss_ce": 2.888631024688948e-05, "loss_iou": 0.404296875, "loss_num": 0.03759765625, "loss_xval": 0.99609375, "num_input_tokens_seen": 225327208, "step": 1949 }, { "epoch": 10.483870967741936, "grad_norm": 18.269010543823242, "learning_rate": 5e-07, "loss": 0.6168, "num_input_tokens_seen": 225440108, "step": 1950 }, { "epoch": 10.483870967741936, "loss": 0.6257836222648621, "loss_ce": 5.121594585943967e-05, "loss_iou": 0.2490234375, "loss_num": 0.025390625, "loss_xval": 0.625, "num_input_tokens_seen": 225440108, "step": 1950 }, { "epoch": 10.489247311827956, "grad_norm": 32.89683532714844, "learning_rate": 5e-07, "loss": 0.7292, "num_input_tokens_seen": 225556908, "step": 1951 }, { "epoch": 10.489247311827956, "loss": 0.9643956422805786, "loss_ce": 4.021893619210459e-05, "loss_iou": 0.396484375, "loss_num": 0.03466796875, "loss_xval": 0.96484375, "num_input_tokens_seen": 225556908, "step": 1951 }, { "epoch": 10.494623655913978, "grad_norm": 25.84834098815918, "learning_rate": 5e-07, "loss": 0.7058, "num_input_tokens_seen": 225675328, "step": 1952 }, { "epoch": 10.494623655913978, "loss": 0.8230438232421875, "loss_ce": 4.578894731821492e-05, "loss_iou": 0.36328125, "loss_num": 0.0191650390625, "loss_xval": 0.82421875, "num_input_tokens_seen": 225675328, "step": 1952 }, { "epoch": 10.5, "grad_norm": 30.831052780151367, "learning_rate": 5e-07, "loss": 0.9068, "num_input_tokens_seen": 225786788, "step": 1953 }, { "epoch": 10.5, "loss": 0.9102528691291809, "loss_ce": 9.659984789323062e-05, "loss_iou": 0.40234375, "loss_num": 0.0216064453125, "loss_xval": 0.91015625, "num_input_tokens_seen": 225786788, "step": 1953 }, { "epoch": 10.505376344086022, "grad_norm": 19.893701553344727, "learning_rate": 5e-07, "loss": 0.5685, "num_input_tokens_seen": 225904880, "step": 1954 }, { "epoch": 10.505376344086022, "loss": 0.6763559579849243, "loss_ce": 8.64259636728093e-05, "loss_iou": 0.28125, "loss_num": 0.022216796875, "loss_xval": 0.67578125, "num_input_tokens_seen": 225904880, "step": 1954 }, { "epoch": 10.510752688172044, "grad_norm": 29.222808837890625, "learning_rate": 5e-07, "loss": 0.825, "num_input_tokens_seen": 226022388, "step": 1955 }, { "epoch": 10.510752688172044, "loss": 0.7083009481430054, "loss_ce": 1.8476690456736833e-05, "loss_iou": 0.2890625, "loss_num": 0.026611328125, "loss_xval": 0.70703125, "num_input_tokens_seen": 226022388, "step": 1955 }, { "epoch": 10.516129032258064, "grad_norm": 42.21616744995117, "learning_rate": 5e-07, "loss": 0.7407, "num_input_tokens_seen": 226137320, "step": 1956 }, { "epoch": 10.516129032258064, "loss": 0.7058463096618652, "loss_ce": 3.5781638871412724e-05, "loss_iou": 0.3125, "loss_num": 0.0164794921875, "loss_xval": 0.70703125, "num_input_tokens_seen": 226137320, "step": 1956 }, { "epoch": 10.521505376344086, "grad_norm": 35.285499572753906, "learning_rate": 5e-07, "loss": 0.5731, "num_input_tokens_seen": 226247424, "step": 1957 }, { "epoch": 10.521505376344086, "loss": 0.5121692419052124, "loss_ce": 2.325059540453367e-05, "loss_iou": 0.2119140625, "loss_num": 0.0177001953125, "loss_xval": 0.51171875, "num_input_tokens_seen": 226247424, "step": 1957 }, { "epoch": 10.526881720430108, "grad_norm": 19.689002990722656, "learning_rate": 5e-07, "loss": 0.5878, "num_input_tokens_seen": 226362604, "step": 1958 }, { "epoch": 10.526881720430108, "loss": 0.638597309589386, "loss_ce": 4.750265725306235e-05, "loss_iou": 0.28515625, "loss_num": 0.013916015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 226362604, "step": 1958 }, { "epoch": 10.532258064516128, "grad_norm": 23.919153213500977, "learning_rate": 5e-07, "loss": 0.7175, "num_input_tokens_seen": 226480328, "step": 1959 }, { "epoch": 10.532258064516128, "loss": 0.7540658116340637, "loss_ce": 3.749397365027107e-05, "loss_iou": 0.330078125, "loss_num": 0.0186767578125, "loss_xval": 0.75390625, "num_input_tokens_seen": 226480328, "step": 1959 }, { "epoch": 10.53763440860215, "grad_norm": 21.407066345214844, "learning_rate": 5e-07, "loss": 0.6791, "num_input_tokens_seen": 226593108, "step": 1960 }, { "epoch": 10.53763440860215, "loss": 0.8172352313995361, "loss_ce": 9.662326920079067e-05, "loss_iou": 0.3359375, "loss_num": 0.02880859375, "loss_xval": 0.81640625, "num_input_tokens_seen": 226593108, "step": 1960 }, { "epoch": 10.543010752688172, "grad_norm": 23.03013038635254, "learning_rate": 5e-07, "loss": 0.6909, "num_input_tokens_seen": 226709468, "step": 1961 }, { "epoch": 10.543010752688172, "loss": 0.8896295428276062, "loss_ce": 0.0004694499948527664, "loss_iou": 0.392578125, "loss_num": 0.0206298828125, "loss_xval": 0.890625, "num_input_tokens_seen": 226709468, "step": 1961 }, { "epoch": 10.548387096774194, "grad_norm": 33.84095001220703, "learning_rate": 5e-07, "loss": 0.5665, "num_input_tokens_seen": 226826064, "step": 1962 }, { "epoch": 10.548387096774194, "loss": 0.7234290242195129, "loss_ce": 4.034363882965408e-05, "loss_iou": 0.3125, "loss_num": 0.0196533203125, "loss_xval": 0.72265625, "num_input_tokens_seen": 226826064, "step": 1962 }, { "epoch": 10.553763440860216, "grad_norm": 25.935903549194336, "learning_rate": 5e-07, "loss": 0.6671, "num_input_tokens_seen": 226941404, "step": 1963 }, { "epoch": 10.553763440860216, "loss": 0.9317106008529663, "loss_ce": 0.00031413318356499076, "loss_iou": 0.421875, "loss_num": 0.0177001953125, "loss_xval": 0.9296875, "num_input_tokens_seen": 226941404, "step": 1963 }, { "epoch": 10.559139784946236, "grad_norm": 22.947336196899414, "learning_rate": 5e-07, "loss": 0.6228, "num_input_tokens_seen": 227056164, "step": 1964 }, { "epoch": 10.559139784946236, "loss": 0.6760610342025757, "loss_ce": 3.5618933907244354e-05, "loss_iou": 0.30859375, "loss_num": 0.01153564453125, "loss_xval": 0.67578125, "num_input_tokens_seen": 227056164, "step": 1964 }, { "epoch": 10.564516129032258, "grad_norm": 26.81161880493164, "learning_rate": 5e-07, "loss": 0.7139, "num_input_tokens_seen": 227169960, "step": 1965 }, { "epoch": 10.564516129032258, "loss": 0.7329831719398499, "loss_ce": 4.2496336391195655e-05, "loss_iou": 0.296875, "loss_num": 0.0279541015625, "loss_xval": 0.734375, "num_input_tokens_seen": 227169960, "step": 1965 }, { "epoch": 10.56989247311828, "grad_norm": 21.370882034301758, "learning_rate": 5e-07, "loss": 0.9014, "num_input_tokens_seen": 227284584, "step": 1966 }, { "epoch": 10.56989247311828, "loss": 0.782137393951416, "loss_ce": 3.286015271442011e-05, "loss_iou": 0.318359375, "loss_num": 0.0291748046875, "loss_xval": 0.78125, "num_input_tokens_seen": 227284584, "step": 1966 }, { "epoch": 10.575268817204302, "grad_norm": 26.29055404663086, "learning_rate": 5e-07, "loss": 0.6644, "num_input_tokens_seen": 227401020, "step": 1967 }, { "epoch": 10.575268817204302, "loss": 0.8343780636787415, "loss_ce": 2.7439102268544957e-05, "loss_iou": 0.349609375, "loss_num": 0.02734375, "loss_xval": 0.8359375, "num_input_tokens_seen": 227401020, "step": 1967 }, { "epoch": 10.580645161290322, "grad_norm": 17.701431274414062, "learning_rate": 5e-07, "loss": 0.68, "num_input_tokens_seen": 227517008, "step": 1968 }, { "epoch": 10.580645161290322, "loss": 0.559371292591095, "loss_ce": 4.512501982389949e-05, "loss_iou": 0.2275390625, "loss_num": 0.0211181640625, "loss_xval": 0.55859375, "num_input_tokens_seen": 227517008, "step": 1968 }, { "epoch": 10.586021505376344, "grad_norm": 23.33453369140625, "learning_rate": 5e-07, "loss": 0.8039, "num_input_tokens_seen": 227635096, "step": 1969 }, { "epoch": 10.586021505376344, "loss": 0.8320925831794739, "loss_ce": 6.134141585789621e-05, "loss_iou": 0.369140625, "loss_num": 0.01904296875, "loss_xval": 0.83203125, "num_input_tokens_seen": 227635096, "step": 1969 }, { "epoch": 10.591397849462366, "grad_norm": 17.03058624267578, "learning_rate": 5e-07, "loss": 0.6619, "num_input_tokens_seen": 227749800, "step": 1970 }, { "epoch": 10.591397849462366, "loss": 0.910195529460907, "loss_ce": 3.928943624487147e-05, "loss_iou": 0.392578125, "loss_num": 0.025390625, "loss_xval": 0.91015625, "num_input_tokens_seen": 227749800, "step": 1970 }, { "epoch": 10.596774193548388, "grad_norm": 20.881061553955078, "learning_rate": 5e-07, "loss": 0.7059, "num_input_tokens_seen": 227866416, "step": 1971 }, { "epoch": 10.596774193548388, "loss": 0.6404104828834534, "loss_ce": 2.9628403353854083e-05, "loss_iou": 0.267578125, "loss_num": 0.0213623046875, "loss_xval": 0.640625, "num_input_tokens_seen": 227866416, "step": 1971 }, { "epoch": 10.602150537634408, "grad_norm": 21.537508010864258, "learning_rate": 5e-07, "loss": 0.5639, "num_input_tokens_seen": 227980508, "step": 1972 }, { "epoch": 10.602150537634408, "loss": 0.443443238735199, "loss_ce": 2.2803764295531437e-05, "loss_iou": 0.185546875, "loss_num": 0.0145263671875, "loss_xval": 0.443359375, "num_input_tokens_seen": 227980508, "step": 1972 }, { "epoch": 10.60752688172043, "grad_norm": 42.30523681640625, "learning_rate": 5e-07, "loss": 0.6352, "num_input_tokens_seen": 228093876, "step": 1973 }, { "epoch": 10.60752688172043, "loss": 0.5493689775466919, "loss_ce": 5.2542658522725105e-05, "loss_iou": 0.25, "loss_num": 0.00970458984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 228093876, "step": 1973 }, { "epoch": 10.612903225806452, "grad_norm": 28.820213317871094, "learning_rate": 5e-07, "loss": 0.6647, "num_input_tokens_seen": 228207340, "step": 1974 }, { "epoch": 10.612903225806452, "loss": 0.6069148778915405, "loss_ce": 0.00010337060666643083, "loss_iou": 0.2578125, "loss_num": 0.017822265625, "loss_xval": 0.60546875, "num_input_tokens_seen": 228207340, "step": 1974 }, { "epoch": 10.618279569892474, "grad_norm": 17.523483276367188, "learning_rate": 5e-07, "loss": 0.6358, "num_input_tokens_seen": 228324368, "step": 1975 }, { "epoch": 10.618279569892474, "loss": 0.9590786695480347, "loss_ce": 9.426888573216274e-05, "loss_iou": 0.42578125, "loss_num": 0.0213623046875, "loss_xval": 0.9609375, "num_input_tokens_seen": 228324368, "step": 1975 }, { "epoch": 10.623655913978494, "grad_norm": 19.376867294311523, "learning_rate": 5e-07, "loss": 0.69, "num_input_tokens_seen": 228443352, "step": 1976 }, { "epoch": 10.623655913978494, "loss": 0.603277862071991, "loss_ce": 0.00012844523007515818, "loss_iou": 0.251953125, "loss_num": 0.0198974609375, "loss_xval": 0.6015625, "num_input_tokens_seen": 228443352, "step": 1976 }, { "epoch": 10.629032258064516, "grad_norm": 33.77912139892578, "learning_rate": 5e-07, "loss": 0.7023, "num_input_tokens_seen": 228562568, "step": 1977 }, { "epoch": 10.629032258064516, "loss": 0.8269829750061035, "loss_ce": 7.866607484174892e-05, "loss_iou": 0.373046875, "loss_num": 0.016357421875, "loss_xval": 0.828125, "num_input_tokens_seen": 228562568, "step": 1977 }, { "epoch": 10.634408602150538, "grad_norm": 42.037559509277344, "learning_rate": 5e-07, "loss": 0.5891, "num_input_tokens_seen": 228678292, "step": 1978 }, { "epoch": 10.634408602150538, "loss": 0.43462663888931274, "loss_ce": 5.631222666124813e-05, "loss_iou": 0.1884765625, "loss_num": 0.01171875, "loss_xval": 0.43359375, "num_input_tokens_seen": 228678292, "step": 1978 }, { "epoch": 10.63978494623656, "grad_norm": 25.35824203491211, "learning_rate": 5e-07, "loss": 0.6659, "num_input_tokens_seen": 228793028, "step": 1979 }, { "epoch": 10.63978494623656, "loss": 0.5242252945899963, "loss_ce": 5.538136974791996e-05, "loss_iou": 0.208984375, "loss_num": 0.021484375, "loss_xval": 0.5234375, "num_input_tokens_seen": 228793028, "step": 1979 }, { "epoch": 10.64516129032258, "grad_norm": 37.304962158203125, "learning_rate": 5e-07, "loss": 0.7389, "num_input_tokens_seen": 228909168, "step": 1980 }, { "epoch": 10.64516129032258, "loss": 0.7456569671630859, "loss_ce": 5.149126809556037e-05, "loss_iou": 0.322265625, "loss_num": 0.0205078125, "loss_xval": 0.74609375, "num_input_tokens_seen": 228909168, "step": 1980 }, { "epoch": 10.650537634408602, "grad_norm": 36.090736389160156, "learning_rate": 5e-07, "loss": 0.6912, "num_input_tokens_seen": 229024704, "step": 1981 }, { "epoch": 10.650537634408602, "loss": 0.6868159770965576, "loss_ce": 4.838963286601938e-05, "loss_iou": 0.28515625, "loss_num": 0.0234375, "loss_xval": 0.6875, "num_input_tokens_seen": 229024704, "step": 1981 }, { "epoch": 10.655913978494624, "grad_norm": 53.196258544921875, "learning_rate": 5e-07, "loss": 0.6246, "num_input_tokens_seen": 229138936, "step": 1982 }, { "epoch": 10.655913978494624, "loss": 0.5386247634887695, "loss_ce": 5.0526658014860004e-05, "loss_iou": 0.228515625, "loss_num": 0.0164794921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 229138936, "step": 1982 }, { "epoch": 10.661290322580646, "grad_norm": 32.27177810668945, "learning_rate": 5e-07, "loss": 0.5845, "num_input_tokens_seen": 229256324, "step": 1983 }, { "epoch": 10.661290322580646, "loss": 0.602318286895752, "loss_ce": 2.3402679289574735e-05, "loss_iou": 0.275390625, "loss_num": 0.01031494140625, "loss_xval": 0.6015625, "num_input_tokens_seen": 229256324, "step": 1983 }, { "epoch": 10.666666666666666, "grad_norm": 21.259103775024414, "learning_rate": 5e-07, "loss": 0.7371, "num_input_tokens_seen": 229372208, "step": 1984 }, { "epoch": 10.666666666666666, "loss": 0.6306367516517639, "loss_ce": 2.150660293409601e-05, "loss_iou": 0.25390625, "loss_num": 0.02490234375, "loss_xval": 0.62890625, "num_input_tokens_seen": 229372208, "step": 1984 }, { "epoch": 10.672043010752688, "grad_norm": 18.527677536010742, "learning_rate": 5e-07, "loss": 0.7294, "num_input_tokens_seen": 229490356, "step": 1985 }, { "epoch": 10.672043010752688, "loss": 0.7152329683303833, "loss_ce": 2.3031947421259247e-05, "loss_iou": 0.32421875, "loss_num": 0.013671875, "loss_xval": 0.71484375, "num_input_tokens_seen": 229490356, "step": 1985 }, { "epoch": 10.67741935483871, "grad_norm": 22.846298217773438, "learning_rate": 5e-07, "loss": 0.7167, "num_input_tokens_seen": 229606572, "step": 1986 }, { "epoch": 10.67741935483871, "loss": 0.8195562362670898, "loss_ce": 0.00022031637490727007, "loss_iou": 0.328125, "loss_num": 0.03271484375, "loss_xval": 0.8203125, "num_input_tokens_seen": 229606572, "step": 1986 }, { "epoch": 10.682795698924732, "grad_norm": 23.220006942749023, "learning_rate": 5e-07, "loss": 0.5614, "num_input_tokens_seen": 229724860, "step": 1987 }, { "epoch": 10.682795698924732, "loss": 0.49132227897644043, "loss_ce": 0.0001113622056436725, "loss_iou": 0.2236328125, "loss_num": 0.0089111328125, "loss_xval": 0.4921875, "num_input_tokens_seen": 229724860, "step": 1987 }, { "epoch": 10.688172043010752, "grad_norm": 26.792072296142578, "learning_rate": 5e-07, "loss": 0.6239, "num_input_tokens_seen": 229841760, "step": 1988 }, { "epoch": 10.688172043010752, "loss": 1.0284807682037354, "loss_ce": 3.8401638448704034e-05, "loss_iou": 0.435546875, "loss_num": 0.03173828125, "loss_xval": 1.03125, "num_input_tokens_seen": 229841760, "step": 1988 }, { "epoch": 10.693548387096774, "grad_norm": 19.006128311157227, "learning_rate": 5e-07, "loss": 0.617, "num_input_tokens_seen": 229953780, "step": 1989 }, { "epoch": 10.693548387096774, "loss": 0.5873334407806396, "loss_ce": 5.316159513313323e-05, "loss_iou": 0.232421875, "loss_num": 0.0244140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 229953780, "step": 1989 }, { "epoch": 10.698924731182796, "grad_norm": 21.84731674194336, "learning_rate": 5e-07, "loss": 0.6876, "num_input_tokens_seen": 230073236, "step": 1990 }, { "epoch": 10.698924731182796, "loss": 0.45413485169410706, "loss_ce": 3.32887502736412e-05, "loss_iou": 0.1845703125, "loss_num": 0.01708984375, "loss_xval": 0.453125, "num_input_tokens_seen": 230073236, "step": 1990 }, { "epoch": 10.704301075268818, "grad_norm": 26.693727493286133, "learning_rate": 5e-07, "loss": 0.6664, "num_input_tokens_seen": 230189928, "step": 1991 }, { "epoch": 10.704301075268818, "loss": 0.5028948783874512, "loss_ce": 8.723134669708088e-05, "loss_iou": 0.2177734375, "loss_num": 0.0135498046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 230189928, "step": 1991 }, { "epoch": 10.709677419354838, "grad_norm": 20.99868392944336, "learning_rate": 5e-07, "loss": 0.5556, "num_input_tokens_seen": 230305896, "step": 1992 }, { "epoch": 10.709677419354838, "loss": 0.5262702703475952, "loss_ce": 2.5163000827888027e-05, "loss_iou": 0.224609375, "loss_num": 0.0155029296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 230305896, "step": 1992 }, { "epoch": 10.71505376344086, "grad_norm": 18.14219093322754, "learning_rate": 5e-07, "loss": 0.6996, "num_input_tokens_seen": 230420112, "step": 1993 }, { "epoch": 10.71505376344086, "loss": 0.5923154354095459, "loss_ce": 3.0243612854974344e-05, "loss_iou": 0.24609375, "loss_num": 0.02001953125, "loss_xval": 0.59375, "num_input_tokens_seen": 230420112, "step": 1993 }, { "epoch": 10.720430107526882, "grad_norm": 18.130321502685547, "learning_rate": 5e-07, "loss": 0.7385, "num_input_tokens_seen": 230535012, "step": 1994 }, { "epoch": 10.720430107526882, "loss": 0.5744065046310425, "loss_ce": 3.5193526855437085e-05, "loss_iou": 0.2578125, "loss_num": 0.0115966796875, "loss_xval": 0.57421875, "num_input_tokens_seen": 230535012, "step": 1994 }, { "epoch": 10.725806451612904, "grad_norm": 21.127500534057617, "learning_rate": 5e-07, "loss": 0.6486, "num_input_tokens_seen": 230649840, "step": 1995 }, { "epoch": 10.725806451612904, "loss": 0.3758719265460968, "loss_ce": 1.7445730918552727e-05, "loss_iou": 0.1484375, "loss_num": 0.015869140625, "loss_xval": 0.375, "num_input_tokens_seen": 230649840, "step": 1995 }, { "epoch": 10.731182795698924, "grad_norm": 23.19507598876953, "learning_rate": 5e-07, "loss": 0.622, "num_input_tokens_seen": 230765432, "step": 1996 }, { "epoch": 10.731182795698924, "loss": 0.7459778785705566, "loss_ce": 0.0001282213197555393, "loss_iou": 0.326171875, "loss_num": 0.018798828125, "loss_xval": 0.74609375, "num_input_tokens_seen": 230765432, "step": 1996 }, { "epoch": 10.736559139784946, "grad_norm": 19.95818519592285, "learning_rate": 5e-07, "loss": 0.7925, "num_input_tokens_seen": 230881100, "step": 1997 }, { "epoch": 10.736559139784946, "loss": 0.8177186250686646, "loss_ce": 9.168189717456698e-05, "loss_iou": 0.353515625, "loss_num": 0.0223388671875, "loss_xval": 0.81640625, "num_input_tokens_seen": 230881100, "step": 1997 }, { "epoch": 10.741935483870968, "grad_norm": 20.72757339477539, "learning_rate": 5e-07, "loss": 0.6577, "num_input_tokens_seen": 230994896, "step": 1998 }, { "epoch": 10.741935483870968, "loss": 0.7375099062919617, "loss_ce": 8.318386971950531e-05, "loss_iou": 0.322265625, "loss_num": 0.0181884765625, "loss_xval": 0.73828125, "num_input_tokens_seen": 230994896, "step": 1998 }, { "epoch": 10.74731182795699, "grad_norm": 24.481868743896484, "learning_rate": 5e-07, "loss": 0.6039, "num_input_tokens_seen": 231112148, "step": 1999 }, { "epoch": 10.74731182795699, "loss": 0.595803439617157, "loss_ce": 0.00010030592238763347, "loss_iou": 0.26953125, "loss_num": 0.011474609375, "loss_xval": 0.59375, "num_input_tokens_seen": 231112148, "step": 1999 }, { "epoch": 10.75268817204301, "grad_norm": 22.821786880493164, "learning_rate": 5e-07, "loss": 0.7277, "num_input_tokens_seen": 231230236, "step": 2000 }, { "epoch": 10.75268817204301, "eval_icons_CIoU": 0.14418525621294975, "eval_icons_GIoU": 0.11298028007149696, "eval_icons_IoU": 0.29984869062900543, "eval_icons_MAE_all": 0.03202081099152565, "eval_icons_MAE_h": 0.03543289750814438, "eval_icons_MAE_w": 0.05557488650083542, "eval_icons_MAE_x_boxes": 0.05318366549909115, "eval_icons_MAE_y_boxes": 0.032998183742165565, "eval_icons_NUM_probability": 0.998530924320221, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.9171446561813354, "eval_icons_loss_ce": 0.00023260226589627564, "eval_icons_loss_iou": 0.87158203125, "eval_icons_loss_num": 0.032718658447265625, "eval_icons_loss_xval": 1.9072265625, "eval_icons_runtime": 41.1359, "eval_icons_samples_per_second": 1.215, "eval_icons_steps_per_second": 0.049, "num_input_tokens_seen": 231230236, "step": 2000 }, { "epoch": 10.75268817204301, "eval_screenspot_CIoU": 0.3372385154167811, "eval_screenspot_GIoU": 0.3297786811987559, "eval_screenspot_IoU": 0.4264782170454661, "eval_screenspot_MAE_all": 0.05757373757660389, "eval_screenspot_MAE_h": 0.049372597907980285, "eval_screenspot_MAE_w": 0.07210068901379903, "eval_screenspot_MAE_x_boxes": 0.07737050019204617, "eval_screenspot_MAE_y_boxes": 0.03895781685908636, "eval_screenspot_NUM_probability": 0.9997060497601827, "eval_screenspot_inside_bbox": 0.7487499912579855, "eval_screenspot_loss": 1.6956923007965088, "eval_screenspot_loss_ce": 0.00011047448788303882, "eval_screenspot_loss_iou": 0.721923828125, "eval_screenspot_loss_num": 0.06576283772786458, "eval_screenspot_loss_xval": 1.7727864583333333, "eval_screenspot_runtime": 70.3105, "eval_screenspot_samples_per_second": 1.266, "eval_screenspot_steps_per_second": 0.043, "num_input_tokens_seen": 231230236, "step": 2000 }, { "epoch": 10.75268817204301, "loss": 1.6309130191802979, "loss_ce": 5.369395512389019e-05, "loss_iou": 0.6875, "loss_num": 0.0517578125, "loss_xval": 1.6328125, "num_input_tokens_seen": 231230236, "step": 2000 }, { "epoch": 10.758064516129032, "grad_norm": 20.976200103759766, "learning_rate": 5e-07, "loss": 0.7134, "num_input_tokens_seen": 231345948, "step": 2001 }, { "epoch": 10.758064516129032, "loss": 0.6146517992019653, "loss_ce": 2.781073999358341e-05, "loss_iou": 0.28125, "loss_num": 0.01055908203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 231345948, "step": 2001 }, { "epoch": 10.763440860215054, "grad_norm": 18.019577026367188, "learning_rate": 5e-07, "loss": 0.8378, "num_input_tokens_seen": 231461064, "step": 2002 }, { "epoch": 10.763440860215054, "loss": 0.5331418514251709, "loss_ce": 6.077068974263966e-05, "loss_iou": 0.22265625, "loss_num": 0.017333984375, "loss_xval": 0.53125, "num_input_tokens_seen": 231461064, "step": 2002 }, { "epoch": 10.768817204301076, "grad_norm": 22.981908798217773, "learning_rate": 5e-07, "loss": 0.5925, "num_input_tokens_seen": 231575960, "step": 2003 }, { "epoch": 10.768817204301076, "loss": 0.5701022148132324, "loss_ce": 3.385998206795193e-05, "loss_iou": 0.25, "loss_num": 0.013916015625, "loss_xval": 0.5703125, "num_input_tokens_seen": 231575960, "step": 2003 }, { "epoch": 10.774193548387096, "grad_norm": 28.68568992614746, "learning_rate": 5e-07, "loss": 0.669, "num_input_tokens_seen": 231694592, "step": 2004 }, { "epoch": 10.774193548387096, "loss": 0.6803416013717651, "loss_ce": 0.00028789896168746054, "loss_iou": 0.296875, "loss_num": 0.0172119140625, "loss_xval": 0.6796875, "num_input_tokens_seen": 231694592, "step": 2004 }, { "epoch": 10.779569892473118, "grad_norm": 29.141353607177734, "learning_rate": 5e-07, "loss": 0.7155, "num_input_tokens_seen": 231810032, "step": 2005 }, { "epoch": 10.779569892473118, "loss": 0.5904806852340698, "loss_ce": 2.6598423573886976e-05, "loss_iou": 0.2470703125, "loss_num": 0.0191650390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 231810032, "step": 2005 }, { "epoch": 10.78494623655914, "grad_norm": 22.403230667114258, "learning_rate": 5e-07, "loss": 0.8442, "num_input_tokens_seen": 231923980, "step": 2006 }, { "epoch": 10.78494623655914, "loss": 0.4874541461467743, "loss_ce": 2.7375986974220723e-05, "loss_iou": 0.2197265625, "loss_num": 0.00982666015625, "loss_xval": 0.48828125, "num_input_tokens_seen": 231923980, "step": 2006 }, { "epoch": 10.790322580645162, "grad_norm": 30.32322883605957, "learning_rate": 5e-07, "loss": 0.7071, "num_input_tokens_seen": 232039868, "step": 2007 }, { "epoch": 10.790322580645162, "loss": 0.5531120300292969, "loss_ce": 0.00013347636559046805, "loss_iou": 0.2333984375, "loss_num": 0.017333984375, "loss_xval": 0.5546875, "num_input_tokens_seen": 232039868, "step": 2007 }, { "epoch": 10.795698924731182, "grad_norm": 29.240707397460938, "learning_rate": 5e-07, "loss": 0.6253, "num_input_tokens_seen": 232154924, "step": 2008 }, { "epoch": 10.795698924731182, "loss": 0.6823473572731018, "loss_ce": 9.635682363295928e-05, "loss_iou": 0.27734375, "loss_num": 0.026123046875, "loss_xval": 0.68359375, "num_input_tokens_seen": 232154924, "step": 2008 }, { "epoch": 10.801075268817204, "grad_norm": 19.56832504272461, "learning_rate": 5e-07, "loss": 0.6959, "num_input_tokens_seen": 232270916, "step": 2009 }, { "epoch": 10.801075268817204, "loss": 0.7248849868774414, "loss_ce": 3.148747418890707e-05, "loss_iou": 0.326171875, "loss_num": 0.01446533203125, "loss_xval": 0.7265625, "num_input_tokens_seen": 232270916, "step": 2009 }, { "epoch": 10.806451612903226, "grad_norm": 26.96200180053711, "learning_rate": 5e-07, "loss": 0.5472, "num_input_tokens_seen": 232385700, "step": 2010 }, { "epoch": 10.806451612903226, "loss": 0.4690186381340027, "loss_ce": 2.4512570234946907e-05, "loss_iou": 0.1982421875, "loss_num": 0.0145263671875, "loss_xval": 0.46875, "num_input_tokens_seen": 232385700, "step": 2010 }, { "epoch": 10.811827956989248, "grad_norm": 21.79180335998535, "learning_rate": 5e-07, "loss": 0.5752, "num_input_tokens_seen": 232502668, "step": 2011 }, { "epoch": 10.811827956989248, "loss": 0.5764468908309937, "loss_ce": 3.086751530645415e-05, "loss_iou": 0.251953125, "loss_num": 0.01446533203125, "loss_xval": 0.578125, "num_input_tokens_seen": 232502668, "step": 2011 }, { "epoch": 10.817204301075268, "grad_norm": 17.19378662109375, "learning_rate": 5e-07, "loss": 0.6271, "num_input_tokens_seen": 232618672, "step": 2012 }, { "epoch": 10.817204301075268, "loss": 0.6187019944190979, "loss_ce": 4.9663500249153e-05, "loss_iou": 0.26171875, "loss_num": 0.0189208984375, "loss_xval": 0.6171875, "num_input_tokens_seen": 232618672, "step": 2012 }, { "epoch": 10.82258064516129, "grad_norm": 16.114503860473633, "learning_rate": 5e-07, "loss": 0.7669, "num_input_tokens_seen": 232733072, "step": 2013 }, { "epoch": 10.82258064516129, "loss": 0.830115795135498, "loss_ce": 3.766938243643381e-05, "loss_iou": 0.373046875, "loss_num": 0.0166015625, "loss_xval": 0.828125, "num_input_tokens_seen": 232733072, "step": 2013 }, { "epoch": 10.827956989247312, "grad_norm": 19.698223114013672, "learning_rate": 5e-07, "loss": 0.7615, "num_input_tokens_seen": 232849872, "step": 2014 }, { "epoch": 10.827956989247312, "loss": 0.8841198682785034, "loss_ce": 8.666004578117281e-05, "loss_iou": 0.380859375, "loss_num": 0.0244140625, "loss_xval": 0.8828125, "num_input_tokens_seen": 232849872, "step": 2014 }, { "epoch": 10.833333333333334, "grad_norm": 19.750036239624023, "learning_rate": 5e-07, "loss": 0.5992, "num_input_tokens_seen": 232963512, "step": 2015 }, { "epoch": 10.833333333333334, "loss": 0.8372099995613098, "loss_ce": 5.181369124329649e-05, "loss_iou": 0.361328125, "loss_num": 0.0225830078125, "loss_xval": 0.8359375, "num_input_tokens_seen": 232963512, "step": 2015 }, { "epoch": 10.838709677419354, "grad_norm": 26.707548141479492, "learning_rate": 5e-07, "loss": 0.5892, "num_input_tokens_seen": 233079608, "step": 2016 }, { "epoch": 10.838709677419354, "loss": 0.5569323301315308, "loss_ce": 4.757365968544036e-05, "loss_iou": 0.2451171875, "loss_num": 0.01336669921875, "loss_xval": 0.55859375, "num_input_tokens_seen": 233079608, "step": 2016 }, { "epoch": 10.844086021505376, "grad_norm": 26.1616268157959, "learning_rate": 5e-07, "loss": 0.706, "num_input_tokens_seen": 233196568, "step": 2017 }, { "epoch": 10.844086021505376, "loss": 0.7957503795623779, "loss_ce": 9.604185470379889e-05, "loss_iou": 0.326171875, "loss_num": 0.02880859375, "loss_xval": 0.796875, "num_input_tokens_seen": 233196568, "step": 2017 }, { "epoch": 10.849462365591398, "grad_norm": 25.554025650024414, "learning_rate": 5e-07, "loss": 0.8012, "num_input_tokens_seen": 233311208, "step": 2018 }, { "epoch": 10.849462365591398, "loss": 0.8869876861572266, "loss_ce": 2.477858288330026e-05, "loss_iou": 0.373046875, "loss_num": 0.0281982421875, "loss_xval": 0.88671875, "num_input_tokens_seen": 233311208, "step": 2018 }, { "epoch": 10.85483870967742, "grad_norm": 18.550369262695312, "learning_rate": 5e-07, "loss": 0.574, "num_input_tokens_seen": 233426616, "step": 2019 }, { "epoch": 10.85483870967742, "loss": 0.7356630563735962, "loss_ce": 6.741838296875358e-05, "loss_iou": 0.3359375, "loss_num": 0.0130615234375, "loss_xval": 0.734375, "num_input_tokens_seen": 233426616, "step": 2019 }, { "epoch": 10.86021505376344, "grad_norm": 18.93515968322754, "learning_rate": 5e-07, "loss": 0.535, "num_input_tokens_seen": 233543964, "step": 2020 }, { "epoch": 10.86021505376344, "loss": 0.4958817660808563, "loss_ce": 3.2140502298716456e-05, "loss_iou": 0.205078125, "loss_num": 0.0169677734375, "loss_xval": 0.49609375, "num_input_tokens_seen": 233543964, "step": 2020 }, { "epoch": 10.865591397849462, "grad_norm": 21.06085968017578, "learning_rate": 5e-07, "loss": 0.6144, "num_input_tokens_seen": 233657640, "step": 2021 }, { "epoch": 10.865591397849462, "loss": 0.616325855255127, "loss_ce": 0.00011491587792988867, "loss_iou": 0.24609375, "loss_num": 0.024658203125, "loss_xval": 0.6171875, "num_input_tokens_seen": 233657640, "step": 2021 }, { "epoch": 10.870967741935484, "grad_norm": 21.022016525268555, "learning_rate": 5e-07, "loss": 0.6093, "num_input_tokens_seen": 233773184, "step": 2022 }, { "epoch": 10.870967741935484, "loss": 0.5887919068336487, "loss_ce": 4.679903213400394e-05, "loss_iou": 0.23828125, "loss_num": 0.0224609375, "loss_xval": 0.58984375, "num_input_tokens_seen": 233773184, "step": 2022 }, { "epoch": 10.876344086021506, "grad_norm": 22.93672752380371, "learning_rate": 5e-07, "loss": 0.636, "num_input_tokens_seen": 233885840, "step": 2023 }, { "epoch": 10.876344086021506, "loss": 0.7114521265029907, "loss_ce": 2.6297144358977675e-05, "loss_iou": 0.30078125, "loss_num": 0.022216796875, "loss_xval": 0.7109375, "num_input_tokens_seen": 233885840, "step": 2023 }, { "epoch": 10.881720430107526, "grad_norm": 20.94415283203125, "learning_rate": 5e-07, "loss": 0.6187, "num_input_tokens_seen": 234004152, "step": 2024 }, { "epoch": 10.881720430107526, "loss": 0.6473881006240845, "loss_ce": 0.00017128861509263515, "loss_iou": 0.2890625, "loss_num": 0.01373291015625, "loss_xval": 0.6484375, "num_input_tokens_seen": 234004152, "step": 2024 }, { "epoch": 10.887096774193548, "grad_norm": 20.777690887451172, "learning_rate": 5e-07, "loss": 0.6971, "num_input_tokens_seen": 234119284, "step": 2025 }, { "epoch": 10.887096774193548, "loss": 0.47511786222457886, "loss_ce": 8.124432497425005e-05, "loss_iou": 0.1962890625, "loss_num": 0.0167236328125, "loss_xval": 0.474609375, "num_input_tokens_seen": 234119284, "step": 2025 }, { "epoch": 10.89247311827957, "grad_norm": 49.34548568725586, "learning_rate": 5e-07, "loss": 0.6374, "num_input_tokens_seen": 234236300, "step": 2026 }, { "epoch": 10.89247311827957, "loss": 0.5576401948928833, "loss_ce": 2.3034492187434807e-05, "loss_iou": 0.244140625, "loss_num": 0.0140380859375, "loss_xval": 0.55859375, "num_input_tokens_seen": 234236300, "step": 2026 }, { "epoch": 10.897849462365592, "grad_norm": 33.71906280517578, "learning_rate": 5e-07, "loss": 0.7655, "num_input_tokens_seen": 234351536, "step": 2027 }, { "epoch": 10.897849462365592, "loss": 0.7720080614089966, "loss_ce": 3.544825813150965e-05, "loss_iou": 0.33984375, "loss_num": 0.0184326171875, "loss_xval": 0.7734375, "num_input_tokens_seen": 234351536, "step": 2027 }, { "epoch": 10.903225806451612, "grad_norm": 19.797679901123047, "learning_rate": 5e-07, "loss": 0.6933, "num_input_tokens_seen": 234469292, "step": 2028 }, { "epoch": 10.903225806451612, "loss": 0.6199413537979126, "loss_ce": 6.829711492173374e-05, "loss_iou": 0.271484375, "loss_num": 0.015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 234469292, "step": 2028 }, { "epoch": 10.908602150537634, "grad_norm": 26.08950424194336, "learning_rate": 5e-07, "loss": 0.845, "num_input_tokens_seen": 234582348, "step": 2029 }, { "epoch": 10.908602150537634, "loss": 0.6758080720901489, "loss_ce": 2.6794374207383953e-05, "loss_iou": 0.28125, "loss_num": 0.0228271484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 234582348, "step": 2029 }, { "epoch": 10.913978494623656, "grad_norm": 23.455291748046875, "learning_rate": 5e-07, "loss": 0.5786, "num_input_tokens_seen": 234696340, "step": 2030 }, { "epoch": 10.913978494623656, "loss": 0.5014959573745728, "loss_ce": 3.1087507522897795e-05, "loss_iou": 0.2197265625, "loss_num": 0.01239013671875, "loss_xval": 0.5, "num_input_tokens_seen": 234696340, "step": 2030 }, { "epoch": 10.919354838709678, "grad_norm": 23.677717208862305, "learning_rate": 5e-07, "loss": 0.805, "num_input_tokens_seen": 234812628, "step": 2031 }, { "epoch": 10.919354838709678, "loss": 0.6575119495391846, "loss_ce": 4.128184446017258e-05, "loss_iou": 0.28125, "loss_num": 0.0191650390625, "loss_xval": 0.65625, "num_input_tokens_seen": 234812628, "step": 2031 }, { "epoch": 10.924731182795698, "grad_norm": 21.273488998413086, "learning_rate": 5e-07, "loss": 0.744, "num_input_tokens_seen": 234928824, "step": 2032 }, { "epoch": 10.924731182795698, "loss": 0.6203961372375488, "loss_ce": 3.480482700979337e-05, "loss_iou": 0.265625, "loss_num": 0.017822265625, "loss_xval": 0.62109375, "num_input_tokens_seen": 234928824, "step": 2032 }, { "epoch": 10.93010752688172, "grad_norm": 21.028532028198242, "learning_rate": 5e-07, "loss": 0.633, "num_input_tokens_seen": 235041552, "step": 2033 }, { "epoch": 10.93010752688172, "loss": 0.5015544891357422, "loss_ce": 8.964663720689714e-05, "loss_iou": 0.212890625, "loss_num": 0.0150146484375, "loss_xval": 0.5, "num_input_tokens_seen": 235041552, "step": 2033 }, { "epoch": 10.935483870967742, "grad_norm": 22.74390411376953, "learning_rate": 5e-07, "loss": 0.7223, "num_input_tokens_seen": 235154444, "step": 2034 }, { "epoch": 10.935483870967742, "loss": 0.8408218026161194, "loss_ce": 0.00012353708734735847, "loss_iou": 0.365234375, "loss_num": 0.0220947265625, "loss_xval": 0.83984375, "num_input_tokens_seen": 235154444, "step": 2034 }, { "epoch": 10.940860215053764, "grad_norm": 18.235454559326172, "learning_rate": 5e-07, "loss": 0.7574, "num_input_tokens_seen": 235271384, "step": 2035 }, { "epoch": 10.940860215053764, "loss": 0.7464399933815002, "loss_ce": 0.00010209013998974115, "loss_iou": 0.298828125, "loss_num": 0.02978515625, "loss_xval": 0.74609375, "num_input_tokens_seen": 235271384, "step": 2035 }, { "epoch": 10.946236559139784, "grad_norm": 23.558259963989258, "learning_rate": 5e-07, "loss": 0.678, "num_input_tokens_seen": 235384764, "step": 2036 }, { "epoch": 10.946236559139784, "loss": 0.6352976560592651, "loss_ce": 4.373595947981812e-05, "loss_iou": 0.283203125, "loss_num": 0.0140380859375, "loss_xval": 0.63671875, "num_input_tokens_seen": 235384764, "step": 2036 }, { "epoch": 10.951612903225806, "grad_norm": 30.653318405151367, "learning_rate": 5e-07, "loss": 0.6714, "num_input_tokens_seen": 235498880, "step": 2037 }, { "epoch": 10.951612903225806, "loss": 0.8148351907730103, "loss_ce": 0.00019899971084669232, "loss_iou": 0.35546875, "loss_num": 0.0205078125, "loss_xval": 0.81640625, "num_input_tokens_seen": 235498880, "step": 2037 }, { "epoch": 10.956989247311828, "grad_norm": 21.82588005065918, "learning_rate": 5e-07, "loss": 0.686, "num_input_tokens_seen": 235612820, "step": 2038 }, { "epoch": 10.956989247311828, "loss": 0.6006085872650146, "loss_ce": 2.262655289086979e-05, "loss_iou": 0.265625, "loss_num": 0.01434326171875, "loss_xval": 0.6015625, "num_input_tokens_seen": 235612820, "step": 2038 }, { "epoch": 10.96236559139785, "grad_norm": 22.89728546142578, "learning_rate": 5e-07, "loss": 0.7202, "num_input_tokens_seen": 235729304, "step": 2039 }, { "epoch": 10.96236559139785, "loss": 0.6954776048660278, "loss_ce": 4.298479689168744e-05, "loss_iou": 0.30859375, "loss_num": 0.015869140625, "loss_xval": 0.6953125, "num_input_tokens_seen": 235729304, "step": 2039 }, { "epoch": 10.967741935483872, "grad_norm": 15.728914260864258, "learning_rate": 5e-07, "loss": 0.6089, "num_input_tokens_seen": 235848428, "step": 2040 }, { "epoch": 10.967741935483872, "loss": 0.632951021194458, "loss_ce": 0.004533063154667616, "loss_iou": 0.2451171875, "loss_num": 0.027587890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 235848428, "step": 2040 }, { "epoch": 10.973118279569892, "grad_norm": 20.5300235748291, "learning_rate": 5e-07, "loss": 0.6858, "num_input_tokens_seen": 235964804, "step": 2041 }, { "epoch": 10.973118279569892, "loss": 0.8389391899108887, "loss_ce": 7.197794911917299e-05, "loss_iou": 0.3515625, "loss_num": 0.02685546875, "loss_xval": 0.83984375, "num_input_tokens_seen": 235964804, "step": 2041 }, { "epoch": 10.978494623655914, "grad_norm": 27.576772689819336, "learning_rate": 5e-07, "loss": 0.6807, "num_input_tokens_seen": 236080592, "step": 2042 }, { "epoch": 10.978494623655914, "loss": 0.49367907643318176, "loss_ce": 2.6767607778310776e-05, "loss_iou": 0.2099609375, "loss_num": 0.0145263671875, "loss_xval": 0.494140625, "num_input_tokens_seen": 236080592, "step": 2042 }, { "epoch": 10.983870967741936, "grad_norm": 28.14325523376465, "learning_rate": 5e-07, "loss": 0.7426, "num_input_tokens_seen": 236196344, "step": 2043 }, { "epoch": 10.983870967741936, "loss": 0.7637389898300171, "loss_ce": 6.702161044813693e-05, "loss_iou": 0.33203125, "loss_num": 0.020263671875, "loss_xval": 0.765625, "num_input_tokens_seen": 236196344, "step": 2043 }, { "epoch": 10.989247311827956, "grad_norm": 34.110374450683594, "learning_rate": 5e-07, "loss": 0.7223, "num_input_tokens_seen": 236315796, "step": 2044 }, { "epoch": 10.989247311827956, "loss": 0.4861105680465698, "loss_ce": 2.6577632525004447e-05, "loss_iou": 0.21484375, "loss_num": 0.01141357421875, "loss_xval": 0.486328125, "num_input_tokens_seen": 236315796, "step": 2044 }, { "epoch": 10.994623655913978, "grad_norm": 27.97881507873535, "learning_rate": 5e-07, "loss": 0.6786, "num_input_tokens_seen": 236433520, "step": 2045 }, { "epoch": 10.994623655913978, "loss": 0.5510604381561279, "loss_ce": 3.505939821479842e-05, "loss_iou": 0.2255859375, "loss_num": 0.02001953125, "loss_xval": 0.55078125, "num_input_tokens_seen": 236433520, "step": 2045 }, { "epoch": 11.0, "grad_norm": 25.60142707824707, "learning_rate": 5e-07, "loss": 0.6324, "num_input_tokens_seen": 236548568, "step": 2046 }, { "epoch": 11.0, "loss": 0.6394593119621277, "loss_ce": 5.4993673984427005e-05, "loss_iou": 0.287109375, "loss_num": 0.01336669921875, "loss_xval": 0.640625, "num_input_tokens_seen": 236548568, "step": 2046 }, { "epoch": 11.005376344086022, "grad_norm": 17.62491798400879, "learning_rate": 5e-07, "loss": 0.6533, "num_input_tokens_seen": 236664848, "step": 2047 }, { "epoch": 11.005376344086022, "loss": 0.6594856977462769, "loss_ce": 6.189768464537337e-05, "loss_iou": 0.2734375, "loss_num": 0.0220947265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 236664848, "step": 2047 }, { "epoch": 11.010752688172044, "grad_norm": 21.229694366455078, "learning_rate": 5e-07, "loss": 0.62, "num_input_tokens_seen": 236777688, "step": 2048 }, { "epoch": 11.010752688172044, "loss": 0.4505240321159363, "loss_ce": 8.455351053271443e-05, "loss_iou": 0.193359375, "loss_num": 0.01275634765625, "loss_xval": 0.451171875, "num_input_tokens_seen": 236777688, "step": 2048 }, { "epoch": 11.016129032258064, "grad_norm": 21.906248092651367, "learning_rate": 5e-07, "loss": 0.5803, "num_input_tokens_seen": 236895556, "step": 2049 }, { "epoch": 11.016129032258064, "loss": 0.7004770636558533, "loss_ce": 3.765006840694696e-05, "loss_iou": 0.3203125, "loss_num": 0.01226806640625, "loss_xval": 0.69921875, "num_input_tokens_seen": 236895556, "step": 2049 }, { "epoch": 11.021505376344086, "grad_norm": 44.07777404785156, "learning_rate": 5e-07, "loss": 0.5644, "num_input_tokens_seen": 237012712, "step": 2050 }, { "epoch": 11.021505376344086, "loss": 0.4966142475605011, "loss_ce": 3.219750215066597e-05, "loss_iou": 0.2216796875, "loss_num": 0.0108642578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 237012712, "step": 2050 }, { "epoch": 11.026881720430108, "grad_norm": 29.01714324951172, "learning_rate": 5e-07, "loss": 0.7276, "num_input_tokens_seen": 237126244, "step": 2051 }, { "epoch": 11.026881720430108, "loss": 0.5574014186859131, "loss_ce": 2.8409289370756596e-05, "loss_iou": 0.2392578125, "loss_num": 0.0157470703125, "loss_xval": 0.55859375, "num_input_tokens_seen": 237126244, "step": 2051 }, { "epoch": 11.03225806451613, "grad_norm": 24.728153228759766, "learning_rate": 5e-07, "loss": 0.7068, "num_input_tokens_seen": 237241988, "step": 2052 }, { "epoch": 11.03225806451613, "loss": 0.7823984622955322, "loss_ce": 4.984543193131685e-05, "loss_iou": 0.34375, "loss_num": 0.01904296875, "loss_xval": 0.78125, "num_input_tokens_seen": 237241988, "step": 2052 }, { "epoch": 11.03763440860215, "grad_norm": 28.869808197021484, "learning_rate": 5e-07, "loss": 0.6903, "num_input_tokens_seen": 237355264, "step": 2053 }, { "epoch": 11.03763440860215, "loss": 0.6512144207954407, "loss_ce": 9.139414032688364e-05, "loss_iou": 0.287109375, "loss_num": 0.01531982421875, "loss_xval": 0.65234375, "num_input_tokens_seen": 237355264, "step": 2053 }, { "epoch": 11.043010752688172, "grad_norm": 26.279273986816406, "learning_rate": 5e-07, "loss": 0.7272, "num_input_tokens_seen": 237468884, "step": 2054 }, { "epoch": 11.043010752688172, "loss": 0.5293533205986023, "loss_ce": 5.641590541927144e-05, "loss_iou": 0.2177734375, "loss_num": 0.018798828125, "loss_xval": 0.53125, "num_input_tokens_seen": 237468884, "step": 2054 }, { "epoch": 11.048387096774194, "grad_norm": 17.72920799255371, "learning_rate": 5e-07, "loss": 0.6181, "num_input_tokens_seen": 237583748, "step": 2055 }, { "epoch": 11.048387096774194, "loss": 0.6403082013130188, "loss_ce": 4.937082849210128e-05, "loss_iou": 0.265625, "loss_num": 0.0220947265625, "loss_xval": 0.640625, "num_input_tokens_seen": 237583748, "step": 2055 }, { "epoch": 11.053763440860216, "grad_norm": 26.21930694580078, "learning_rate": 5e-07, "loss": 0.6505, "num_input_tokens_seen": 237702264, "step": 2056 }, { "epoch": 11.053763440860216, "loss": 0.7034726142883301, "loss_ce": 0.00010346388444304466, "loss_iou": 0.27734375, "loss_num": 0.030029296875, "loss_xval": 0.703125, "num_input_tokens_seen": 237702264, "step": 2056 }, { "epoch": 11.059139784946236, "grad_norm": 36.38947677612305, "learning_rate": 5e-07, "loss": 0.6711, "num_input_tokens_seen": 237816684, "step": 2057 }, { "epoch": 11.059139784946236, "loss": 0.6930199861526489, "loss_ce": 2.682338708837051e-05, "loss_iou": 0.291015625, "loss_num": 0.02197265625, "loss_xval": 0.69140625, "num_input_tokens_seen": 237816684, "step": 2057 }, { "epoch": 11.064516129032258, "grad_norm": 22.77438735961914, "learning_rate": 5e-07, "loss": 0.7863, "num_input_tokens_seen": 237931780, "step": 2058 }, { "epoch": 11.064516129032258, "loss": 0.6639083623886108, "loss_ce": 2.900804793171119e-05, "loss_iou": 0.2578125, "loss_num": 0.0296630859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 237931780, "step": 2058 }, { "epoch": 11.06989247311828, "grad_norm": 14.615533828735352, "learning_rate": 5e-07, "loss": 0.7768, "num_input_tokens_seen": 238044644, "step": 2059 }, { "epoch": 11.06989247311828, "loss": 0.8158282041549683, "loss_ce": 3.226273111067712e-05, "loss_iou": 0.35546875, "loss_num": 0.020751953125, "loss_xval": 0.81640625, "num_input_tokens_seen": 238044644, "step": 2059 }, { "epoch": 11.075268817204302, "grad_norm": 23.986234664916992, "learning_rate": 5e-07, "loss": 0.7033, "num_input_tokens_seen": 238162348, "step": 2060 }, { "epoch": 11.075268817204302, "loss": 0.709265947341919, "loss_ce": 3.741171894944273e-05, "loss_iou": 0.30859375, "loss_num": 0.018310546875, "loss_xval": 0.7109375, "num_input_tokens_seen": 238162348, "step": 2060 }, { "epoch": 11.080645161290322, "grad_norm": 55.38761520385742, "learning_rate": 5e-07, "loss": 0.6775, "num_input_tokens_seen": 238275060, "step": 2061 }, { "epoch": 11.080645161290322, "loss": 0.7302616834640503, "loss_ce": 3.7042209442006424e-05, "loss_iou": 0.306640625, "loss_num": 0.0230712890625, "loss_xval": 0.73046875, "num_input_tokens_seen": 238275060, "step": 2061 }, { "epoch": 11.086021505376344, "grad_norm": 35.50905227661133, "learning_rate": 5e-07, "loss": 0.6245, "num_input_tokens_seen": 238389196, "step": 2062 }, { "epoch": 11.086021505376344, "loss": 0.49294513463974, "loss_ce": 2.5203402401530184e-05, "loss_iou": 0.224609375, "loss_num": 0.00860595703125, "loss_xval": 0.4921875, "num_input_tokens_seen": 238389196, "step": 2062 }, { "epoch": 11.091397849462366, "grad_norm": 22.160865783691406, "learning_rate": 5e-07, "loss": 0.6913, "num_input_tokens_seen": 238504244, "step": 2063 }, { "epoch": 11.091397849462366, "loss": 0.5768684148788452, "loss_ce": 2.5106040993705392e-05, "loss_iou": 0.263671875, "loss_num": 0.009765625, "loss_xval": 0.578125, "num_input_tokens_seen": 238504244, "step": 2063 }, { "epoch": 11.096774193548388, "grad_norm": 19.024784088134766, "learning_rate": 5e-07, "loss": 0.6722, "num_input_tokens_seen": 238623020, "step": 2064 }, { "epoch": 11.096774193548388, "loss": 0.5881659388542175, "loss_ce": 3.1161798688117415e-05, "loss_iou": 0.26171875, "loss_num": 0.0126953125, "loss_xval": 0.58984375, "num_input_tokens_seen": 238623020, "step": 2064 }, { "epoch": 11.102150537634408, "grad_norm": 19.219343185424805, "learning_rate": 5e-07, "loss": 0.6126, "num_input_tokens_seen": 238739356, "step": 2065 }, { "epoch": 11.102150537634408, "loss": 0.7413510084152222, "loss_ce": 1.7992751963902265e-05, "loss_iou": 0.298828125, "loss_num": 0.0284423828125, "loss_xval": 0.7421875, "num_input_tokens_seen": 238739356, "step": 2065 }, { "epoch": 11.10752688172043, "grad_norm": 21.426103591918945, "learning_rate": 5e-07, "loss": 0.8431, "num_input_tokens_seen": 238856408, "step": 2066 }, { "epoch": 11.10752688172043, "loss": 0.8268189430236816, "loss_ce": 3.671022568596527e-05, "loss_iou": 0.357421875, "loss_num": 0.022216796875, "loss_xval": 0.828125, "num_input_tokens_seen": 238856408, "step": 2066 }, { "epoch": 11.112903225806452, "grad_norm": 29.007659912109375, "learning_rate": 5e-07, "loss": 0.6413, "num_input_tokens_seen": 238971484, "step": 2067 }, { "epoch": 11.112903225806452, "loss": 0.6670221090316772, "loss_ce": 2.9885875846957788e-05, "loss_iou": 0.3046875, "loss_num": 0.0118408203125, "loss_xval": 0.66796875, "num_input_tokens_seen": 238971484, "step": 2067 }, { "epoch": 11.118279569892474, "grad_norm": 23.768898010253906, "learning_rate": 5e-07, "loss": 0.7006, "num_input_tokens_seen": 239087740, "step": 2068 }, { "epoch": 11.118279569892474, "loss": 0.6756953597068787, "loss_ce": 3.6197874578647316e-05, "loss_iou": 0.275390625, "loss_num": 0.025390625, "loss_xval": 0.67578125, "num_input_tokens_seen": 239087740, "step": 2068 }, { "epoch": 11.123655913978494, "grad_norm": 21.018512725830078, "learning_rate": 5e-07, "loss": 0.7035, "num_input_tokens_seen": 239199876, "step": 2069 }, { "epoch": 11.123655913978494, "loss": 0.5183260440826416, "loss_ce": 1.545377199363429e-05, "loss_iou": 0.2099609375, "loss_num": 0.019775390625, "loss_xval": 0.51953125, "num_input_tokens_seen": 239199876, "step": 2069 }, { "epoch": 11.129032258064516, "grad_norm": 17.117097854614258, "learning_rate": 5e-07, "loss": 0.6875, "num_input_tokens_seen": 239316100, "step": 2070 }, { "epoch": 11.129032258064516, "loss": 0.5913406610488892, "loss_ce": 3.206977271474898e-05, "loss_iou": 0.25390625, "loss_num": 0.0167236328125, "loss_xval": 0.58984375, "num_input_tokens_seen": 239316100, "step": 2070 }, { "epoch": 11.134408602150538, "grad_norm": 19.162343978881836, "learning_rate": 5e-07, "loss": 0.8242, "num_input_tokens_seen": 239427772, "step": 2071 }, { "epoch": 11.134408602150538, "loss": 0.766392171382904, "loss_ce": 3.471258969511837e-05, "loss_iou": 0.318359375, "loss_num": 0.0260009765625, "loss_xval": 0.765625, "num_input_tokens_seen": 239427772, "step": 2071 }, { "epoch": 11.13978494623656, "grad_norm": 24.126178741455078, "learning_rate": 5e-07, "loss": 0.8198, "num_input_tokens_seen": 239539604, "step": 2072 }, { "epoch": 11.13978494623656, "loss": 0.726610004901886, "loss_ce": 4.7522218665108085e-05, "loss_iou": 0.3359375, "loss_num": 0.0111083984375, "loss_xval": 0.7265625, "num_input_tokens_seen": 239539604, "step": 2072 }, { "epoch": 11.14516129032258, "grad_norm": 18.56098747253418, "learning_rate": 5e-07, "loss": 0.6768, "num_input_tokens_seen": 239655396, "step": 2073 }, { "epoch": 11.14516129032258, "loss": 0.28176265954971313, "loss_ce": 2.4355806090170518e-05, "loss_iou": 0.1240234375, "loss_num": 0.0067138671875, "loss_xval": 0.28125, "num_input_tokens_seen": 239655396, "step": 2073 }, { "epoch": 11.150537634408602, "grad_norm": 16.346250534057617, "learning_rate": 5e-07, "loss": 0.8744, "num_input_tokens_seen": 239773208, "step": 2074 }, { "epoch": 11.150537634408602, "loss": 0.755457878112793, "loss_ce": 8.676567085785791e-05, "loss_iou": 0.318359375, "loss_num": 0.02392578125, "loss_xval": 0.75390625, "num_input_tokens_seen": 239773208, "step": 2074 }, { "epoch": 11.155913978494624, "grad_norm": 19.319181442260742, "learning_rate": 5e-07, "loss": 0.8649, "num_input_tokens_seen": 239884920, "step": 2075 }, { "epoch": 11.155913978494624, "loss": 0.626767635345459, "loss_ce": 5.866261926712468e-05, "loss_iou": 0.259765625, "loss_num": 0.021484375, "loss_xval": 0.625, "num_input_tokens_seen": 239884920, "step": 2075 }, { "epoch": 11.161290322580646, "grad_norm": 19.66993522644043, "learning_rate": 5e-07, "loss": 0.6908, "num_input_tokens_seen": 240002920, "step": 2076 }, { "epoch": 11.161290322580646, "loss": 0.6542616486549377, "loss_ce": 5.631391468341462e-05, "loss_iou": 0.26171875, "loss_num": 0.02587890625, "loss_xval": 0.65234375, "num_input_tokens_seen": 240002920, "step": 2076 }, { "epoch": 11.166666666666666, "grad_norm": 24.76284408569336, "learning_rate": 5e-07, "loss": 0.6626, "num_input_tokens_seen": 240118388, "step": 2077 }, { "epoch": 11.166666666666666, "loss": 0.4741555452346802, "loss_ce": 3.443164678174071e-05, "loss_iou": 0.2041015625, "loss_num": 0.01324462890625, "loss_xval": 0.474609375, "num_input_tokens_seen": 240118388, "step": 2077 }, { "epoch": 11.172043010752688, "grad_norm": 22.142580032348633, "learning_rate": 5e-07, "loss": 0.6169, "num_input_tokens_seen": 240235096, "step": 2078 }, { "epoch": 11.172043010752688, "loss": 0.4842308759689331, "loss_ce": 0.0001000221527647227, "loss_iou": 0.208984375, "loss_num": 0.01318359375, "loss_xval": 0.484375, "num_input_tokens_seen": 240235096, "step": 2078 }, { "epoch": 11.17741935483871, "grad_norm": 27.5268497467041, "learning_rate": 5e-07, "loss": 0.7672, "num_input_tokens_seen": 240352716, "step": 2079 }, { "epoch": 11.17741935483871, "loss": 0.9453302621841431, "loss_ce": 1.7792292055673897e-05, "loss_iou": 0.41015625, "loss_num": 0.02490234375, "loss_xval": 0.9453125, "num_input_tokens_seen": 240352716, "step": 2079 }, { "epoch": 11.182795698924732, "grad_norm": 23.718849182128906, "learning_rate": 5e-07, "loss": 0.6791, "num_input_tokens_seen": 240466084, "step": 2080 }, { "epoch": 11.182795698924732, "loss": 0.7477108240127563, "loss_ce": 3.018816641997546e-05, "loss_iou": 0.314453125, "loss_num": 0.023681640625, "loss_xval": 0.74609375, "num_input_tokens_seen": 240466084, "step": 2080 }, { "epoch": 11.188172043010752, "grad_norm": 16.411991119384766, "learning_rate": 5e-07, "loss": 0.6367, "num_input_tokens_seen": 240582536, "step": 2081 }, { "epoch": 11.188172043010752, "loss": 0.7754387855529785, "loss_ce": 4.8213529225904495e-05, "loss_iou": 0.353515625, "loss_num": 0.01385498046875, "loss_xval": 0.7734375, "num_input_tokens_seen": 240582536, "step": 2081 }, { "epoch": 11.193548387096774, "grad_norm": 21.017723083496094, "learning_rate": 5e-07, "loss": 0.6001, "num_input_tokens_seen": 240696916, "step": 2082 }, { "epoch": 11.193548387096774, "loss": 0.4157460331916809, "loss_ce": 3.556662340997718e-05, "loss_iou": 0.1669921875, "loss_num": 0.016357421875, "loss_xval": 0.416015625, "num_input_tokens_seen": 240696916, "step": 2082 }, { "epoch": 11.198924731182796, "grad_norm": 31.334964752197266, "learning_rate": 5e-07, "loss": 0.747, "num_input_tokens_seen": 240813440, "step": 2083 }, { "epoch": 11.198924731182796, "loss": 0.8108232021331787, "loss_ce": 3.2194129744311795e-05, "loss_iou": 0.34375, "loss_num": 0.025146484375, "loss_xval": 0.8125, "num_input_tokens_seen": 240813440, "step": 2083 }, { "epoch": 11.204301075268818, "grad_norm": 27.136672973632812, "learning_rate": 5e-07, "loss": 0.6659, "num_input_tokens_seen": 240930896, "step": 2084 }, { "epoch": 11.204301075268818, "loss": 0.7039058208465576, "loss_ce": 4.835438448935747e-05, "loss_iou": 0.31640625, "loss_num": 0.0140380859375, "loss_xval": 0.703125, "num_input_tokens_seen": 240930896, "step": 2084 }, { "epoch": 11.209677419354838, "grad_norm": 16.749277114868164, "learning_rate": 5e-07, "loss": 0.7953, "num_input_tokens_seen": 241046620, "step": 2085 }, { "epoch": 11.209677419354838, "loss": 0.4935646653175354, "loss_ce": 3.439150168560445e-05, "loss_iou": 0.216796875, "loss_num": 0.011962890625, "loss_xval": 0.494140625, "num_input_tokens_seen": 241046620, "step": 2085 }, { "epoch": 11.21505376344086, "grad_norm": 18.211881637573242, "learning_rate": 5e-07, "loss": 0.6461, "num_input_tokens_seen": 241162668, "step": 2086 }, { "epoch": 11.21505376344086, "loss": 0.6611711978912354, "loss_ce": 3.837477197521366e-05, "loss_iou": 0.2890625, "loss_num": 0.0167236328125, "loss_xval": 0.66015625, "num_input_tokens_seen": 241162668, "step": 2086 }, { "epoch": 11.220430107526882, "grad_norm": 20.63280487060547, "learning_rate": 5e-07, "loss": 0.6679, "num_input_tokens_seen": 241275044, "step": 2087 }, { "epoch": 11.220430107526882, "loss": 0.654509425163269, "loss_ce": 9.045995830092579e-05, "loss_iou": 0.291015625, "loss_num": 0.0146484375, "loss_xval": 0.65625, "num_input_tokens_seen": 241275044, "step": 2087 }, { "epoch": 11.225806451612904, "grad_norm": 21.980649948120117, "learning_rate": 5e-07, "loss": 0.6525, "num_input_tokens_seen": 241388636, "step": 2088 }, { "epoch": 11.225806451612904, "loss": 0.6639879941940308, "loss_ce": 4.757654824061319e-05, "loss_iou": 0.287109375, "loss_num": 0.0181884765625, "loss_xval": 0.6640625, "num_input_tokens_seen": 241388636, "step": 2088 }, { "epoch": 11.231182795698924, "grad_norm": 16.394088745117188, "learning_rate": 5e-07, "loss": 0.613, "num_input_tokens_seen": 241502872, "step": 2089 }, { "epoch": 11.231182795698924, "loss": 0.8068015575408936, "loss_ce": 3.883129102177918e-05, "loss_iou": 0.31640625, "loss_num": 0.0341796875, "loss_xval": 0.80859375, "num_input_tokens_seen": 241502872, "step": 2089 }, { "epoch": 11.236559139784946, "grad_norm": 19.68535804748535, "learning_rate": 5e-07, "loss": 0.8007, "num_input_tokens_seen": 241616940, "step": 2090 }, { "epoch": 11.236559139784946, "loss": 1.0337345600128174, "loss_ce": 4.3023916077800095e-05, "loss_iou": 0.412109375, "loss_num": 0.04248046875, "loss_xval": 1.03125, "num_input_tokens_seen": 241616940, "step": 2090 }, { "epoch": 11.241935483870968, "grad_norm": 23.96269416809082, "learning_rate": 5e-07, "loss": 0.7855, "num_input_tokens_seen": 241731328, "step": 2091 }, { "epoch": 11.241935483870968, "loss": 0.6872756481170654, "loss_ce": 1.9787166820606217e-05, "loss_iou": 0.291015625, "loss_num": 0.020751953125, "loss_xval": 0.6875, "num_input_tokens_seen": 241731328, "step": 2091 }, { "epoch": 11.24731182795699, "grad_norm": 19.409467697143555, "learning_rate": 5e-07, "loss": 0.5166, "num_input_tokens_seen": 241848112, "step": 2092 }, { "epoch": 11.24731182795699, "loss": 0.4280588626861572, "loss_ce": 1.9306247850181535e-05, "loss_iou": 0.18359375, "loss_num": 0.01226806640625, "loss_xval": 0.427734375, "num_input_tokens_seen": 241848112, "step": 2092 }, { "epoch": 11.25268817204301, "grad_norm": 25.875646591186523, "learning_rate": 5e-07, "loss": 0.5294, "num_input_tokens_seen": 241966212, "step": 2093 }, { "epoch": 11.25268817204301, "loss": 0.4107147455215454, "loss_ce": 3.968044984503649e-05, "loss_iou": 0.1650390625, "loss_num": 0.0159912109375, "loss_xval": 0.41015625, "num_input_tokens_seen": 241966212, "step": 2093 }, { "epoch": 11.258064516129032, "grad_norm": 24.591604232788086, "learning_rate": 5e-07, "loss": 0.6533, "num_input_tokens_seen": 242080760, "step": 2094 }, { "epoch": 11.258064516129032, "loss": 0.5036864280700684, "loss_ce": 2.4312150344485417e-05, "loss_iou": 0.228515625, "loss_num": 0.00921630859375, "loss_xval": 0.50390625, "num_input_tokens_seen": 242080760, "step": 2094 }, { "epoch": 11.263440860215054, "grad_norm": 21.80743408203125, "learning_rate": 5e-07, "loss": 0.6714, "num_input_tokens_seen": 242195480, "step": 2095 }, { "epoch": 11.263440860215054, "loss": 0.4879566431045532, "loss_ce": 4.160313619649969e-05, "loss_iou": 0.216796875, "loss_num": 0.0111083984375, "loss_xval": 0.48828125, "num_input_tokens_seen": 242195480, "step": 2095 }, { "epoch": 11.268817204301076, "grad_norm": 33.06886291503906, "learning_rate": 5e-07, "loss": 0.5136, "num_input_tokens_seen": 242313612, "step": 2096 }, { "epoch": 11.268817204301076, "loss": 0.4726737141609192, "loss_ce": 1.7481192116974853e-05, "loss_iou": 0.208984375, "loss_num": 0.010986328125, "loss_xval": 0.47265625, "num_input_tokens_seen": 242313612, "step": 2096 }, { "epoch": 11.274193548387096, "grad_norm": 31.593029022216797, "learning_rate": 5e-07, "loss": 0.7709, "num_input_tokens_seen": 242428744, "step": 2097 }, { "epoch": 11.274193548387096, "loss": 0.9741858243942261, "loss_ce": 0.0001868077670224011, "loss_iou": 0.4375, "loss_num": 0.019775390625, "loss_xval": 0.97265625, "num_input_tokens_seen": 242428744, "step": 2097 }, { "epoch": 11.279569892473118, "grad_norm": 24.730104446411133, "learning_rate": 5e-07, "loss": 0.6723, "num_input_tokens_seen": 242541636, "step": 2098 }, { "epoch": 11.279569892473118, "loss": 0.7475849390029907, "loss_ce": 2.632761243148707e-05, "loss_iou": 0.3359375, "loss_num": 0.0152587890625, "loss_xval": 0.74609375, "num_input_tokens_seen": 242541636, "step": 2098 }, { "epoch": 11.28494623655914, "grad_norm": 24.867128372192383, "learning_rate": 5e-07, "loss": 0.6929, "num_input_tokens_seen": 242658708, "step": 2099 }, { "epoch": 11.28494623655914, "loss": 1.0153093338012695, "loss_ce": 5.0620838010217994e-05, "loss_iou": 0.462890625, "loss_num": 0.0179443359375, "loss_xval": 1.015625, "num_input_tokens_seen": 242658708, "step": 2099 }, { "epoch": 11.290322580645162, "grad_norm": 18.93107795715332, "learning_rate": 5e-07, "loss": 0.5806, "num_input_tokens_seen": 242771464, "step": 2100 }, { "epoch": 11.290322580645162, "loss": 0.5752196311950684, "loss_ce": 2.4295242837979458e-05, "loss_iou": 0.2412109375, "loss_num": 0.0186767578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 242771464, "step": 2100 }, { "epoch": 11.295698924731182, "grad_norm": 21.24274253845215, "learning_rate": 5e-07, "loss": 0.5829, "num_input_tokens_seen": 242887460, "step": 2101 }, { "epoch": 11.295698924731182, "loss": 0.6396787166595459, "loss_ce": 3.0278752092272043e-05, "loss_iou": 0.265625, "loss_num": 0.021728515625, "loss_xval": 0.640625, "num_input_tokens_seen": 242887460, "step": 2101 }, { "epoch": 11.301075268817204, "grad_norm": 32.10942459106445, "learning_rate": 5e-07, "loss": 0.788, "num_input_tokens_seen": 243002112, "step": 2102 }, { "epoch": 11.301075268817204, "loss": 1.0349483489990234, "loss_ce": 3.622775329858996e-05, "loss_iou": 0.470703125, "loss_num": 0.0189208984375, "loss_xval": 1.03125, "num_input_tokens_seen": 243002112, "step": 2102 }, { "epoch": 11.306451612903226, "grad_norm": 19.45651626586914, "learning_rate": 5e-07, "loss": 0.7066, "num_input_tokens_seen": 243119188, "step": 2103 }, { "epoch": 11.306451612903226, "loss": 0.65700364112854, "loss_ce": 2.123598096659407e-05, "loss_iou": 0.287109375, "loss_num": 0.016357421875, "loss_xval": 0.65625, "num_input_tokens_seen": 243119188, "step": 2103 }, { "epoch": 11.311827956989248, "grad_norm": 23.788904190063477, "learning_rate": 5e-07, "loss": 0.7684, "num_input_tokens_seen": 243233768, "step": 2104 }, { "epoch": 11.311827956989248, "loss": 0.5063976049423218, "loss_ce": 4.993422771804035e-05, "loss_iou": 0.2236328125, "loss_num": 0.0120849609375, "loss_xval": 0.5078125, "num_input_tokens_seen": 243233768, "step": 2104 }, { "epoch": 11.317204301075268, "grad_norm": 17.11598014831543, "learning_rate": 5e-07, "loss": 0.608, "num_input_tokens_seen": 243349088, "step": 2105 }, { "epoch": 11.317204301075268, "loss": 0.494870662689209, "loss_ce": 0.00011966934835072607, "loss_iou": 0.2197265625, "loss_num": 0.0108642578125, "loss_xval": 0.494140625, "num_input_tokens_seen": 243349088, "step": 2105 }, { "epoch": 11.32258064516129, "grad_norm": 38.17893981933594, "learning_rate": 5e-07, "loss": 0.7358, "num_input_tokens_seen": 243461488, "step": 2106 }, { "epoch": 11.32258064516129, "loss": 1.069659948348999, "loss_ce": 7.991100574145094e-05, "loss_iou": 0.470703125, "loss_num": 0.025634765625, "loss_xval": 1.0703125, "num_input_tokens_seen": 243461488, "step": 2106 }, { "epoch": 11.327956989247312, "grad_norm": 23.783729553222656, "learning_rate": 5e-07, "loss": 0.797, "num_input_tokens_seen": 243575180, "step": 2107 }, { "epoch": 11.327956989247312, "loss": 0.8423197269439697, "loss_ce": 3.460170410107821e-05, "loss_iou": 0.37109375, "loss_num": 0.0203857421875, "loss_xval": 0.84375, "num_input_tokens_seen": 243575180, "step": 2107 }, { "epoch": 11.333333333333334, "grad_norm": 32.30073928833008, "learning_rate": 5e-07, "loss": 0.765, "num_input_tokens_seen": 243690972, "step": 2108 }, { "epoch": 11.333333333333334, "loss": 0.8446410894393921, "loss_ce": 3.6637891753343865e-05, "loss_iou": 0.345703125, "loss_num": 0.0302734375, "loss_xval": 0.84375, "num_input_tokens_seen": 243690972, "step": 2108 }, { "epoch": 11.338709677419354, "grad_norm": 44.482913970947266, "learning_rate": 5e-07, "loss": 0.595, "num_input_tokens_seen": 243808892, "step": 2109 }, { "epoch": 11.338709677419354, "loss": 0.4958671033382416, "loss_ce": 1.748898284859024e-05, "loss_iou": 0.212890625, "loss_num": 0.01397705078125, "loss_xval": 0.49609375, "num_input_tokens_seen": 243808892, "step": 2109 }, { "epoch": 11.344086021505376, "grad_norm": 40.05585861206055, "learning_rate": 5e-07, "loss": 0.6926, "num_input_tokens_seen": 243924888, "step": 2110 }, { "epoch": 11.344086021505376, "loss": 0.7976497411727905, "loss_ce": 4.229524711263366e-05, "loss_iou": 0.36328125, "loss_num": 0.01434326171875, "loss_xval": 0.796875, "num_input_tokens_seen": 243924888, "step": 2110 }, { "epoch": 11.349462365591398, "grad_norm": 20.083091735839844, "learning_rate": 5e-07, "loss": 0.7129, "num_input_tokens_seen": 244040484, "step": 2111 }, { "epoch": 11.349462365591398, "loss": 0.5536719560623169, "loss_ce": 2.207372381235473e-05, "loss_iou": 0.2333984375, "loss_num": 0.0174560546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 244040484, "step": 2111 }, { "epoch": 11.35483870967742, "grad_norm": 19.297407150268555, "learning_rate": 5e-07, "loss": 0.7295, "num_input_tokens_seen": 244156736, "step": 2112 }, { "epoch": 11.35483870967742, "loss": 0.791286826133728, "loss_ce": 2.7119011065224186e-05, "loss_iou": 0.34765625, "loss_num": 0.019287109375, "loss_xval": 0.79296875, "num_input_tokens_seen": 244156736, "step": 2112 }, { "epoch": 11.36021505376344, "grad_norm": 20.765901565551758, "learning_rate": 5e-07, "loss": 0.5719, "num_input_tokens_seen": 244275144, "step": 2113 }, { "epoch": 11.36021505376344, "loss": 0.5500991940498352, "loss_ce": 5.036144284531474e-05, "loss_iou": 0.23828125, "loss_num": 0.014892578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 244275144, "step": 2113 }, { "epoch": 11.365591397849462, "grad_norm": 24.718847274780273, "learning_rate": 5e-07, "loss": 0.7326, "num_input_tokens_seen": 244391012, "step": 2114 }, { "epoch": 11.365591397849462, "loss": 0.684357225894928, "loss_ce": 3.1071533157955855e-05, "loss_iou": 0.296875, "loss_num": 0.0179443359375, "loss_xval": 0.68359375, "num_input_tokens_seen": 244391012, "step": 2114 }, { "epoch": 11.370967741935484, "grad_norm": 21.30289077758789, "learning_rate": 5e-07, "loss": 0.6143, "num_input_tokens_seen": 244507616, "step": 2115 }, { "epoch": 11.370967741935484, "loss": 0.6139978170394897, "loss_ce": 4.5194290578365326e-05, "loss_iou": 0.271484375, "loss_num": 0.0142822265625, "loss_xval": 0.61328125, "num_input_tokens_seen": 244507616, "step": 2115 }, { "epoch": 11.376344086021506, "grad_norm": 22.162212371826172, "learning_rate": 5e-07, "loss": 0.4818, "num_input_tokens_seen": 244625552, "step": 2116 }, { "epoch": 11.376344086021506, "loss": 0.4179939031600952, "loss_ce": 2.5165549232042395e-05, "loss_iou": 0.173828125, "loss_num": 0.01385498046875, "loss_xval": 0.41796875, "num_input_tokens_seen": 244625552, "step": 2116 }, { "epoch": 11.381720430107526, "grad_norm": 31.7055606842041, "learning_rate": 5e-07, "loss": 0.5987, "num_input_tokens_seen": 244737336, "step": 2117 }, { "epoch": 11.381720430107526, "loss": 0.6786239147186279, "loss_ce": 3.5021927033085376e-05, "loss_iou": 0.28125, "loss_num": 0.023681640625, "loss_xval": 0.6796875, "num_input_tokens_seen": 244737336, "step": 2117 }, { "epoch": 11.387096774193548, "grad_norm": 22.253278732299805, "learning_rate": 5e-07, "loss": 0.5657, "num_input_tokens_seen": 244854468, "step": 2118 }, { "epoch": 11.387096774193548, "loss": 0.5065898895263672, "loss_ce": 0.00024226370442193002, "loss_iou": 0.2119140625, "loss_num": 0.0167236328125, "loss_xval": 0.5078125, "num_input_tokens_seen": 244854468, "step": 2118 }, { "epoch": 11.39247311827957, "grad_norm": 23.63190269470215, "learning_rate": 5e-07, "loss": 0.7204, "num_input_tokens_seen": 244966860, "step": 2119 }, { "epoch": 11.39247311827957, "loss": 0.709038257598877, "loss_ce": 5.3888616093900055e-05, "loss_iou": 0.302734375, "loss_num": 0.0208740234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 244966860, "step": 2119 }, { "epoch": 11.397849462365592, "grad_norm": 23.82849884033203, "learning_rate": 5e-07, "loss": 0.6498, "num_input_tokens_seen": 245083980, "step": 2120 }, { "epoch": 11.397849462365592, "loss": 0.6435818076133728, "loss_ce": 2.709852924454026e-05, "loss_iou": 0.27734375, "loss_num": 0.0181884765625, "loss_xval": 0.64453125, "num_input_tokens_seen": 245083980, "step": 2120 }, { "epoch": 11.403225806451612, "grad_norm": 23.509626388549805, "learning_rate": 5e-07, "loss": 0.7494, "num_input_tokens_seen": 245201060, "step": 2121 }, { "epoch": 11.403225806451612, "loss": 0.6550769805908203, "loss_ce": 4.770157829625532e-05, "loss_iou": 0.26953125, "loss_num": 0.0238037109375, "loss_xval": 0.65625, "num_input_tokens_seen": 245201060, "step": 2121 }, { "epoch": 11.408602150537634, "grad_norm": 25.773906707763672, "learning_rate": 5e-07, "loss": 0.7676, "num_input_tokens_seen": 245315704, "step": 2122 }, { "epoch": 11.408602150537634, "loss": 0.6067138314247131, "loss_ce": 2.4369377570110373e-05, "loss_iou": 0.271484375, "loss_num": 0.01318359375, "loss_xval": 0.60546875, "num_input_tokens_seen": 245315704, "step": 2122 }, { "epoch": 11.413978494623656, "grad_norm": 28.925907135009766, "learning_rate": 5e-07, "loss": 0.6147, "num_input_tokens_seen": 245427176, "step": 2123 }, { "epoch": 11.413978494623656, "loss": 0.6058349609375, "loss_ce": 0.00012206692190375179, "loss_iou": 0.265625, "loss_num": 0.0150146484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 245427176, "step": 2123 }, { "epoch": 11.419354838709678, "grad_norm": 32.8327751159668, "learning_rate": 5e-07, "loss": 0.8069, "num_input_tokens_seen": 245543524, "step": 2124 }, { "epoch": 11.419354838709678, "loss": 1.0076072216033936, "loss_ce": 3.8826467061880976e-05, "loss_iou": 0.431640625, "loss_num": 0.0286865234375, "loss_xval": 1.0078125, "num_input_tokens_seen": 245543524, "step": 2124 }, { "epoch": 11.424731182795698, "grad_norm": 25.326791763305664, "learning_rate": 5e-07, "loss": 0.6556, "num_input_tokens_seen": 245660704, "step": 2125 }, { "epoch": 11.424731182795698, "loss": 0.5156635046005249, "loss_ce": 3.850684151984751e-05, "loss_iou": 0.23046875, "loss_num": 0.01068115234375, "loss_xval": 0.515625, "num_input_tokens_seen": 245660704, "step": 2125 }, { "epoch": 11.43010752688172, "grad_norm": 22.699016571044922, "learning_rate": 5e-07, "loss": 0.705, "num_input_tokens_seen": 245776212, "step": 2126 }, { "epoch": 11.43010752688172, "loss": 0.7992671132087708, "loss_ce": 0.00013380989548750222, "loss_iou": 0.328125, "loss_num": 0.0281982421875, "loss_xval": 0.80078125, "num_input_tokens_seen": 245776212, "step": 2126 }, { "epoch": 11.435483870967742, "grad_norm": 21.147628784179688, "learning_rate": 5e-07, "loss": 0.6758, "num_input_tokens_seen": 245893496, "step": 2127 }, { "epoch": 11.435483870967742, "loss": 0.8478180170059204, "loss_ce": 3.97577096009627e-05, "loss_iou": 0.353515625, "loss_num": 0.0284423828125, "loss_xval": 0.84765625, "num_input_tokens_seen": 245893496, "step": 2127 }, { "epoch": 11.440860215053764, "grad_norm": 21.73328399658203, "learning_rate": 5e-07, "loss": 0.5916, "num_input_tokens_seen": 246010108, "step": 2128 }, { "epoch": 11.440860215053764, "loss": 0.6587367057800293, "loss_ce": 4.526866541709751e-05, "loss_iou": 0.291015625, "loss_num": 0.01507568359375, "loss_xval": 0.66015625, "num_input_tokens_seen": 246010108, "step": 2128 }, { "epoch": 11.446236559139784, "grad_norm": 23.764848709106445, "learning_rate": 5e-07, "loss": 0.6003, "num_input_tokens_seen": 246126580, "step": 2129 }, { "epoch": 11.446236559139784, "loss": 0.5414114594459534, "loss_ce": 2.963785664178431e-05, "loss_iou": 0.228515625, "loss_num": 0.01708984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 246126580, "step": 2129 }, { "epoch": 11.451612903225806, "grad_norm": 28.578243255615234, "learning_rate": 5e-07, "loss": 0.5643, "num_input_tokens_seen": 246243128, "step": 2130 }, { "epoch": 11.451612903225806, "loss": 0.6283140778541565, "loss_ce": 1.817926749936305e-05, "loss_iou": 0.271484375, "loss_num": 0.016845703125, "loss_xval": 0.62890625, "num_input_tokens_seen": 246243128, "step": 2130 }, { "epoch": 11.456989247311828, "grad_norm": 23.74277687072754, "learning_rate": 5e-07, "loss": 0.788, "num_input_tokens_seen": 246359340, "step": 2131 }, { "epoch": 11.456989247311828, "loss": 1.0869245529174805, "loss_ce": 0.0003766190493479371, "loss_iou": 0.4921875, "loss_num": 0.0203857421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 246359340, "step": 2131 }, { "epoch": 11.46236559139785, "grad_norm": 25.108354568481445, "learning_rate": 5e-07, "loss": 0.6628, "num_input_tokens_seen": 246471908, "step": 2132 }, { "epoch": 11.46236559139785, "loss": 0.6350727081298828, "loss_ce": 6.291955651249737e-05, "loss_iou": 0.26171875, "loss_num": 0.0220947265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 246471908, "step": 2132 }, { "epoch": 11.46774193548387, "grad_norm": 40.481143951416016, "learning_rate": 5e-07, "loss": 0.6647, "num_input_tokens_seen": 246586020, "step": 2133 }, { "epoch": 11.46774193548387, "loss": 0.5628642439842224, "loss_ce": 0.0004862849891651422, "loss_iou": 0.2421875, "loss_num": 0.0155029296875, "loss_xval": 0.5625, "num_input_tokens_seen": 246586020, "step": 2133 }, { "epoch": 11.473118279569892, "grad_norm": 16.991456985473633, "learning_rate": 5e-07, "loss": 0.6225, "num_input_tokens_seen": 246700160, "step": 2134 }, { "epoch": 11.473118279569892, "loss": 0.7151505351066589, "loss_ce": 6.261722592171282e-05, "loss_iou": 0.28515625, "loss_num": 0.02880859375, "loss_xval": 0.71484375, "num_input_tokens_seen": 246700160, "step": 2134 }, { "epoch": 11.478494623655914, "grad_norm": 23.3402099609375, "learning_rate": 5e-07, "loss": 0.5875, "num_input_tokens_seen": 246816416, "step": 2135 }, { "epoch": 11.478494623655914, "loss": 0.7157236337661743, "loss_ce": 2.538349144742824e-05, "loss_iou": 0.306640625, "loss_num": 0.0208740234375, "loss_xval": 0.71484375, "num_input_tokens_seen": 246816416, "step": 2135 }, { "epoch": 11.483870967741936, "grad_norm": 28.11224937438965, "learning_rate": 5e-07, "loss": 0.6985, "num_input_tokens_seen": 246930668, "step": 2136 }, { "epoch": 11.483870967741936, "loss": 0.49415719509124756, "loss_ce": 1.6563270037295297e-05, "loss_iou": 0.2138671875, "loss_num": 0.01324462890625, "loss_xval": 0.494140625, "num_input_tokens_seen": 246930668, "step": 2136 }, { "epoch": 11.489247311827956, "grad_norm": 26.832717895507812, "learning_rate": 5e-07, "loss": 0.6029, "num_input_tokens_seen": 247044840, "step": 2137 }, { "epoch": 11.489247311827956, "loss": 0.6845877766609192, "loss_ce": 1.7468664736952633e-05, "loss_iou": 0.306640625, "loss_num": 0.0146484375, "loss_xval": 0.68359375, "num_input_tokens_seen": 247044840, "step": 2137 }, { "epoch": 11.494623655913978, "grad_norm": 26.19735336303711, "learning_rate": 5e-07, "loss": 0.6642, "num_input_tokens_seen": 247160488, "step": 2138 }, { "epoch": 11.494623655913978, "loss": 0.6104157567024231, "loss_ce": 3.367725730640814e-05, "loss_iou": 0.2373046875, "loss_num": 0.0272216796875, "loss_xval": 0.609375, "num_input_tokens_seen": 247160488, "step": 2138 }, { "epoch": 11.5, "grad_norm": 19.34952735900879, "learning_rate": 5e-07, "loss": 0.6372, "num_input_tokens_seen": 247276324, "step": 2139 }, { "epoch": 11.5, "loss": 0.8370357751846313, "loss_ce": 0.00012171333219157532, "loss_iou": 0.34375, "loss_num": 0.0301513671875, "loss_xval": 0.8359375, "num_input_tokens_seen": 247276324, "step": 2139 }, { "epoch": 11.505376344086022, "grad_norm": 18.556432723999023, "learning_rate": 5e-07, "loss": 0.5818, "num_input_tokens_seen": 247390304, "step": 2140 }, { "epoch": 11.505376344086022, "loss": 0.6484616994857788, "loss_ce": 2.423391561023891e-05, "loss_iou": 0.259765625, "loss_num": 0.025390625, "loss_xval": 0.6484375, "num_input_tokens_seen": 247390304, "step": 2140 }, { "epoch": 11.510752688172044, "grad_norm": 20.293615341186523, "learning_rate": 5e-07, "loss": 0.7432, "num_input_tokens_seen": 247507908, "step": 2141 }, { "epoch": 11.510752688172044, "loss": 0.7231738567352295, "loss_ce": 2.9292799808899872e-05, "loss_iou": 0.30078125, "loss_num": 0.0245361328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 247507908, "step": 2141 }, { "epoch": 11.516129032258064, "grad_norm": 21.38381004333496, "learning_rate": 5e-07, "loss": 0.7808, "num_input_tokens_seen": 247618892, "step": 2142 }, { "epoch": 11.516129032258064, "loss": 0.7175582647323608, "loss_ce": 2.8950809792149812e-05, "loss_iou": 0.30078125, "loss_num": 0.0230712890625, "loss_xval": 0.71875, "num_input_tokens_seen": 247618892, "step": 2142 }, { "epoch": 11.521505376344086, "grad_norm": 22.98020362854004, "learning_rate": 5e-07, "loss": 0.5143, "num_input_tokens_seen": 247732804, "step": 2143 }, { "epoch": 11.521505376344086, "loss": 0.6489527225494385, "loss_ce": 2.700962795643136e-05, "loss_iou": 0.27734375, "loss_num": 0.01904296875, "loss_xval": 0.6484375, "num_input_tokens_seen": 247732804, "step": 2143 }, { "epoch": 11.526881720430108, "grad_norm": 19.960134506225586, "learning_rate": 5e-07, "loss": 0.7579, "num_input_tokens_seen": 247847664, "step": 2144 }, { "epoch": 11.526881720430108, "loss": 0.5587366819381714, "loss_ce": 2.0875799236819148e-05, "loss_iou": 0.25390625, "loss_num": 0.01025390625, "loss_xval": 0.55859375, "num_input_tokens_seen": 247847664, "step": 2144 }, { "epoch": 11.532258064516128, "grad_norm": 16.721193313598633, "learning_rate": 5e-07, "loss": 0.5917, "num_input_tokens_seen": 247962140, "step": 2145 }, { "epoch": 11.532258064516128, "loss": 0.43998056650161743, "loss_ce": 3.914572880603373e-05, "loss_iou": 0.1875, "loss_num": 0.01275634765625, "loss_xval": 0.439453125, "num_input_tokens_seen": 247962140, "step": 2145 }, { "epoch": 11.53763440860215, "grad_norm": 25.152053833007812, "learning_rate": 5e-07, "loss": 0.6237, "num_input_tokens_seen": 248077956, "step": 2146 }, { "epoch": 11.53763440860215, "loss": 0.6103729009628296, "loss_ce": 2.1318945073289797e-05, "loss_iou": 0.2578125, "loss_num": 0.018798828125, "loss_xval": 0.609375, "num_input_tokens_seen": 248077956, "step": 2146 }, { "epoch": 11.543010752688172, "grad_norm": 15.929099082946777, "learning_rate": 5e-07, "loss": 0.6551, "num_input_tokens_seen": 248195276, "step": 2147 }, { "epoch": 11.543010752688172, "loss": 0.6755988001823425, "loss_ce": 6.173267320264131e-05, "loss_iou": 0.306640625, "loss_num": 0.0125732421875, "loss_xval": 0.67578125, "num_input_tokens_seen": 248195276, "step": 2147 }, { "epoch": 11.548387096774194, "grad_norm": 22.65334701538086, "learning_rate": 5e-07, "loss": 0.7658, "num_input_tokens_seen": 248310236, "step": 2148 }, { "epoch": 11.548387096774194, "loss": 0.8616220355033875, "loss_ce": 4.9768561439123005e-05, "loss_iou": 0.357421875, "loss_num": 0.0294189453125, "loss_xval": 0.86328125, "num_input_tokens_seen": 248310236, "step": 2148 }, { "epoch": 11.553763440860216, "grad_norm": 30.436948776245117, "learning_rate": 5e-07, "loss": 0.6412, "num_input_tokens_seen": 248425340, "step": 2149 }, { "epoch": 11.553763440860216, "loss": 0.7036548852920532, "loss_ce": 4.1540712118148804e-05, "loss_iou": 0.3125, "loss_num": 0.0155029296875, "loss_xval": 0.703125, "num_input_tokens_seen": 248425340, "step": 2149 }, { "epoch": 11.559139784946236, "grad_norm": 27.152111053466797, "learning_rate": 5e-07, "loss": 0.7581, "num_input_tokens_seen": 248541108, "step": 2150 }, { "epoch": 11.559139784946236, "loss": 0.9011535048484802, "loss_ce": 3.0491448342218064e-05, "loss_iou": 0.39453125, "loss_num": 0.022216796875, "loss_xval": 0.90234375, "num_input_tokens_seen": 248541108, "step": 2150 }, { "epoch": 11.564516129032258, "grad_norm": 19.479663848876953, "learning_rate": 5e-07, "loss": 0.67, "num_input_tokens_seen": 248657836, "step": 2151 }, { "epoch": 11.564516129032258, "loss": 0.5821922421455383, "loss_ce": 3.896084308507852e-05, "loss_iou": 0.234375, "loss_num": 0.022705078125, "loss_xval": 0.58203125, "num_input_tokens_seen": 248657836, "step": 2151 }, { "epoch": 11.56989247311828, "grad_norm": 19.987680435180664, "learning_rate": 5e-07, "loss": 0.7202, "num_input_tokens_seen": 248773144, "step": 2152 }, { "epoch": 11.56989247311828, "loss": 0.9805049300193787, "loss_ce": 3.6240970075596124e-05, "loss_iou": 0.4453125, "loss_num": 0.01806640625, "loss_xval": 0.98046875, "num_input_tokens_seen": 248773144, "step": 2152 }, { "epoch": 11.575268817204302, "grad_norm": 28.64654541015625, "learning_rate": 5e-07, "loss": 0.6217, "num_input_tokens_seen": 248891956, "step": 2153 }, { "epoch": 11.575268817204302, "loss": 0.45535218715667725, "loss_ce": 2.9935234124423005e-05, "loss_iou": 0.203125, "loss_num": 0.0096435546875, "loss_xval": 0.455078125, "num_input_tokens_seen": 248891956, "step": 2153 }, { "epoch": 11.580645161290322, "grad_norm": 39.297386169433594, "learning_rate": 5e-07, "loss": 0.7235, "num_input_tokens_seen": 249009300, "step": 2154 }, { "epoch": 11.580645161290322, "loss": 0.9735503792762756, "loss_ce": 0.0001617064408492297, "loss_iou": 0.416015625, "loss_num": 0.0283203125, "loss_xval": 0.97265625, "num_input_tokens_seen": 249009300, "step": 2154 }, { "epoch": 11.586021505376344, "grad_norm": 31.104387283325195, "learning_rate": 5e-07, "loss": 0.6432, "num_input_tokens_seen": 249125000, "step": 2155 }, { "epoch": 11.586021505376344, "loss": 0.4483010768890381, "loss_ce": 0.00018092083337251097, "loss_iou": 0.189453125, "loss_num": 0.013916015625, "loss_xval": 0.447265625, "num_input_tokens_seen": 249125000, "step": 2155 }, { "epoch": 11.591397849462366, "grad_norm": 24.57097816467285, "learning_rate": 5e-07, "loss": 0.5166, "num_input_tokens_seen": 249243372, "step": 2156 }, { "epoch": 11.591397849462366, "loss": 0.46998709440231323, "loss_ce": 1.6373514881706797e-05, "loss_iou": 0.19140625, "loss_num": 0.0174560546875, "loss_xval": 0.470703125, "num_input_tokens_seen": 249243372, "step": 2156 }, { "epoch": 11.596774193548388, "grad_norm": 17.66974639892578, "learning_rate": 5e-07, "loss": 0.6518, "num_input_tokens_seen": 249359848, "step": 2157 }, { "epoch": 11.596774193548388, "loss": 0.49589264392852783, "loss_ce": 4.3008905777242035e-05, "loss_iou": 0.2158203125, "loss_num": 0.01275634765625, "loss_xval": 0.49609375, "num_input_tokens_seen": 249359848, "step": 2157 }, { "epoch": 11.602150537634408, "grad_norm": 33.4935302734375, "learning_rate": 5e-07, "loss": 0.784, "num_input_tokens_seen": 249475724, "step": 2158 }, { "epoch": 11.602150537634408, "loss": 0.7052223682403564, "loss_ce": 0.00014427157293539494, "loss_iou": 0.306640625, "loss_num": 0.0181884765625, "loss_xval": 0.703125, "num_input_tokens_seen": 249475724, "step": 2158 }, { "epoch": 11.60752688172043, "grad_norm": 27.522083282470703, "learning_rate": 5e-07, "loss": 0.7341, "num_input_tokens_seen": 249588112, "step": 2159 }, { "epoch": 11.60752688172043, "loss": 0.7940988540649414, "loss_ce": 3.149186886730604e-05, "loss_iou": 0.341796875, "loss_num": 0.0220947265625, "loss_xval": 0.79296875, "num_input_tokens_seen": 249588112, "step": 2159 }, { "epoch": 11.612903225806452, "grad_norm": 21.12872314453125, "learning_rate": 5e-07, "loss": 0.7688, "num_input_tokens_seen": 249704872, "step": 2160 }, { "epoch": 11.612903225806452, "loss": 0.38503706455230713, "loss_ce": 2.729003426793497e-05, "loss_iou": 0.16796875, "loss_num": 0.0096435546875, "loss_xval": 0.384765625, "num_input_tokens_seen": 249704872, "step": 2160 }, { "epoch": 11.618279569892474, "grad_norm": 22.362323760986328, "learning_rate": 5e-07, "loss": 0.6313, "num_input_tokens_seen": 249823092, "step": 2161 }, { "epoch": 11.618279569892474, "loss": 0.568467915058136, "loss_ce": 4.751400410896167e-05, "loss_iou": 0.2578125, "loss_num": 0.010498046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 249823092, "step": 2161 }, { "epoch": 11.623655913978494, "grad_norm": 23.21346664428711, "learning_rate": 5e-07, "loss": 0.6763, "num_input_tokens_seen": 249935636, "step": 2162 }, { "epoch": 11.623655913978494, "loss": 0.7649186253547668, "loss_ce": 2.6050671294797212e-05, "loss_iou": 0.33984375, "loss_num": 0.0167236328125, "loss_xval": 0.765625, "num_input_tokens_seen": 249935636, "step": 2162 }, { "epoch": 11.629032258064516, "grad_norm": 20.94448471069336, "learning_rate": 5e-07, "loss": 0.7796, "num_input_tokens_seen": 250048528, "step": 2163 }, { "epoch": 11.629032258064516, "loss": 0.9212308526039124, "loss_ce": 8.824713586363941e-05, "loss_iou": 0.41015625, "loss_num": 0.0201416015625, "loss_xval": 0.921875, "num_input_tokens_seen": 250048528, "step": 2163 }, { "epoch": 11.634408602150538, "grad_norm": 20.8480167388916, "learning_rate": 5e-07, "loss": 0.547, "num_input_tokens_seen": 250163420, "step": 2164 }, { "epoch": 11.634408602150538, "loss": 0.6485153436660767, "loss_ce": 7.784134504618123e-05, "loss_iou": 0.2734375, "loss_num": 0.020751953125, "loss_xval": 0.6484375, "num_input_tokens_seen": 250163420, "step": 2164 }, { "epoch": 11.63978494623656, "grad_norm": 27.934024810791016, "learning_rate": 5e-07, "loss": 0.7266, "num_input_tokens_seen": 250276308, "step": 2165 }, { "epoch": 11.63978494623656, "loss": 0.7944573163986206, "loss_ce": 2.3720172976027243e-05, "loss_iou": 0.3515625, "loss_num": 0.018310546875, "loss_xval": 0.79296875, "num_input_tokens_seen": 250276308, "step": 2165 }, { "epoch": 11.64516129032258, "grad_norm": 19.53363800048828, "learning_rate": 5e-07, "loss": 0.7093, "num_input_tokens_seen": 250390480, "step": 2166 }, { "epoch": 11.64516129032258, "loss": 0.6948820352554321, "loss_ce": 5.782442167401314e-05, "loss_iou": 0.279296875, "loss_num": 0.02685546875, "loss_xval": 0.6953125, "num_input_tokens_seen": 250390480, "step": 2166 }, { "epoch": 11.650537634408602, "grad_norm": 17.738197326660156, "learning_rate": 5e-07, "loss": 0.5801, "num_input_tokens_seen": 250505616, "step": 2167 }, { "epoch": 11.650537634408602, "loss": 0.6290808916091919, "loss_ce": 5.256633448880166e-05, "loss_iou": 0.28125, "loss_num": 0.0130615234375, "loss_xval": 0.62890625, "num_input_tokens_seen": 250505616, "step": 2167 }, { "epoch": 11.655913978494624, "grad_norm": 19.62503433227539, "learning_rate": 5e-07, "loss": 0.7053, "num_input_tokens_seen": 250623068, "step": 2168 }, { "epoch": 11.655913978494624, "loss": 0.5946195125579834, "loss_ce": 0.0002591857628431171, "loss_iou": 0.2578125, "loss_num": 0.01519775390625, "loss_xval": 0.59375, "num_input_tokens_seen": 250623068, "step": 2168 }, { "epoch": 11.661290322580646, "grad_norm": 18.9862117767334, "learning_rate": 5e-07, "loss": 0.6743, "num_input_tokens_seen": 250735980, "step": 2169 }, { "epoch": 11.661290322580646, "loss": 0.5574016571044922, "loss_ce": 2.8649135856539942e-05, "loss_iou": 0.2412109375, "loss_num": 0.01507568359375, "loss_xval": 0.55859375, "num_input_tokens_seen": 250735980, "step": 2169 }, { "epoch": 11.666666666666666, "grad_norm": 19.31372833251953, "learning_rate": 5e-07, "loss": 0.6579, "num_input_tokens_seen": 250851232, "step": 2170 }, { "epoch": 11.666666666666666, "loss": 0.47683802247047424, "loss_ce": 3.138244937872514e-05, "loss_iou": 0.20703125, "loss_num": 0.01275634765625, "loss_xval": 0.4765625, "num_input_tokens_seen": 250851232, "step": 2170 }, { "epoch": 11.672043010752688, "grad_norm": 23.119823455810547, "learning_rate": 5e-07, "loss": 0.5471, "num_input_tokens_seen": 250966772, "step": 2171 }, { "epoch": 11.672043010752688, "loss": 0.5011211633682251, "loss_ce": 2.248190503451042e-05, "loss_iou": 0.2099609375, "loss_num": 0.0162353515625, "loss_xval": 0.5, "num_input_tokens_seen": 250966772, "step": 2171 }, { "epoch": 11.67741935483871, "grad_norm": 16.52552032470703, "learning_rate": 5e-07, "loss": 0.6747, "num_input_tokens_seen": 251082304, "step": 2172 }, { "epoch": 11.67741935483871, "loss": 1.038108229637146, "loss_ce": 2.2281317797023803e-05, "loss_iou": 0.455078125, "loss_num": 0.02587890625, "loss_xval": 1.0390625, "num_input_tokens_seen": 251082304, "step": 2172 }, { "epoch": 11.682795698924732, "grad_norm": 24.107168197631836, "learning_rate": 5e-07, "loss": 0.8328, "num_input_tokens_seen": 251202088, "step": 2173 }, { "epoch": 11.682795698924732, "loss": 0.7783727645874023, "loss_ce": 5.2404509915504605e-05, "loss_iou": 0.3515625, "loss_num": 0.01513671875, "loss_xval": 0.77734375, "num_input_tokens_seen": 251202088, "step": 2173 }, { "epoch": 11.688172043010752, "grad_norm": 22.266782760620117, "learning_rate": 5e-07, "loss": 0.6838, "num_input_tokens_seen": 251315336, "step": 2174 }, { "epoch": 11.688172043010752, "loss": 0.576939046382904, "loss_ce": 3.474729965091683e-05, "loss_iou": 0.255859375, "loss_num": 0.01275634765625, "loss_xval": 0.578125, "num_input_tokens_seen": 251315336, "step": 2174 }, { "epoch": 11.693548387096774, "grad_norm": 23.780954360961914, "learning_rate": 5e-07, "loss": 0.7348, "num_input_tokens_seen": 251431476, "step": 2175 }, { "epoch": 11.693548387096774, "loss": 0.8154520392417908, "loss_ce": 2.2351414372678846e-05, "loss_iou": 0.33203125, "loss_num": 0.0306396484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 251431476, "step": 2175 }, { "epoch": 11.698924731182796, "grad_norm": 19.243999481201172, "learning_rate": 5e-07, "loss": 0.6943, "num_input_tokens_seen": 251547332, "step": 2176 }, { "epoch": 11.698924731182796, "loss": 0.6587257385253906, "loss_ce": 3.429373100516386e-05, "loss_iou": 0.27734375, "loss_num": 0.0208740234375, "loss_xval": 0.66015625, "num_input_tokens_seen": 251547332, "step": 2176 }, { "epoch": 11.704301075268818, "grad_norm": 18.441740036010742, "learning_rate": 5e-07, "loss": 0.561, "num_input_tokens_seen": 251665564, "step": 2177 }, { "epoch": 11.704301075268818, "loss": 0.5513006448745728, "loss_ce": 3.114890569122508e-05, "loss_iou": 0.25, "loss_num": 0.01043701171875, "loss_xval": 0.55078125, "num_input_tokens_seen": 251665564, "step": 2177 }, { "epoch": 11.709677419354838, "grad_norm": 22.22773551940918, "learning_rate": 5e-07, "loss": 0.6207, "num_input_tokens_seen": 251781000, "step": 2178 }, { "epoch": 11.709677419354838, "loss": 0.8679913282394409, "loss_ce": 7.141892274376005e-05, "loss_iou": 0.373046875, "loss_num": 0.0244140625, "loss_xval": 0.8671875, "num_input_tokens_seen": 251781000, "step": 2178 }, { "epoch": 11.71505376344086, "grad_norm": 15.02297306060791, "learning_rate": 5e-07, "loss": 0.6179, "num_input_tokens_seen": 251895676, "step": 2179 }, { "epoch": 11.71505376344086, "loss": 0.5793838500976562, "loss_ce": 3.812197974184528e-05, "loss_iou": 0.23828125, "loss_num": 0.020751953125, "loss_xval": 0.578125, "num_input_tokens_seen": 251895676, "step": 2179 }, { "epoch": 11.720430107526882, "grad_norm": 28.494348526000977, "learning_rate": 5e-07, "loss": 0.7399, "num_input_tokens_seen": 252010524, "step": 2180 }, { "epoch": 11.720430107526882, "loss": 0.8562428951263428, "loss_ce": 4.1704253817442805e-05, "loss_iou": 0.36328125, "loss_num": 0.02587890625, "loss_xval": 0.85546875, "num_input_tokens_seen": 252010524, "step": 2180 }, { "epoch": 11.725806451612904, "grad_norm": 22.38324546813965, "learning_rate": 5e-07, "loss": 0.8366, "num_input_tokens_seen": 252127392, "step": 2181 }, { "epoch": 11.725806451612904, "loss": 0.8435355424880981, "loss_ce": 2.9681581509066746e-05, "loss_iou": 0.357421875, "loss_num": 0.025634765625, "loss_xval": 0.84375, "num_input_tokens_seen": 252127392, "step": 2181 }, { "epoch": 11.731182795698924, "grad_norm": 31.811161041259766, "learning_rate": 5e-07, "loss": 0.7893, "num_input_tokens_seen": 252241424, "step": 2182 }, { "epoch": 11.731182795698924, "loss": 0.7074340581893921, "loss_ce": 3.656703847809695e-05, "loss_iou": 0.310546875, "loss_num": 0.0174560546875, "loss_xval": 0.70703125, "num_input_tokens_seen": 252241424, "step": 2182 }, { "epoch": 11.736559139784946, "grad_norm": 35.09598159790039, "learning_rate": 5e-07, "loss": 0.7212, "num_input_tokens_seen": 252356636, "step": 2183 }, { "epoch": 11.736559139784946, "loss": 0.807647705078125, "loss_ce": 9.148790559265763e-05, "loss_iou": 0.3359375, "loss_num": 0.0269775390625, "loss_xval": 0.80859375, "num_input_tokens_seen": 252356636, "step": 2183 }, { "epoch": 11.741935483870968, "grad_norm": 48.8600959777832, "learning_rate": 5e-07, "loss": 0.7588, "num_input_tokens_seen": 252473052, "step": 2184 }, { "epoch": 11.741935483870968, "loss": 0.9050160646438599, "loss_ce": 0.00010882742935791612, "loss_iou": 0.404296875, "loss_num": 0.01904296875, "loss_xval": 0.90625, "num_input_tokens_seen": 252473052, "step": 2184 }, { "epoch": 11.74731182795699, "grad_norm": 44.91159439086914, "learning_rate": 5e-07, "loss": 0.7143, "num_input_tokens_seen": 252588052, "step": 2185 }, { "epoch": 11.74731182795699, "loss": 0.6059814691543579, "loss_ce": 2.4472308723488823e-05, "loss_iou": 0.26171875, "loss_num": 0.016845703125, "loss_xval": 0.60546875, "num_input_tokens_seen": 252588052, "step": 2185 }, { "epoch": 11.75268817204301, "grad_norm": 34.0020866394043, "learning_rate": 5e-07, "loss": 0.6649, "num_input_tokens_seen": 252705752, "step": 2186 }, { "epoch": 11.75268817204301, "loss": 0.8154618740081787, "loss_ce": 3.215962351532653e-05, "loss_iou": 0.3671875, "loss_num": 0.0162353515625, "loss_xval": 0.81640625, "num_input_tokens_seen": 252705752, "step": 2186 }, { "epoch": 11.758064516129032, "grad_norm": 46.5749626159668, "learning_rate": 5e-07, "loss": 0.6179, "num_input_tokens_seen": 252825116, "step": 2187 }, { "epoch": 11.758064516129032, "loss": 0.49108755588531494, "loss_ce": 5.971951759420335e-05, "loss_iou": 0.2216796875, "loss_num": 0.00933837890625, "loss_xval": 0.490234375, "num_input_tokens_seen": 252825116, "step": 2187 }, { "epoch": 11.763440860215054, "grad_norm": 29.754093170166016, "learning_rate": 5e-07, "loss": 0.6639, "num_input_tokens_seen": 252940256, "step": 2188 }, { "epoch": 11.763440860215054, "loss": 0.7341892719268799, "loss_ce": 5.843375765834935e-05, "loss_iou": 0.322265625, "loss_num": 0.0181884765625, "loss_xval": 0.734375, "num_input_tokens_seen": 252940256, "step": 2188 }, { "epoch": 11.768817204301076, "grad_norm": 16.240447998046875, "learning_rate": 5e-07, "loss": 0.581, "num_input_tokens_seen": 253052808, "step": 2189 }, { "epoch": 11.768817204301076, "loss": 0.5847526788711548, "loss_ce": 3.58500074071344e-05, "loss_iou": 0.263671875, "loss_num": 0.0118408203125, "loss_xval": 0.5859375, "num_input_tokens_seen": 253052808, "step": 2189 }, { "epoch": 11.774193548387096, "grad_norm": 27.532276153564453, "learning_rate": 5e-07, "loss": 0.5532, "num_input_tokens_seen": 253165016, "step": 2190 }, { "epoch": 11.774193548387096, "loss": 0.4517865777015686, "loss_ce": 0.0001874290028354153, "loss_iou": 0.1748046875, "loss_num": 0.0203857421875, "loss_xval": 0.451171875, "num_input_tokens_seen": 253165016, "step": 2190 }, { "epoch": 11.779569892473118, "grad_norm": 21.683412551879883, "learning_rate": 5e-07, "loss": 0.7209, "num_input_tokens_seen": 253280280, "step": 2191 }, { "epoch": 11.779569892473118, "loss": 0.5898649096488953, "loss_ce": 2.113259324687533e-05, "loss_iou": 0.2333984375, "loss_num": 0.024658203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 253280280, "step": 2191 }, { "epoch": 11.78494623655914, "grad_norm": 21.657230377197266, "learning_rate": 5e-07, "loss": 0.7273, "num_input_tokens_seen": 253395348, "step": 2192 }, { "epoch": 11.78494623655914, "loss": 0.906898558139801, "loss_ce": 3.816699972958304e-05, "loss_iou": 0.3984375, "loss_num": 0.021728515625, "loss_xval": 0.90625, "num_input_tokens_seen": 253395348, "step": 2192 }, { "epoch": 11.790322580645162, "grad_norm": 30.750417709350586, "learning_rate": 5e-07, "loss": 0.7395, "num_input_tokens_seen": 253509088, "step": 2193 }, { "epoch": 11.790322580645162, "loss": 0.5066200494766235, "loss_ce": 2.8285117878112942e-05, "loss_iou": 0.2197265625, "loss_num": 0.0133056640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 253509088, "step": 2193 }, { "epoch": 11.795698924731182, "grad_norm": 36.887298583984375, "learning_rate": 5e-07, "loss": 0.709, "num_input_tokens_seen": 253627928, "step": 2194 }, { "epoch": 11.795698924731182, "loss": 1.0118505954742432, "loss_ce": 0.0001317987043876201, "loss_iou": 0.4453125, "loss_num": 0.0245361328125, "loss_xval": 1.015625, "num_input_tokens_seen": 253627928, "step": 2194 }, { "epoch": 11.801075268817204, "grad_norm": 25.076610565185547, "learning_rate": 5e-07, "loss": 0.598, "num_input_tokens_seen": 253747548, "step": 2195 }, { "epoch": 11.801075268817204, "loss": 0.5163941383361816, "loss_ce": 3.6732708394993097e-05, "loss_iou": 0.2216796875, "loss_num": 0.0146484375, "loss_xval": 0.515625, "num_input_tokens_seen": 253747548, "step": 2195 }, { "epoch": 11.806451612903226, "grad_norm": 23.568824768066406, "learning_rate": 5e-07, "loss": 0.6491, "num_input_tokens_seen": 253860124, "step": 2196 }, { "epoch": 11.806451612903226, "loss": 0.9336156845092773, "loss_ce": 2.1949490474071354e-05, "loss_iou": 0.4140625, "loss_num": 0.0211181640625, "loss_xval": 0.93359375, "num_input_tokens_seen": 253860124, "step": 2196 }, { "epoch": 11.811827956989248, "grad_norm": 20.080202102661133, "learning_rate": 5e-07, "loss": 0.5488, "num_input_tokens_seen": 253972140, "step": 2197 }, { "epoch": 11.811827956989248, "loss": 0.6413982510566711, "loss_ce": 0.00016290023631881922, "loss_iou": 0.28515625, "loss_num": 0.014404296875, "loss_xval": 0.640625, "num_input_tokens_seen": 253972140, "step": 2197 }, { "epoch": 11.817204301075268, "grad_norm": 23.144371032714844, "learning_rate": 5e-07, "loss": 0.6712, "num_input_tokens_seen": 254090576, "step": 2198 }, { "epoch": 11.817204301075268, "loss": 0.6316598057746887, "loss_ce": 6.799968105042353e-05, "loss_iou": 0.26171875, "loss_num": 0.0216064453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 254090576, "step": 2198 }, { "epoch": 11.82258064516129, "grad_norm": 22.78315544128418, "learning_rate": 5e-07, "loss": 0.6746, "num_input_tokens_seen": 254206096, "step": 2199 }, { "epoch": 11.82258064516129, "loss": 0.4470004439353943, "loss_ce": 0.0001010345877148211, "loss_iou": 0.1826171875, "loss_num": 0.0164794921875, "loss_xval": 0.447265625, "num_input_tokens_seen": 254206096, "step": 2199 }, { "epoch": 11.827956989247312, "grad_norm": 17.289417266845703, "learning_rate": 5e-07, "loss": 0.705, "num_input_tokens_seen": 254321572, "step": 2200 }, { "epoch": 11.827956989247312, "loss": 0.6533675193786621, "loss_ce": 4.7206704039126635e-05, "loss_iou": 0.279296875, "loss_num": 0.019287109375, "loss_xval": 0.65234375, "num_input_tokens_seen": 254321572, "step": 2200 }, { "epoch": 11.833333333333334, "grad_norm": 22.6743221282959, "learning_rate": 5e-07, "loss": 0.7467, "num_input_tokens_seen": 254434608, "step": 2201 }, { "epoch": 11.833333333333334, "loss": 0.6836808323860168, "loss_ce": 8.710048859938979e-05, "loss_iou": 0.27734375, "loss_num": 0.02587890625, "loss_xval": 0.68359375, "num_input_tokens_seen": 254434608, "step": 2201 }, { "epoch": 11.838709677419354, "grad_norm": 33.0670051574707, "learning_rate": 5e-07, "loss": 0.5995, "num_input_tokens_seen": 254547432, "step": 2202 }, { "epoch": 11.838709677419354, "loss": 0.48224762082099915, "loss_ce": 6.987740925978869e-05, "loss_iou": 0.20703125, "loss_num": 0.01373291015625, "loss_xval": 0.482421875, "num_input_tokens_seen": 254547432, "step": 2202 }, { "epoch": 11.844086021505376, "grad_norm": 32.6998176574707, "learning_rate": 5e-07, "loss": 0.6294, "num_input_tokens_seen": 254665632, "step": 2203 }, { "epoch": 11.844086021505376, "loss": 0.7247218489646912, "loss_ce": 0.0001124626214732416, "loss_iou": 0.306640625, "loss_num": 0.02197265625, "loss_xval": 0.7265625, "num_input_tokens_seen": 254665632, "step": 2203 }, { "epoch": 11.849462365591398, "grad_norm": 28.567773818969727, "learning_rate": 5e-07, "loss": 0.6671, "num_input_tokens_seen": 254780468, "step": 2204 }, { "epoch": 11.849462365591398, "loss": 0.6537674069404602, "loss_ce": 1.982555477297865e-05, "loss_iou": 0.291015625, "loss_num": 0.0145263671875, "loss_xval": 0.65234375, "num_input_tokens_seen": 254780468, "step": 2204 }, { "epoch": 11.85483870967742, "grad_norm": 26.460309982299805, "learning_rate": 5e-07, "loss": 0.6362, "num_input_tokens_seen": 254896264, "step": 2205 }, { "epoch": 11.85483870967742, "loss": 0.7002983093261719, "loss_ce": 4.196106601739302e-05, "loss_iou": 0.3125, "loss_num": 0.0146484375, "loss_xval": 0.69921875, "num_input_tokens_seen": 254896264, "step": 2205 }, { "epoch": 11.86021505376344, "grad_norm": 20.315004348754883, "learning_rate": 5e-07, "loss": 0.665, "num_input_tokens_seen": 255016112, "step": 2206 }, { "epoch": 11.86021505376344, "loss": 0.6040138006210327, "loss_ce": 0.0009864519815891981, "loss_iou": 0.271484375, "loss_num": 0.01214599609375, "loss_xval": 0.6015625, "num_input_tokens_seen": 255016112, "step": 2206 }, { "epoch": 11.865591397849462, "grad_norm": 18.409751892089844, "learning_rate": 5e-07, "loss": 0.6603, "num_input_tokens_seen": 255129496, "step": 2207 }, { "epoch": 11.865591397849462, "loss": 0.4646325409412384, "loss_ce": 3.293935878900811e-05, "loss_iou": 0.1962890625, "loss_num": 0.01458740234375, "loss_xval": 0.46484375, "num_input_tokens_seen": 255129496, "step": 2207 }, { "epoch": 11.870967741935484, "grad_norm": 20.824052810668945, "learning_rate": 5e-07, "loss": 0.8051, "num_input_tokens_seen": 255244172, "step": 2208 }, { "epoch": 11.870967741935484, "loss": 1.0983809232711792, "loss_ce": 0.0002364129468332976, "loss_iou": 0.490234375, "loss_num": 0.023681640625, "loss_xval": 1.1015625, "num_input_tokens_seen": 255244172, "step": 2208 }, { "epoch": 11.876344086021506, "grad_norm": 33.146419525146484, "learning_rate": 5e-07, "loss": 0.7763, "num_input_tokens_seen": 255357984, "step": 2209 }, { "epoch": 11.876344086021506, "loss": 0.9025886058807373, "loss_ce": 0.00024482241133227944, "loss_iou": 0.392578125, "loss_num": 0.023193359375, "loss_xval": 0.90234375, "num_input_tokens_seen": 255357984, "step": 2209 }, { "epoch": 11.881720430107526, "grad_norm": 19.99505615234375, "learning_rate": 5e-07, "loss": 0.5946, "num_input_tokens_seen": 255473796, "step": 2210 }, { "epoch": 11.881720430107526, "loss": 0.7681185007095337, "loss_ce": 5.2137118473183364e-05, "loss_iou": 0.33203125, "loss_num": 0.0205078125, "loss_xval": 0.76953125, "num_input_tokens_seen": 255473796, "step": 2210 }, { "epoch": 11.887096774193548, "grad_norm": 22.693559646606445, "learning_rate": 5e-07, "loss": 0.6172, "num_input_tokens_seen": 255590244, "step": 2211 }, { "epoch": 11.887096774193548, "loss": 0.49989181756973267, "loss_ce": 1.388092277920805e-05, "loss_iou": 0.212890625, "loss_num": 0.01495361328125, "loss_xval": 0.5, "num_input_tokens_seen": 255590244, "step": 2211 }, { "epoch": 11.89247311827957, "grad_norm": 24.315309524536133, "learning_rate": 5e-07, "loss": 0.6157, "num_input_tokens_seen": 255708568, "step": 2212 }, { "epoch": 11.89247311827957, "loss": 0.6329891085624695, "loss_ce": 5.4541898862225935e-05, "loss_iou": 0.27734375, "loss_num": 0.0157470703125, "loss_xval": 0.6328125, "num_input_tokens_seen": 255708568, "step": 2212 }, { "epoch": 11.897849462365592, "grad_norm": 28.992794036865234, "learning_rate": 5e-07, "loss": 0.5709, "num_input_tokens_seen": 255826272, "step": 2213 }, { "epoch": 11.897849462365592, "loss": 0.640392541885376, "loss_ce": 0.0001336847199127078, "loss_iou": 0.2734375, "loss_num": 0.018798828125, "loss_xval": 0.640625, "num_input_tokens_seen": 255826272, "step": 2213 }, { "epoch": 11.903225806451612, "grad_norm": 25.83478546142578, "learning_rate": 5e-07, "loss": 0.7124, "num_input_tokens_seen": 255942188, "step": 2214 }, { "epoch": 11.903225806451612, "loss": 0.6997977495193481, "loss_ce": 9.073904220713302e-05, "loss_iou": 0.294921875, "loss_num": 0.0218505859375, "loss_xval": 0.69921875, "num_input_tokens_seen": 255942188, "step": 2214 }, { "epoch": 11.908602150537634, "grad_norm": 23.14396095275879, "learning_rate": 5e-07, "loss": 0.6176, "num_input_tokens_seen": 256059248, "step": 2215 }, { "epoch": 11.908602150537634, "loss": 0.5277089476585388, "loss_ce": 0.0001210506961797364, "loss_iou": 0.21875, "loss_num": 0.0179443359375, "loss_xval": 0.52734375, "num_input_tokens_seen": 256059248, "step": 2215 }, { "epoch": 11.913978494623656, "grad_norm": 18.029897689819336, "learning_rate": 5e-07, "loss": 0.5671, "num_input_tokens_seen": 256175888, "step": 2216 }, { "epoch": 11.913978494623656, "loss": 0.4915034770965576, "loss_ce": 0.0006587646203115582, "loss_iou": 0.2080078125, "loss_num": 0.01507568359375, "loss_xval": 0.490234375, "num_input_tokens_seen": 256175888, "step": 2216 }, { "epoch": 11.919354838709678, "grad_norm": 28.954002380371094, "learning_rate": 5e-07, "loss": 0.7043, "num_input_tokens_seen": 256290696, "step": 2217 }, { "epoch": 11.919354838709678, "loss": 0.5974920392036438, "loss_ce": 7.990116864675656e-05, "loss_iou": 0.26171875, "loss_num": 0.01531982421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 256290696, "step": 2217 }, { "epoch": 11.924731182795698, "grad_norm": 19.06382942199707, "learning_rate": 5e-07, "loss": 0.7183, "num_input_tokens_seen": 256405804, "step": 2218 }, { "epoch": 11.924731182795698, "loss": 0.6678686141967773, "loss_ce": 2.189336191804614e-05, "loss_iou": 0.2890625, "loss_num": 0.017578125, "loss_xval": 0.66796875, "num_input_tokens_seen": 256405804, "step": 2218 }, { "epoch": 11.93010752688172, "grad_norm": 21.414688110351562, "learning_rate": 5e-07, "loss": 0.5478, "num_input_tokens_seen": 256521244, "step": 2219 }, { "epoch": 11.93010752688172, "loss": 0.5896390676498413, "loss_ce": 3.9447717426810414e-05, "loss_iou": 0.22265625, "loss_num": 0.0286865234375, "loss_xval": 0.58984375, "num_input_tokens_seen": 256521244, "step": 2219 }, { "epoch": 11.935483870967742, "grad_norm": 18.483423233032227, "learning_rate": 5e-07, "loss": 0.6766, "num_input_tokens_seen": 256638556, "step": 2220 }, { "epoch": 11.935483870967742, "loss": 0.5640100240707397, "loss_ce": 4.518120113061741e-05, "loss_iou": 0.240234375, "loss_num": 0.016845703125, "loss_xval": 0.5625, "num_input_tokens_seen": 256638556, "step": 2220 }, { "epoch": 11.940860215053764, "grad_norm": 32.1106071472168, "learning_rate": 5e-07, "loss": 0.5164, "num_input_tokens_seen": 256754716, "step": 2221 }, { "epoch": 11.940860215053764, "loss": 0.4964974820613861, "loss_ce": 3.752965130843222e-05, "loss_iou": 0.2216796875, "loss_num": 0.01055908203125, "loss_xval": 0.49609375, "num_input_tokens_seen": 256754716, "step": 2221 }, { "epoch": 11.946236559139784, "grad_norm": 17.074520111083984, "learning_rate": 5e-07, "loss": 0.657, "num_input_tokens_seen": 256868644, "step": 2222 }, { "epoch": 11.946236559139784, "loss": 0.5560582876205444, "loss_ce": 2.8026130166836083e-05, "loss_iou": 0.2255859375, "loss_num": 0.0206298828125, "loss_xval": 0.5546875, "num_input_tokens_seen": 256868644, "step": 2222 }, { "epoch": 11.951612903225806, "grad_norm": 22.916006088256836, "learning_rate": 5e-07, "loss": 0.6538, "num_input_tokens_seen": 256985128, "step": 2223 }, { "epoch": 11.951612903225806, "loss": 0.6873331069946289, "loss_ce": 7.728100172244012e-05, "loss_iou": 0.287109375, "loss_num": 0.0224609375, "loss_xval": 0.6875, "num_input_tokens_seen": 256985128, "step": 2223 }, { "epoch": 11.956989247311828, "grad_norm": 20.045740127563477, "learning_rate": 5e-07, "loss": 0.5048, "num_input_tokens_seen": 257102356, "step": 2224 }, { "epoch": 11.956989247311828, "loss": 0.33779463171958923, "loss_ce": 2.607696660561487e-05, "loss_iou": 0.1484375, "loss_num": 0.00830078125, "loss_xval": 0.337890625, "num_input_tokens_seen": 257102356, "step": 2224 }, { "epoch": 11.96236559139785, "grad_norm": 22.2525634765625, "learning_rate": 5e-07, "loss": 0.7033, "num_input_tokens_seen": 257218384, "step": 2225 }, { "epoch": 11.96236559139785, "loss": 0.7752348184585571, "loss_ce": 8.830194565234706e-05, "loss_iou": 0.337890625, "loss_num": 0.019775390625, "loss_xval": 0.7734375, "num_input_tokens_seen": 257218384, "step": 2225 }, { "epoch": 11.967741935483872, "grad_norm": 35.93985366821289, "learning_rate": 5e-07, "loss": 0.6327, "num_input_tokens_seen": 257333188, "step": 2226 }, { "epoch": 11.967741935483872, "loss": 0.4509546756744385, "loss_ce": 2.692679117899388e-05, "loss_iou": 0.1923828125, "loss_num": 0.01324462890625, "loss_xval": 0.451171875, "num_input_tokens_seen": 257333188, "step": 2226 }, { "epoch": 11.973118279569892, "grad_norm": 37.51325988769531, "learning_rate": 5e-07, "loss": 0.7327, "num_input_tokens_seen": 257446580, "step": 2227 }, { "epoch": 11.973118279569892, "loss": 0.5483834147453308, "loss_ce": 4.356090721557848e-05, "loss_iou": 0.212890625, "loss_num": 0.0242919921875, "loss_xval": 0.546875, "num_input_tokens_seen": 257446580, "step": 2227 }, { "epoch": 11.978494623655914, "grad_norm": 31.89232063293457, "learning_rate": 5e-07, "loss": 0.6167, "num_input_tokens_seen": 257562620, "step": 2228 }, { "epoch": 11.978494623655914, "loss": 0.6114879250526428, "loss_ce": 3.771538467844948e-05, "loss_iou": 0.271484375, "loss_num": 0.0135498046875, "loss_xval": 0.61328125, "num_input_tokens_seen": 257562620, "step": 2228 }, { "epoch": 11.983870967741936, "grad_norm": 25.952360153198242, "learning_rate": 5e-07, "loss": 0.7002, "num_input_tokens_seen": 257675780, "step": 2229 }, { "epoch": 11.983870967741936, "loss": 0.8406500816345215, "loss_ce": 0.00019598225480876863, "loss_iou": 0.345703125, "loss_num": 0.0299072265625, "loss_xval": 0.83984375, "num_input_tokens_seen": 257675780, "step": 2229 }, { "epoch": 11.989247311827956, "grad_norm": 20.173065185546875, "learning_rate": 5e-07, "loss": 0.6299, "num_input_tokens_seen": 257791212, "step": 2230 }, { "epoch": 11.989247311827956, "loss": 0.9873247146606445, "loss_ce": 2.0008403225801885e-05, "loss_iou": 0.43359375, "loss_num": 0.0234375, "loss_xval": 0.98828125, "num_input_tokens_seen": 257791212, "step": 2230 }, { "epoch": 11.994623655913978, "grad_norm": 18.22447395324707, "learning_rate": 5e-07, "loss": 0.574, "num_input_tokens_seen": 257905624, "step": 2231 }, { "epoch": 11.994623655913978, "loss": 0.6016300916671753, "loss_ce": 6.756573566235602e-05, "loss_iou": 0.26953125, "loss_num": 0.0123291015625, "loss_xval": 0.6015625, "num_input_tokens_seen": 257905624, "step": 2231 }, { "epoch": 12.0, "grad_norm": 24.689619064331055, "learning_rate": 5e-07, "loss": 0.6081, "num_input_tokens_seen": 258019272, "step": 2232 }, { "epoch": 12.0, "loss": 0.6778873205184937, "loss_ce": 3.08500457322225e-05, "loss_iou": 0.28125, "loss_num": 0.0228271484375, "loss_xval": 0.6796875, "num_input_tokens_seen": 258019272, "step": 2232 }, { "epoch": 12.005376344086022, "grad_norm": 30.962265014648438, "learning_rate": 5e-07, "loss": 0.6076, "num_input_tokens_seen": 258132068, "step": 2233 }, { "epoch": 12.005376344086022, "loss": 0.7729821801185608, "loss_ce": 3.295574424555525e-05, "loss_iou": 0.333984375, "loss_num": 0.020751953125, "loss_xval": 0.7734375, "num_input_tokens_seen": 258132068, "step": 2233 }, { "epoch": 12.010752688172044, "grad_norm": 45.781944274902344, "learning_rate": 5e-07, "loss": 0.6038, "num_input_tokens_seen": 258247964, "step": 2234 }, { "epoch": 12.010752688172044, "loss": 0.5754930973052979, "loss_ce": 5.3646152082365006e-05, "loss_iou": 0.265625, "loss_num": 0.0084228515625, "loss_xval": 0.57421875, "num_input_tokens_seen": 258247964, "step": 2234 }, { "epoch": 12.016129032258064, "grad_norm": 32.80252456665039, "learning_rate": 5e-07, "loss": 0.5355, "num_input_tokens_seen": 258364476, "step": 2235 }, { "epoch": 12.016129032258064, "loss": 0.6914684772491455, "loss_ce": 0.0006725747371092439, "loss_iou": 0.2578125, "loss_num": 0.034912109375, "loss_xval": 0.69140625, "num_input_tokens_seen": 258364476, "step": 2235 }, { "epoch": 12.021505376344086, "grad_norm": 23.38483238220215, "learning_rate": 5e-07, "loss": 0.4935, "num_input_tokens_seen": 258482836, "step": 2236 }, { "epoch": 12.021505376344086, "loss": 0.5246791839599609, "loss_ce": 0.00026515955687500536, "loss_iou": 0.2080078125, "loss_num": 0.0216064453125, "loss_xval": 0.5234375, "num_input_tokens_seen": 258482836, "step": 2236 }, { "epoch": 12.026881720430108, "grad_norm": 17.865633010864258, "learning_rate": 5e-07, "loss": 0.521, "num_input_tokens_seen": 258600604, "step": 2237 }, { "epoch": 12.026881720430108, "loss": 0.43312594294548035, "loss_ce": 2.0464442059164867e-05, "loss_iou": 0.1865234375, "loss_num": 0.01214599609375, "loss_xval": 0.43359375, "num_input_tokens_seen": 258600604, "step": 2237 }, { "epoch": 12.03225806451613, "grad_norm": 17.45099639892578, "learning_rate": 5e-07, "loss": 0.7318, "num_input_tokens_seen": 258713476, "step": 2238 }, { "epoch": 12.03225806451613, "loss": 0.575912356376648, "loss_ce": 4.56484267488122e-05, "loss_iou": 0.2333984375, "loss_num": 0.02197265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 258713476, "step": 2238 }, { "epoch": 12.03763440860215, "grad_norm": 25.281410217285156, "learning_rate": 5e-07, "loss": 0.7319, "num_input_tokens_seen": 258828884, "step": 2239 }, { "epoch": 12.03763440860215, "loss": 0.5654556751251221, "loss_ce": 2.5997502234531567e-05, "loss_iou": 0.255859375, "loss_num": 0.0107421875, "loss_xval": 0.56640625, "num_input_tokens_seen": 258828884, "step": 2239 }, { "epoch": 12.043010752688172, "grad_norm": 24.215524673461914, "learning_rate": 5e-07, "loss": 0.742, "num_input_tokens_seen": 258943916, "step": 2240 }, { "epoch": 12.043010752688172, "loss": 0.6770160794258118, "loss_ce": 1.410905497323256e-05, "loss_iou": 0.302734375, "loss_num": 0.0146484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 258943916, "step": 2240 }, { "epoch": 12.048387096774194, "grad_norm": 21.435123443603516, "learning_rate": 5e-07, "loss": 0.6774, "num_input_tokens_seen": 259059208, "step": 2241 }, { "epoch": 12.048387096774194, "loss": 0.6007335186004639, "loss_ce": 2.5489805921097286e-05, "loss_iou": 0.236328125, "loss_num": 0.025634765625, "loss_xval": 0.6015625, "num_input_tokens_seen": 259059208, "step": 2241 }, { "epoch": 12.053763440860216, "grad_norm": 30.818933486938477, "learning_rate": 5e-07, "loss": 0.5901, "num_input_tokens_seen": 259178140, "step": 2242 }, { "epoch": 12.053763440860216, "loss": 0.49514949321746826, "loss_ce": 0.00027645195950753987, "loss_iou": 0.2197265625, "loss_num": 0.010986328125, "loss_xval": 0.494140625, "num_input_tokens_seen": 259178140, "step": 2242 }, { "epoch": 12.059139784946236, "grad_norm": 37.46064758300781, "learning_rate": 5e-07, "loss": 0.6587, "num_input_tokens_seen": 259294208, "step": 2243 }, { "epoch": 12.059139784946236, "loss": 0.7331529855728149, "loss_ce": 0.0006090087117627263, "loss_iou": 0.30859375, "loss_num": 0.02294921875, "loss_xval": 0.734375, "num_input_tokens_seen": 259294208, "step": 2243 }, { "epoch": 12.064516129032258, "grad_norm": 33.21875762939453, "learning_rate": 5e-07, "loss": 0.6339, "num_input_tokens_seen": 259409088, "step": 2244 }, { "epoch": 12.064516129032258, "loss": 0.7196424603462219, "loss_ce": 3.794347503571771e-05, "loss_iou": 0.3125, "loss_num": 0.0191650390625, "loss_xval": 0.71875, "num_input_tokens_seen": 259409088, "step": 2244 }, { "epoch": 12.06989247311828, "grad_norm": 29.200042724609375, "learning_rate": 5e-07, "loss": 0.676, "num_input_tokens_seen": 259526564, "step": 2245 }, { "epoch": 12.06989247311828, "loss": 0.6538325548171997, "loss_ce": 2.396313357166946e-05, "loss_iou": 0.26171875, "loss_num": 0.0262451171875, "loss_xval": 0.65234375, "num_input_tokens_seen": 259526564, "step": 2245 }, { "epoch": 12.075268817204302, "grad_norm": 30.272912979125977, "learning_rate": 5e-07, "loss": 0.6355, "num_input_tokens_seen": 259641240, "step": 2246 }, { "epoch": 12.075268817204302, "loss": 0.8120744228363037, "loss_ce": 6.272186874412e-05, "loss_iou": 0.359375, "loss_num": 0.018310546875, "loss_xval": 0.8125, "num_input_tokens_seen": 259641240, "step": 2246 }, { "epoch": 12.080645161290322, "grad_norm": 33.71619415283203, "learning_rate": 5e-07, "loss": 0.6364, "num_input_tokens_seen": 259758080, "step": 2247 }, { "epoch": 12.080645161290322, "loss": 0.5280168056488037, "loss_ce": 6.265939009608701e-05, "loss_iou": 0.23046875, "loss_num": 0.01336669921875, "loss_xval": 0.52734375, "num_input_tokens_seen": 259758080, "step": 2247 }, { "epoch": 12.086021505376344, "grad_norm": 36.577423095703125, "learning_rate": 5e-07, "loss": 0.7848, "num_input_tokens_seen": 259874164, "step": 2248 }, { "epoch": 12.086021505376344, "loss": 1.0620436668395996, "loss_ce": 3.191464566043578e-05, "loss_iou": 0.486328125, "loss_num": 0.0179443359375, "loss_xval": 1.0625, "num_input_tokens_seen": 259874164, "step": 2248 }, { "epoch": 12.091397849462366, "grad_norm": 35.199729919433594, "learning_rate": 5e-07, "loss": 0.7262, "num_input_tokens_seen": 259987852, "step": 2249 }, { "epoch": 12.091397849462366, "loss": 0.6211289763450623, "loss_ce": 3.5207638575229794e-05, "loss_iou": 0.263671875, "loss_num": 0.0185546875, "loss_xval": 0.62109375, "num_input_tokens_seen": 259987852, "step": 2249 }, { "epoch": 12.096774193548388, "grad_norm": 22.833099365234375, "learning_rate": 5e-07, "loss": 0.6586, "num_input_tokens_seen": 260102684, "step": 2250 }, { "epoch": 12.096774193548388, "eval_icons_CIoU": 0.13793887943029404, "eval_icons_GIoU": 0.10657289624214172, "eval_icons_IoU": 0.301620215177536, "eval_icons_MAE_all": 0.032558915205299854, "eval_icons_MAE_h": 0.03814006969332695, "eval_icons_MAE_w": 0.05549078993499279, "eval_icons_MAE_x_boxes": 0.05286085605621338, "eval_icons_MAE_y_boxes": 0.03315967135131359, "eval_icons_NUM_probability": 0.9986069798469543, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.936518669128418, "eval_icons_loss_ce": 0.00023320735635934398, "eval_icons_loss_iou": 0.87548828125, "eval_icons_loss_num": 0.03334808349609375, "eval_icons_loss_xval": 1.91748046875, "eval_icons_runtime": 46.533, "eval_icons_samples_per_second": 1.075, "eval_icons_steps_per_second": 0.043, "num_input_tokens_seen": 260102684, "step": 2250 }, { "epoch": 12.096774193548388, "eval_screenspot_CIoU": 0.33824723958969116, "eval_screenspot_GIoU": 0.32872991263866425, "eval_screenspot_IoU": 0.42955907185872394, "eval_screenspot_MAE_all": 0.05520756853123506, "eval_screenspot_MAE_h": 0.04937497712671757, "eval_screenspot_MAE_w": 0.07051117594043414, "eval_screenspot_MAE_x_boxes": 0.07355323247611523, "eval_screenspot_MAE_y_boxes": 0.03868744460244974, "eval_screenspot_NUM_probability": 0.999758780002594, "eval_screenspot_inside_bbox": 0.7383333245913187, "eval_screenspot_loss": 1.6746087074279785, "eval_screenspot_loss_ce": 9.548469339885439e-05, "eval_screenspot_loss_iou": 0.7195638020833334, "eval_screenspot_loss_num": 0.062774658203125, "eval_screenspot_loss_xval": 1.7530924479166667, "eval_screenspot_runtime": 73.2107, "eval_screenspot_samples_per_second": 1.216, "eval_screenspot_steps_per_second": 0.041, "num_input_tokens_seen": 260102684, "step": 2250 }, { "epoch": 12.096774193548388, "loss": 1.613325595855713, "loss_ce": 4.438964242581278e-05, "loss_iou": 0.68359375, "loss_num": 0.048828125, "loss_xval": 1.609375, "num_input_tokens_seen": 260102684, "step": 2250 }, { "epoch": 12.102150537634408, "grad_norm": 13.249110221862793, "learning_rate": 5e-07, "loss": 0.5166, "num_input_tokens_seen": 260219932, "step": 2251 }, { "epoch": 12.102150537634408, "loss": 0.4780575633049011, "loss_ce": 3.0226145099732094e-05, "loss_iou": 0.208984375, "loss_num": 0.01202392578125, "loss_xval": 0.478515625, "num_input_tokens_seen": 260219932, "step": 2251 }, { "epoch": 12.10752688172043, "grad_norm": 26.602088928222656, "learning_rate": 5e-07, "loss": 0.6689, "num_input_tokens_seen": 260333212, "step": 2252 }, { "epoch": 12.10752688172043, "loss": 0.7182999849319458, "loss_ce": 3.826418469543569e-05, "loss_iou": 0.31640625, "loss_num": 0.0174560546875, "loss_xval": 0.71875, "num_input_tokens_seen": 260333212, "step": 2252 }, { "epoch": 12.112903225806452, "grad_norm": 22.320049285888672, "learning_rate": 5e-07, "loss": 0.6239, "num_input_tokens_seen": 260448196, "step": 2253 }, { "epoch": 12.112903225806452, "loss": 0.48881012201309204, "loss_ce": 4.0591789002064615e-05, "loss_iou": 0.201171875, "loss_num": 0.01708984375, "loss_xval": 0.48828125, "num_input_tokens_seen": 260448196, "step": 2253 }, { "epoch": 12.118279569892474, "grad_norm": 19.843181610107422, "learning_rate": 5e-07, "loss": 0.71, "num_input_tokens_seen": 260562612, "step": 2254 }, { "epoch": 12.118279569892474, "loss": 0.6594621539115906, "loss_ce": 3.8312035030685365e-05, "loss_iou": 0.28515625, "loss_num": 0.017822265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 260562612, "step": 2254 }, { "epoch": 12.123655913978494, "grad_norm": 18.86272430419922, "learning_rate": 5e-07, "loss": 0.5524, "num_input_tokens_seen": 260681752, "step": 2255 }, { "epoch": 12.123655913978494, "loss": 0.5692368745803833, "loss_ce": 2.3031438104226254e-05, "loss_iou": 0.25, "loss_num": 0.0140380859375, "loss_xval": 0.5703125, "num_input_tokens_seen": 260681752, "step": 2255 }, { "epoch": 12.129032258064516, "grad_norm": 30.249732971191406, "learning_rate": 5e-07, "loss": 0.6479, "num_input_tokens_seen": 260798948, "step": 2256 }, { "epoch": 12.129032258064516, "loss": 0.5679531097412109, "loss_ce": 2.1002128050895408e-05, "loss_iou": 0.23828125, "loss_num": 0.018310546875, "loss_xval": 0.56640625, "num_input_tokens_seen": 260798948, "step": 2256 }, { "epoch": 12.134408602150538, "grad_norm": 28.711275100708008, "learning_rate": 5e-07, "loss": 0.694, "num_input_tokens_seen": 260913108, "step": 2257 }, { "epoch": 12.134408602150538, "loss": 0.8682879209518433, "loss_ce": 0.0001238942495547235, "loss_iou": 0.3671875, "loss_num": 0.0262451171875, "loss_xval": 0.8671875, "num_input_tokens_seen": 260913108, "step": 2257 }, { "epoch": 12.13978494623656, "grad_norm": 25.6513729095459, "learning_rate": 5e-07, "loss": 0.5937, "num_input_tokens_seen": 261031116, "step": 2258 }, { "epoch": 12.13978494623656, "loss": 0.5342808961868286, "loss_ce": 0.0001011642743833363, "loss_iou": 0.228515625, "loss_num": 0.0152587890625, "loss_xval": 0.53515625, "num_input_tokens_seen": 261031116, "step": 2258 }, { "epoch": 12.14516129032258, "grad_norm": 39.941246032714844, "learning_rate": 5e-07, "loss": 0.6689, "num_input_tokens_seen": 261146688, "step": 2259 }, { "epoch": 12.14516129032258, "loss": 0.7351536750793457, "loss_ce": 4.626693407772109e-05, "loss_iou": 0.3203125, "loss_num": 0.0185546875, "loss_xval": 0.734375, "num_input_tokens_seen": 261146688, "step": 2259 }, { "epoch": 12.150537634408602, "grad_norm": 28.46076202392578, "learning_rate": 5e-07, "loss": 0.6356, "num_input_tokens_seen": 261262888, "step": 2260 }, { "epoch": 12.150537634408602, "loss": 0.5999081134796143, "loss_ce": 5.45715847692918e-05, "loss_iou": 0.248046875, "loss_num": 0.0206298828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 261262888, "step": 2260 }, { "epoch": 12.155913978494624, "grad_norm": 20.347490310668945, "learning_rate": 5e-07, "loss": 0.8847, "num_input_tokens_seen": 261377600, "step": 2261 }, { "epoch": 12.155913978494624, "loss": 1.1561474800109863, "loss_ce": 1.9600181985879317e-05, "loss_iou": 0.50390625, "loss_num": 0.0296630859375, "loss_xval": 1.15625, "num_input_tokens_seen": 261377600, "step": 2261 }, { "epoch": 12.161290322580646, "grad_norm": 35.06581497192383, "learning_rate": 5e-07, "loss": 0.6816, "num_input_tokens_seen": 261493768, "step": 2262 }, { "epoch": 12.161290322580646, "loss": 0.7676058411598206, "loss_ce": 2.7708621928468347e-05, "loss_iou": 0.345703125, "loss_num": 0.015625, "loss_xval": 0.765625, "num_input_tokens_seen": 261493768, "step": 2262 }, { "epoch": 12.166666666666666, "grad_norm": 37.396610260009766, "learning_rate": 5e-07, "loss": 0.6892, "num_input_tokens_seen": 261608744, "step": 2263 }, { "epoch": 12.166666666666666, "loss": 0.7883695960044861, "loss_ce": 3.95131683035288e-05, "loss_iou": 0.341796875, "loss_num": 0.0211181640625, "loss_xval": 0.7890625, "num_input_tokens_seen": 261608744, "step": 2263 }, { "epoch": 12.172043010752688, "grad_norm": 49.306190490722656, "learning_rate": 5e-07, "loss": 0.7102, "num_input_tokens_seen": 261724540, "step": 2264 }, { "epoch": 12.172043010752688, "loss": 0.4383888244628906, "loss_ce": 3.432409357628785e-05, "loss_iou": 0.1875, "loss_num": 0.01287841796875, "loss_xval": 0.4375, "num_input_tokens_seen": 261724540, "step": 2264 }, { "epoch": 12.17741935483871, "grad_norm": 65.75788879394531, "learning_rate": 5e-07, "loss": 0.7844, "num_input_tokens_seen": 261839796, "step": 2265 }, { "epoch": 12.17741935483871, "loss": 0.7388044595718384, "loss_ce": 3.498904698062688e-05, "loss_iou": 0.298828125, "loss_num": 0.028564453125, "loss_xval": 0.73828125, "num_input_tokens_seen": 261839796, "step": 2265 }, { "epoch": 12.182795698924732, "grad_norm": 44.5602912902832, "learning_rate": 5e-07, "loss": 0.6202, "num_input_tokens_seen": 261957680, "step": 2266 }, { "epoch": 12.182795698924732, "loss": 0.5315068364143372, "loss_ce": 1.2708185749943368e-05, "loss_iou": 0.232421875, "loss_num": 0.01336669921875, "loss_xval": 0.53125, "num_input_tokens_seen": 261957680, "step": 2266 }, { "epoch": 12.188172043010752, "grad_norm": 38.504493713378906, "learning_rate": 5e-07, "loss": 0.7491, "num_input_tokens_seen": 262071080, "step": 2267 }, { "epoch": 12.188172043010752, "loss": 0.9602862000465393, "loss_ce": 8.111234637908638e-05, "loss_iou": 0.41796875, "loss_num": 0.025390625, "loss_xval": 0.9609375, "num_input_tokens_seen": 262071080, "step": 2267 }, { "epoch": 12.193548387096774, "grad_norm": 29.200868606567383, "learning_rate": 5e-07, "loss": 0.6, "num_input_tokens_seen": 262186608, "step": 2268 }, { "epoch": 12.193548387096774, "loss": 0.6367567777633667, "loss_ce": 3.802225546678528e-05, "loss_iou": 0.279296875, "loss_num": 0.0159912109375, "loss_xval": 0.63671875, "num_input_tokens_seen": 262186608, "step": 2268 }, { "epoch": 12.198924731182796, "grad_norm": 25.15671730041504, "learning_rate": 5e-07, "loss": 0.6822, "num_input_tokens_seen": 262299708, "step": 2269 }, { "epoch": 12.198924731182796, "loss": 0.5296791791915894, "loss_ce": 0.0001381586043862626, "loss_iou": 0.2451171875, "loss_num": 0.00762939453125, "loss_xval": 0.53125, "num_input_tokens_seen": 262299708, "step": 2269 }, { "epoch": 12.204301075268818, "grad_norm": 19.17604637145996, "learning_rate": 5e-07, "loss": 0.6075, "num_input_tokens_seen": 262416896, "step": 2270 }, { "epoch": 12.204301075268818, "loss": 0.7942230701446533, "loss_ce": 3.365769953234121e-05, "loss_iou": 0.34375, "loss_num": 0.021240234375, "loss_xval": 0.79296875, "num_input_tokens_seen": 262416896, "step": 2270 }, { "epoch": 12.209677419354838, "grad_norm": 25.77549934387207, "learning_rate": 5e-07, "loss": 0.8061, "num_input_tokens_seen": 262533028, "step": 2271 }, { "epoch": 12.209677419354838, "loss": 1.1462461948394775, "loss_ce": 0.00012792805500794202, "loss_iou": 0.49609375, "loss_num": 0.0311279296875, "loss_xval": 1.1484375, "num_input_tokens_seen": 262533028, "step": 2271 }, { "epoch": 12.21505376344086, "grad_norm": 20.49109649658203, "learning_rate": 5e-07, "loss": 0.6496, "num_input_tokens_seen": 262651264, "step": 2272 }, { "epoch": 12.21505376344086, "loss": 0.6736049652099609, "loss_ce": 2.1008168914704584e-05, "loss_iou": 0.302734375, "loss_num": 0.01373291015625, "loss_xval": 0.671875, "num_input_tokens_seen": 262651264, "step": 2272 }, { "epoch": 12.220430107526882, "grad_norm": 28.20632553100586, "learning_rate": 5e-07, "loss": 0.5404, "num_input_tokens_seen": 262767604, "step": 2273 }, { "epoch": 12.220430107526882, "loss": 0.3567870557308197, "loss_ce": 3.6565928894560784e-05, "loss_iou": 0.15625, "loss_num": 0.00872802734375, "loss_xval": 0.357421875, "num_input_tokens_seen": 262767604, "step": 2273 }, { "epoch": 12.225806451612904, "grad_norm": 62.05178451538086, "learning_rate": 5e-07, "loss": 0.6761, "num_input_tokens_seen": 262887228, "step": 2274 }, { "epoch": 12.225806451612904, "loss": 0.6711755990982056, "loss_ce": 3.3031217753887177e-05, "loss_iou": 0.287109375, "loss_num": 0.0194091796875, "loss_xval": 0.671875, "num_input_tokens_seen": 262887228, "step": 2274 }, { "epoch": 12.231182795698924, "grad_norm": 19.93333625793457, "learning_rate": 5e-07, "loss": 0.7287, "num_input_tokens_seen": 262997980, "step": 2275 }, { "epoch": 12.231182795698924, "loss": 1.1776816844940186, "loss_ce": 6.93567781127058e-05, "loss_iou": 0.53125, "loss_num": 0.0228271484375, "loss_xval": 1.1796875, "num_input_tokens_seen": 262997980, "step": 2275 }, { "epoch": 12.236559139784946, "grad_norm": 18.54846954345703, "learning_rate": 5e-07, "loss": 0.7387, "num_input_tokens_seen": 263114704, "step": 2276 }, { "epoch": 12.236559139784946, "loss": 0.5410423278808594, "loss_ce": 2.6669724320527166e-05, "loss_iou": 0.24609375, "loss_num": 0.00982666015625, "loss_xval": 0.5390625, "num_input_tokens_seen": 263114704, "step": 2276 }, { "epoch": 12.241935483870968, "grad_norm": 17.261011123657227, "learning_rate": 5e-07, "loss": 0.6532, "num_input_tokens_seen": 263231212, "step": 2277 }, { "epoch": 12.241935483870968, "loss": 0.7580950260162354, "loss_ce": 0.00016040311311371624, "loss_iou": 0.310546875, "loss_num": 0.0274658203125, "loss_xval": 0.7578125, "num_input_tokens_seen": 263231212, "step": 2277 }, { "epoch": 12.24731182795699, "grad_norm": 17.112205505371094, "learning_rate": 5e-07, "loss": 0.5716, "num_input_tokens_seen": 263347316, "step": 2278 }, { "epoch": 12.24731182795699, "loss": 0.5199978351593018, "loss_ce": 5.459400563267991e-05, "loss_iou": 0.23046875, "loss_num": 0.01177978515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 263347316, "step": 2278 }, { "epoch": 12.25268817204301, "grad_norm": 21.43230628967285, "learning_rate": 5e-07, "loss": 0.6643, "num_input_tokens_seen": 263465260, "step": 2279 }, { "epoch": 12.25268817204301, "loss": 0.6492360234260559, "loss_ce": 6.61149388179183e-05, "loss_iou": 0.279296875, "loss_num": 0.01806640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 263465260, "step": 2279 }, { "epoch": 12.258064516129032, "grad_norm": 23.5891170501709, "learning_rate": 5e-07, "loss": 0.7084, "num_input_tokens_seen": 263583396, "step": 2280 }, { "epoch": 12.258064516129032, "loss": 0.6846384406089783, "loss_ce": 0.0001901807263493538, "loss_iou": 0.298828125, "loss_num": 0.0172119140625, "loss_xval": 0.68359375, "num_input_tokens_seen": 263583396, "step": 2280 }, { "epoch": 12.263440860215054, "grad_norm": 23.16090965270996, "learning_rate": 5e-07, "loss": 0.746, "num_input_tokens_seen": 263697388, "step": 2281 }, { "epoch": 12.263440860215054, "loss": 0.9286810159683228, "loss_ce": 0.00021425157319754362, "loss_iou": 0.4140625, "loss_num": 0.02001953125, "loss_xval": 0.9296875, "num_input_tokens_seen": 263697388, "step": 2281 }, { "epoch": 12.268817204301076, "grad_norm": 22.190309524536133, "learning_rate": 5e-07, "loss": 0.776, "num_input_tokens_seen": 263813492, "step": 2282 }, { "epoch": 12.268817204301076, "loss": 0.9563454985618591, "loss_ce": 4.6700402890564874e-05, "loss_iou": 0.427734375, "loss_num": 0.02001953125, "loss_xval": 0.95703125, "num_input_tokens_seen": 263813492, "step": 2282 }, { "epoch": 12.274193548387096, "grad_norm": 16.6146240234375, "learning_rate": 5e-07, "loss": 0.5634, "num_input_tokens_seen": 263929432, "step": 2283 }, { "epoch": 12.274193548387096, "loss": 0.47877442836761475, "loss_ce": 1.4649326658400241e-05, "loss_iou": 0.2080078125, "loss_num": 0.0126953125, "loss_xval": 0.478515625, "num_input_tokens_seen": 263929432, "step": 2283 }, { "epoch": 12.279569892473118, "grad_norm": 21.724220275878906, "learning_rate": 5e-07, "loss": 0.5739, "num_input_tokens_seen": 264043232, "step": 2284 }, { "epoch": 12.279569892473118, "loss": 0.6192048788070679, "loss_ce": 0.0007356200367212296, "loss_iou": 0.271484375, "loss_num": 0.01483154296875, "loss_xval": 0.6171875, "num_input_tokens_seen": 264043232, "step": 2284 }, { "epoch": 12.28494623655914, "grad_norm": 37.28693771362305, "learning_rate": 5e-07, "loss": 0.7671, "num_input_tokens_seen": 264159272, "step": 2285 }, { "epoch": 12.28494623655914, "loss": 0.8717418909072876, "loss_ce": 0.0001598759990883991, "loss_iou": 0.380859375, "loss_num": 0.021728515625, "loss_xval": 0.87109375, "num_input_tokens_seen": 264159272, "step": 2285 }, { "epoch": 12.290322580645162, "grad_norm": 30.42784881591797, "learning_rate": 5e-07, "loss": 0.4979, "num_input_tokens_seen": 264276412, "step": 2286 }, { "epoch": 12.290322580645162, "loss": 0.49196258187294006, "loss_ce": 1.921604052768089e-05, "loss_iou": 0.216796875, "loss_num": 0.01153564453125, "loss_xval": 0.4921875, "num_input_tokens_seen": 264276412, "step": 2286 }, { "epoch": 12.295698924731182, "grad_norm": 22.141889572143555, "learning_rate": 5e-07, "loss": 0.7588, "num_input_tokens_seen": 264392236, "step": 2287 }, { "epoch": 12.295698924731182, "loss": 0.8422348499298096, "loss_ce": 7.170226308517158e-05, "loss_iou": 0.359375, "loss_num": 0.0247802734375, "loss_xval": 0.84375, "num_input_tokens_seen": 264392236, "step": 2287 }, { "epoch": 12.301075268817204, "grad_norm": 24.713333129882812, "learning_rate": 5e-07, "loss": 0.4718, "num_input_tokens_seen": 264508556, "step": 2288 }, { "epoch": 12.301075268817204, "loss": 0.3830825090408325, "loss_ce": 2.587652488728054e-05, "loss_iou": 0.1650390625, "loss_num": 0.0106201171875, "loss_xval": 0.3828125, "num_input_tokens_seen": 264508556, "step": 2288 }, { "epoch": 12.306451612903226, "grad_norm": 18.458147048950195, "learning_rate": 5e-07, "loss": 0.4881, "num_input_tokens_seen": 264625424, "step": 2289 }, { "epoch": 12.306451612903226, "loss": 0.6543319225311279, "loss_ce": 3.500189632177353e-05, "loss_iou": 0.296875, "loss_num": 0.01220703125, "loss_xval": 0.65625, "num_input_tokens_seen": 264625424, "step": 2289 }, { "epoch": 12.311827956989248, "grad_norm": 37.950130462646484, "learning_rate": 5e-07, "loss": 0.6377, "num_input_tokens_seen": 264739308, "step": 2290 }, { "epoch": 12.311827956989248, "loss": 0.5981965065002441, "loss_ce": 5.197931022848934e-05, "loss_iou": 0.2431640625, "loss_num": 0.0224609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 264739308, "step": 2290 }, { "epoch": 12.317204301075268, "grad_norm": 32.924095153808594, "learning_rate": 5e-07, "loss": 0.7036, "num_input_tokens_seen": 264854240, "step": 2291 }, { "epoch": 12.317204301075268, "loss": 0.8448829650878906, "loss_ce": 3.431563527556136e-05, "loss_iou": 0.375, "loss_num": 0.0186767578125, "loss_xval": 0.84375, "num_input_tokens_seen": 264854240, "step": 2291 }, { "epoch": 12.32258064516129, "grad_norm": 30.261192321777344, "learning_rate": 5e-07, "loss": 0.6142, "num_input_tokens_seen": 264969972, "step": 2292 }, { "epoch": 12.32258064516129, "loss": 0.7949684858322144, "loss_ce": 4.6621455112472177e-05, "loss_iou": 0.33984375, "loss_num": 0.02294921875, "loss_xval": 0.796875, "num_input_tokens_seen": 264969972, "step": 2292 }, { "epoch": 12.327956989247312, "grad_norm": 19.134336471557617, "learning_rate": 5e-07, "loss": 0.4139, "num_input_tokens_seen": 265087532, "step": 2293 }, { "epoch": 12.327956989247312, "loss": 0.36542701721191406, "loss_ce": 4.0046885260380805e-05, "loss_iou": 0.1552734375, "loss_num": 0.010986328125, "loss_xval": 0.365234375, "num_input_tokens_seen": 265087532, "step": 2293 }, { "epoch": 12.333333333333334, "grad_norm": 16.65472412109375, "learning_rate": 5e-07, "loss": 0.6733, "num_input_tokens_seen": 265203256, "step": 2294 }, { "epoch": 12.333333333333334, "loss": 0.6362549066543579, "loss_ce": 2.4449624106637202e-05, "loss_iou": 0.26953125, "loss_num": 0.0194091796875, "loss_xval": 0.63671875, "num_input_tokens_seen": 265203256, "step": 2294 }, { "epoch": 12.338709677419354, "grad_norm": 24.9720516204834, "learning_rate": 5e-07, "loss": 0.5766, "num_input_tokens_seen": 265312840, "step": 2295 }, { "epoch": 12.338709677419354, "loss": 0.6697309017181396, "loss_ce": 5.3182367992121726e-05, "loss_iou": 0.302734375, "loss_num": 0.01275634765625, "loss_xval": 0.66796875, "num_input_tokens_seen": 265312840, "step": 2295 }, { "epoch": 12.344086021505376, "grad_norm": 30.55889892578125, "learning_rate": 5e-07, "loss": 0.6682, "num_input_tokens_seen": 265429400, "step": 2296 }, { "epoch": 12.344086021505376, "loss": 0.5785468816757202, "loss_ce": 5.566752952290699e-05, "loss_iou": 0.2578125, "loss_num": 0.01275634765625, "loss_xval": 0.578125, "num_input_tokens_seen": 265429400, "step": 2296 }, { "epoch": 12.349462365591398, "grad_norm": 23.109085083007812, "learning_rate": 5e-07, "loss": 0.6216, "num_input_tokens_seen": 265544984, "step": 2297 }, { "epoch": 12.349462365591398, "loss": 0.5067894458770752, "loss_ce": 7.562831160612404e-05, "loss_iou": 0.2265625, "loss_num": 0.01055908203125, "loss_xval": 0.5078125, "num_input_tokens_seen": 265544984, "step": 2297 }, { "epoch": 12.35483870967742, "grad_norm": 32.517967224121094, "learning_rate": 5e-07, "loss": 0.5382, "num_input_tokens_seen": 265660024, "step": 2298 }, { "epoch": 12.35483870967742, "loss": 0.8320507407188416, "loss_ce": 1.9486085875541903e-05, "loss_iou": 0.353515625, "loss_num": 0.025146484375, "loss_xval": 0.83203125, "num_input_tokens_seen": 265660024, "step": 2298 }, { "epoch": 12.36021505376344, "grad_norm": 24.788164138793945, "learning_rate": 5e-07, "loss": 0.7054, "num_input_tokens_seen": 265774628, "step": 2299 }, { "epoch": 12.36021505376344, "loss": 0.9394758343696594, "loss_ce": 2.272335404995829e-05, "loss_iou": 0.3984375, "loss_num": 0.02880859375, "loss_xval": 0.9375, "num_input_tokens_seen": 265774628, "step": 2299 }, { "epoch": 12.365591397849462, "grad_norm": 40.21893310546875, "learning_rate": 5e-07, "loss": 0.6354, "num_input_tokens_seen": 265891984, "step": 2300 }, { "epoch": 12.365591397849462, "loss": 0.7458710670471191, "loss_ce": 2.1439474949147552e-05, "loss_iou": 0.302734375, "loss_num": 0.02783203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 265891984, "step": 2300 }, { "epoch": 12.370967741935484, "grad_norm": 20.008085250854492, "learning_rate": 5e-07, "loss": 0.8319, "num_input_tokens_seen": 266006608, "step": 2301 }, { "epoch": 12.370967741935484, "loss": 0.8750108480453491, "loss_ce": 1.0849938007595483e-05, "loss_iou": 0.33203125, "loss_num": 0.0419921875, "loss_xval": 0.875, "num_input_tokens_seen": 266006608, "step": 2301 }, { "epoch": 12.376344086021506, "grad_norm": 28.278118133544922, "learning_rate": 5e-07, "loss": 0.6418, "num_input_tokens_seen": 266123880, "step": 2302 }, { "epoch": 12.376344086021506, "loss": 0.40772145986557007, "loss_ce": 0.00012869996135123074, "loss_iou": 0.1796875, "loss_num": 0.0096435546875, "loss_xval": 0.408203125, "num_input_tokens_seen": 266123880, "step": 2302 }, { "epoch": 12.381720430107526, "grad_norm": 28.436330795288086, "learning_rate": 5e-07, "loss": 0.6763, "num_input_tokens_seen": 266242408, "step": 2303 }, { "epoch": 12.381720430107526, "loss": 0.6709251403808594, "loss_ce": 2.673033668543212e-05, "loss_iou": 0.3046875, "loss_num": 0.0126953125, "loss_xval": 0.671875, "num_input_tokens_seen": 266242408, "step": 2303 }, { "epoch": 12.387096774193548, "grad_norm": 33.42384719848633, "learning_rate": 5e-07, "loss": 0.5922, "num_input_tokens_seen": 266358116, "step": 2304 }, { "epoch": 12.387096774193548, "loss": 0.5274056196212769, "loss_ce": 6.186954851727933e-05, "loss_iou": 0.2421875, "loss_num": 0.00836181640625, "loss_xval": 0.52734375, "num_input_tokens_seen": 266358116, "step": 2304 }, { "epoch": 12.39247311827957, "grad_norm": 22.6754150390625, "learning_rate": 5e-07, "loss": 0.7165, "num_input_tokens_seen": 266472312, "step": 2305 }, { "epoch": 12.39247311827957, "loss": 0.8028227090835571, "loss_ce": 8.826664998196065e-05, "loss_iou": 0.337890625, "loss_num": 0.0255126953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 266472312, "step": 2305 }, { "epoch": 12.397849462365592, "grad_norm": 19.456655502319336, "learning_rate": 5e-07, "loss": 0.5337, "num_input_tokens_seen": 266588876, "step": 2306 }, { "epoch": 12.397849462365592, "loss": 0.44191038608551025, "loss_ce": 1.5844363588257693e-05, "loss_iou": 0.1865234375, "loss_num": 0.013671875, "loss_xval": 0.44140625, "num_input_tokens_seen": 266588876, "step": 2306 }, { "epoch": 12.403225806451612, "grad_norm": 20.115982055664062, "learning_rate": 5e-07, "loss": 0.6853, "num_input_tokens_seen": 266702964, "step": 2307 }, { "epoch": 12.403225806451612, "loss": 0.5107898116111755, "loss_ce": 4.760634692502208e-05, "loss_iou": 0.2119140625, "loss_num": 0.0172119140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 266702964, "step": 2307 }, { "epoch": 12.408602150537634, "grad_norm": 22.391128540039062, "learning_rate": 5e-07, "loss": 0.605, "num_input_tokens_seen": 266814864, "step": 2308 }, { "epoch": 12.408602150537634, "loss": 0.6454029083251953, "loss_ce": 1.717679333523847e-05, "loss_iou": 0.2578125, "loss_num": 0.0260009765625, "loss_xval": 0.64453125, "num_input_tokens_seen": 266814864, "step": 2308 }, { "epoch": 12.413978494623656, "grad_norm": 32.58487319946289, "learning_rate": 5e-07, "loss": 0.7799, "num_input_tokens_seen": 266928176, "step": 2309 }, { "epoch": 12.413978494623656, "loss": 0.7893763780593872, "loss_ce": 6.97302893968299e-05, "loss_iou": 0.34765625, "loss_num": 0.019287109375, "loss_xval": 0.7890625, "num_input_tokens_seen": 266928176, "step": 2309 }, { "epoch": 12.419354838709678, "grad_norm": 28.50775718688965, "learning_rate": 5e-07, "loss": 0.6839, "num_input_tokens_seen": 267043364, "step": 2310 }, { "epoch": 12.419354838709678, "loss": 0.6814627647399902, "loss_ce": 6.630475400015712e-05, "loss_iou": 0.30078125, "loss_num": 0.0162353515625, "loss_xval": 0.6796875, "num_input_tokens_seen": 267043364, "step": 2310 }, { "epoch": 12.424731182795698, "grad_norm": 25.544145584106445, "learning_rate": 5e-07, "loss": 0.4799, "num_input_tokens_seen": 267158444, "step": 2311 }, { "epoch": 12.424731182795698, "loss": 0.4151132106781006, "loss_ce": 1.3104136087349616e-05, "loss_iou": 0.1640625, "loss_num": 0.017333984375, "loss_xval": 0.416015625, "num_input_tokens_seen": 267158444, "step": 2311 }, { "epoch": 12.43010752688172, "grad_norm": 22.257600784301758, "learning_rate": 5e-07, "loss": 0.5734, "num_input_tokens_seen": 267277444, "step": 2312 }, { "epoch": 12.43010752688172, "loss": 0.7695658802986145, "loss_ce": 3.459162689978257e-05, "loss_iou": 0.330078125, "loss_num": 0.021728515625, "loss_xval": 0.76953125, "num_input_tokens_seen": 267277444, "step": 2312 }, { "epoch": 12.435483870967742, "grad_norm": 27.05709457397461, "learning_rate": 5e-07, "loss": 0.5788, "num_input_tokens_seen": 267392168, "step": 2313 }, { "epoch": 12.435483870967742, "loss": 0.4744437336921692, "loss_ce": 1.7452823158237152e-05, "loss_iou": 0.205078125, "loss_num": 0.012939453125, "loss_xval": 0.474609375, "num_input_tokens_seen": 267392168, "step": 2313 }, { "epoch": 12.440860215053764, "grad_norm": 25.060564041137695, "learning_rate": 5e-07, "loss": 0.6875, "num_input_tokens_seen": 267506804, "step": 2314 }, { "epoch": 12.440860215053764, "loss": 1.0154058933258057, "loss_ce": 2.502595270925667e-05, "loss_iou": 0.41796875, "loss_num": 0.036376953125, "loss_xval": 1.015625, "num_input_tokens_seen": 267506804, "step": 2314 }, { "epoch": 12.446236559139784, "grad_norm": 20.085397720336914, "learning_rate": 5e-07, "loss": 0.677, "num_input_tokens_seen": 267623064, "step": 2315 }, { "epoch": 12.446236559139784, "loss": 0.4853641390800476, "loss_ce": 1.255990173376631e-05, "loss_iou": 0.208984375, "loss_num": 0.0133056640625, "loss_xval": 0.484375, "num_input_tokens_seen": 267623064, "step": 2315 }, { "epoch": 12.451612903225806, "grad_norm": 22.8654842376709, "learning_rate": 5e-07, "loss": 0.5849, "num_input_tokens_seen": 267741668, "step": 2316 }, { "epoch": 12.451612903225806, "loss": 0.5356693863868713, "loss_ce": 2.486261655576527e-05, "loss_iou": 0.2294921875, "loss_num": 0.015380859375, "loss_xval": 0.53515625, "num_input_tokens_seen": 267741668, "step": 2316 }, { "epoch": 12.456989247311828, "grad_norm": 34.126041412353516, "learning_rate": 5e-07, "loss": 0.7659, "num_input_tokens_seen": 267859628, "step": 2317 }, { "epoch": 12.456989247311828, "loss": 0.8963216543197632, "loss_ce": 8.138974953908473e-05, "loss_iou": 0.392578125, "loss_num": 0.0220947265625, "loss_xval": 0.89453125, "num_input_tokens_seen": 267859628, "step": 2317 }, { "epoch": 12.46236559139785, "grad_norm": 25.877180099487305, "learning_rate": 5e-07, "loss": 0.733, "num_input_tokens_seen": 267973004, "step": 2318 }, { "epoch": 12.46236559139785, "loss": 0.7336639165878296, "loss_ce": 2.131210385414306e-05, "loss_iou": 0.2890625, "loss_num": 0.03125, "loss_xval": 0.734375, "num_input_tokens_seen": 267973004, "step": 2318 }, { "epoch": 12.46774193548387, "grad_norm": 33.934940338134766, "learning_rate": 5e-07, "loss": 0.5736, "num_input_tokens_seen": 268088084, "step": 2319 }, { "epoch": 12.46774193548387, "loss": 0.489524245262146, "loss_ce": 2.229274650744628e-05, "loss_iou": 0.208984375, "loss_num": 0.01422119140625, "loss_xval": 0.490234375, "num_input_tokens_seen": 268088084, "step": 2319 }, { "epoch": 12.473118279569892, "grad_norm": 33.87031936645508, "learning_rate": 5e-07, "loss": 0.6666, "num_input_tokens_seen": 268199532, "step": 2320 }, { "epoch": 12.473118279569892, "loss": 0.43233922123908997, "loss_ce": 2.7214262445340864e-05, "loss_iou": 0.1982421875, "loss_num": 0.007110595703125, "loss_xval": 0.431640625, "num_input_tokens_seen": 268199532, "step": 2320 }, { "epoch": 12.478494623655914, "grad_norm": 27.52652931213379, "learning_rate": 5e-07, "loss": 0.838, "num_input_tokens_seen": 268316584, "step": 2321 }, { "epoch": 12.478494623655914, "loss": 0.8030418157577515, "loss_ce": 6.331418262561783e-05, "loss_iou": 0.349609375, "loss_num": 0.0205078125, "loss_xval": 0.8046875, "num_input_tokens_seen": 268316584, "step": 2321 }, { "epoch": 12.483870967741936, "grad_norm": 23.911319732666016, "learning_rate": 5e-07, "loss": 0.6415, "num_input_tokens_seen": 268432268, "step": 2322 }, { "epoch": 12.483870967741936, "loss": 0.7620419263839722, "loss_ce": 7.908388943178579e-05, "loss_iou": 0.31640625, "loss_num": 0.0260009765625, "loss_xval": 0.76171875, "num_input_tokens_seen": 268432268, "step": 2322 }, { "epoch": 12.489247311827956, "grad_norm": 25.74092674255371, "learning_rate": 5e-07, "loss": 0.6556, "num_input_tokens_seen": 268546416, "step": 2323 }, { "epoch": 12.489247311827956, "loss": 0.6442300081253052, "loss_ce": 6.49760477244854e-05, "loss_iou": 0.27734375, "loss_num": 0.017578125, "loss_xval": 0.64453125, "num_input_tokens_seen": 268546416, "step": 2323 }, { "epoch": 12.494623655913978, "grad_norm": 21.046875, "learning_rate": 5e-07, "loss": 0.6089, "num_input_tokens_seen": 268662636, "step": 2324 }, { "epoch": 12.494623655913978, "loss": 0.629574716091156, "loss_ce": 5.813412644783966e-05, "loss_iou": 0.29296875, "loss_num": 0.0087890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 268662636, "step": 2324 }, { "epoch": 12.5, "grad_norm": 24.987735748291016, "learning_rate": 5e-07, "loss": 0.644, "num_input_tokens_seen": 268778608, "step": 2325 }, { "epoch": 12.5, "loss": 0.7083088159561157, "loss_ce": 5.687812517862767e-05, "loss_iou": 0.3125, "loss_num": 0.0169677734375, "loss_xval": 0.70703125, "num_input_tokens_seen": 268778608, "step": 2325 }, { "epoch": 12.505376344086022, "grad_norm": 51.552425384521484, "learning_rate": 5e-07, "loss": 0.5855, "num_input_tokens_seen": 268896716, "step": 2326 }, { "epoch": 12.505376344086022, "loss": 0.5642255544662476, "loss_ce": 1.6604630218353122e-05, "loss_iou": 0.2314453125, "loss_num": 0.02001953125, "loss_xval": 0.5625, "num_input_tokens_seen": 268896716, "step": 2326 }, { "epoch": 12.510752688172044, "grad_norm": 37.82125473022461, "learning_rate": 5e-07, "loss": 0.4532, "num_input_tokens_seen": 269010916, "step": 2327 }, { "epoch": 12.510752688172044, "loss": 0.43667691946029663, "loss_ce": 3.140324406558648e-05, "loss_iou": 0.1875, "loss_num": 0.01251220703125, "loss_xval": 0.4375, "num_input_tokens_seen": 269010916, "step": 2327 }, { "epoch": 12.516129032258064, "grad_norm": 21.15007972717285, "learning_rate": 5e-07, "loss": 0.6561, "num_input_tokens_seen": 269122696, "step": 2328 }, { "epoch": 12.516129032258064, "loss": 0.8640390038490295, "loss_ce": 2.535113708290737e-05, "loss_iou": 0.373046875, "loss_num": 0.02392578125, "loss_xval": 0.86328125, "num_input_tokens_seen": 269122696, "step": 2328 }, { "epoch": 12.521505376344086, "grad_norm": 19.68057632446289, "learning_rate": 5e-07, "loss": 0.6352, "num_input_tokens_seen": 269234188, "step": 2329 }, { "epoch": 12.521505376344086, "loss": 0.6051247119903564, "loss_ce": 2.2166917915455997e-05, "loss_iou": 0.2412109375, "loss_num": 0.0242919921875, "loss_xval": 0.60546875, "num_input_tokens_seen": 269234188, "step": 2329 }, { "epoch": 12.526881720430108, "grad_norm": 27.114347457885742, "learning_rate": 5e-07, "loss": 0.718, "num_input_tokens_seen": 269350700, "step": 2330 }, { "epoch": 12.526881720430108, "loss": 0.9308149814605713, "loss_ce": 2.8852256946265697e-05, "loss_iou": 0.38671875, "loss_num": 0.031494140625, "loss_xval": 0.9296875, "num_input_tokens_seen": 269350700, "step": 2330 }, { "epoch": 12.532258064516128, "grad_norm": 22.90873908996582, "learning_rate": 5e-07, "loss": 0.6436, "num_input_tokens_seen": 269466476, "step": 2331 }, { "epoch": 12.532258064516128, "loss": 0.6880173683166504, "loss_ce": 2.903729910030961e-05, "loss_iou": 0.3125, "loss_num": 0.0125732421875, "loss_xval": 0.6875, "num_input_tokens_seen": 269466476, "step": 2331 }, { "epoch": 12.53763440860215, "grad_norm": 29.546642303466797, "learning_rate": 5e-07, "loss": 0.6561, "num_input_tokens_seen": 269580884, "step": 2332 }, { "epoch": 12.53763440860215, "loss": 0.4417693018913269, "loss_ce": 2.733959991019219e-05, "loss_iou": 0.19140625, "loss_num": 0.01214599609375, "loss_xval": 0.44140625, "num_input_tokens_seen": 269580884, "step": 2332 }, { "epoch": 12.543010752688172, "grad_norm": 24.33175277709961, "learning_rate": 5e-07, "loss": 0.6871, "num_input_tokens_seen": 269699500, "step": 2333 }, { "epoch": 12.543010752688172, "loss": 0.5928016901016235, "loss_ce": 2.8231317628524266e-05, "loss_iou": 0.26171875, "loss_num": 0.01409912109375, "loss_xval": 0.59375, "num_input_tokens_seen": 269699500, "step": 2333 }, { "epoch": 12.548387096774194, "grad_norm": 27.619232177734375, "learning_rate": 5e-07, "loss": 0.5827, "num_input_tokens_seen": 269815844, "step": 2334 }, { "epoch": 12.548387096774194, "loss": 0.6179399490356445, "loss_ce": 2.0043593394802883e-05, "loss_iou": 0.283203125, "loss_num": 0.01055908203125, "loss_xval": 0.6171875, "num_input_tokens_seen": 269815844, "step": 2334 }, { "epoch": 12.553763440860216, "grad_norm": 21.789318084716797, "learning_rate": 5e-07, "loss": 0.605, "num_input_tokens_seen": 269931640, "step": 2335 }, { "epoch": 12.553763440860216, "loss": 0.879430890083313, "loss_ce": 3.63339131581597e-05, "loss_iou": 0.39453125, "loss_num": 0.01806640625, "loss_xval": 0.87890625, "num_input_tokens_seen": 269931640, "step": 2335 }, { "epoch": 12.559139784946236, "grad_norm": 28.068065643310547, "learning_rate": 5e-07, "loss": 0.604, "num_input_tokens_seen": 270048768, "step": 2336 }, { "epoch": 12.559139784946236, "loss": 0.7906172275543213, "loss_ce": 0.0002119149430654943, "loss_iou": 0.34765625, "loss_num": 0.0185546875, "loss_xval": 0.7890625, "num_input_tokens_seen": 270048768, "step": 2336 }, { "epoch": 12.564516129032258, "grad_norm": 35.92253494262695, "learning_rate": 5e-07, "loss": 0.6987, "num_input_tokens_seen": 270164132, "step": 2337 }, { "epoch": 12.564516129032258, "loss": 0.6949385404586792, "loss_ce": 0.00011430530139477924, "loss_iou": 0.28125, "loss_num": 0.0269775390625, "loss_xval": 0.6953125, "num_input_tokens_seen": 270164132, "step": 2337 }, { "epoch": 12.56989247311828, "grad_norm": 28.30955696105957, "learning_rate": 5e-07, "loss": 0.5468, "num_input_tokens_seen": 270278640, "step": 2338 }, { "epoch": 12.56989247311828, "loss": 0.6956109404563904, "loss_ce": 5.4280924814520404e-05, "loss_iou": 0.302734375, "loss_num": 0.017578125, "loss_xval": 0.6953125, "num_input_tokens_seen": 270278640, "step": 2338 }, { "epoch": 12.575268817204302, "grad_norm": 18.68280601501465, "learning_rate": 5e-07, "loss": 0.5035, "num_input_tokens_seen": 270392648, "step": 2339 }, { "epoch": 12.575268817204302, "loss": 0.47328686714172363, "loss_ce": 2.0269279048079625e-05, "loss_iou": 0.17578125, "loss_num": 0.0242919921875, "loss_xval": 0.47265625, "num_input_tokens_seen": 270392648, "step": 2339 }, { "epoch": 12.580645161290322, "grad_norm": 25.04399299621582, "learning_rate": 5e-07, "loss": 0.5296, "num_input_tokens_seen": 270507744, "step": 2340 }, { "epoch": 12.580645161290322, "loss": 0.5738991498947144, "loss_ce": 4.660763079300523e-05, "loss_iou": 0.240234375, "loss_num": 0.0186767578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 270507744, "step": 2340 }, { "epoch": 12.586021505376344, "grad_norm": 24.89091682434082, "learning_rate": 5e-07, "loss": 0.6612, "num_input_tokens_seen": 270625328, "step": 2341 }, { "epoch": 12.586021505376344, "loss": 0.5472670197486877, "loss_ce": 2.580390173534397e-05, "loss_iou": 0.248046875, "loss_num": 0.01031494140625, "loss_xval": 0.546875, "num_input_tokens_seen": 270625328, "step": 2341 }, { "epoch": 12.591397849462366, "grad_norm": 31.586557388305664, "learning_rate": 5e-07, "loss": 0.7352, "num_input_tokens_seen": 270742664, "step": 2342 }, { "epoch": 12.591397849462366, "loss": 0.7583987712860107, "loss_ce": 0.0003421473957132548, "loss_iou": 0.32421875, "loss_num": 0.021728515625, "loss_xval": 0.7578125, "num_input_tokens_seen": 270742664, "step": 2342 }, { "epoch": 12.596774193548388, "grad_norm": 35.92729949951172, "learning_rate": 5e-07, "loss": 0.5432, "num_input_tokens_seen": 270859220, "step": 2343 }, { "epoch": 12.596774193548388, "loss": 0.534242570400238, "loss_ce": 6.28975685685873e-05, "loss_iou": 0.2333984375, "loss_num": 0.013427734375, "loss_xval": 0.53515625, "num_input_tokens_seen": 270859220, "step": 2343 }, { "epoch": 12.602150537634408, "grad_norm": 22.3006591796875, "learning_rate": 5e-07, "loss": 0.7464, "num_input_tokens_seen": 270972592, "step": 2344 }, { "epoch": 12.602150537634408, "loss": 0.729051411151886, "loss_ce": 4.7493296733591706e-05, "loss_iou": 0.298828125, "loss_num": 0.0263671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 270972592, "step": 2344 }, { "epoch": 12.60752688172043, "grad_norm": 21.285730361938477, "learning_rate": 5e-07, "loss": 0.5574, "num_input_tokens_seen": 271088396, "step": 2345 }, { "epoch": 12.60752688172043, "loss": 0.3618409335613251, "loss_ce": 2.4549422960262746e-05, "loss_iou": 0.1435546875, "loss_num": 0.014892578125, "loss_xval": 0.361328125, "num_input_tokens_seen": 271088396, "step": 2345 }, { "epoch": 12.612903225806452, "grad_norm": 25.12166976928711, "learning_rate": 5e-07, "loss": 0.5817, "num_input_tokens_seen": 271205468, "step": 2346 }, { "epoch": 12.612903225806452, "loss": 0.7405494451522827, "loss_ce": 7.08932857378386e-05, "loss_iou": 0.337890625, "loss_num": 0.01324462890625, "loss_xval": 0.7421875, "num_input_tokens_seen": 271205468, "step": 2346 }, { "epoch": 12.618279569892474, "grad_norm": 24.066083908081055, "learning_rate": 5e-07, "loss": 0.7921, "num_input_tokens_seen": 271318780, "step": 2347 }, { "epoch": 12.618279569892474, "loss": 0.7256609797477722, "loss_ce": 7.504518725909293e-05, "loss_iou": 0.30078125, "loss_num": 0.024658203125, "loss_xval": 0.7265625, "num_input_tokens_seen": 271318780, "step": 2347 }, { "epoch": 12.623655913978494, "grad_norm": 26.228870391845703, "learning_rate": 5e-07, "loss": 0.6351, "num_input_tokens_seen": 271434500, "step": 2348 }, { "epoch": 12.623655913978494, "loss": 0.5520192384719849, "loss_ce": 1.730357325868681e-05, "loss_iou": 0.24609375, "loss_num": 0.011962890625, "loss_xval": 0.55078125, "num_input_tokens_seen": 271434500, "step": 2348 }, { "epoch": 12.629032258064516, "grad_norm": 24.77269744873047, "learning_rate": 5e-07, "loss": 0.6269, "num_input_tokens_seen": 271549600, "step": 2349 }, { "epoch": 12.629032258064516, "loss": 0.543594479560852, "loss_ce": 1.541742676636204e-05, "loss_iou": 0.2412109375, "loss_num": 0.01226806640625, "loss_xval": 0.54296875, "num_input_tokens_seen": 271549600, "step": 2349 }, { "epoch": 12.634408602150538, "grad_norm": 40.39140319824219, "learning_rate": 5e-07, "loss": 0.5989, "num_input_tokens_seen": 271664504, "step": 2350 }, { "epoch": 12.634408602150538, "loss": 0.7988418936729431, "loss_ce": 1.3797325664199889e-05, "loss_iou": 0.3203125, "loss_num": 0.03125, "loss_xval": 0.796875, "num_input_tokens_seen": 271664504, "step": 2350 }, { "epoch": 12.63978494623656, "grad_norm": 32.63795852661133, "learning_rate": 5e-07, "loss": 0.6714, "num_input_tokens_seen": 271781636, "step": 2351 }, { "epoch": 12.63978494623656, "loss": 0.8225723505020142, "loss_ce": 6.261440285015851e-05, "loss_iou": 0.357421875, "loss_num": 0.0216064453125, "loss_xval": 0.82421875, "num_input_tokens_seen": 271781636, "step": 2351 }, { "epoch": 12.64516129032258, "grad_norm": 27.66382598876953, "learning_rate": 5e-07, "loss": 0.735, "num_input_tokens_seen": 271898652, "step": 2352 }, { "epoch": 12.64516129032258, "loss": 0.8854950666427612, "loss_ce": 0.001339819049462676, "loss_iou": 0.375, "loss_num": 0.0267333984375, "loss_xval": 0.8828125, "num_input_tokens_seen": 271898652, "step": 2352 }, { "epoch": 12.650537634408602, "grad_norm": 26.619918823242188, "learning_rate": 5e-07, "loss": 0.5657, "num_input_tokens_seen": 272014320, "step": 2353 }, { "epoch": 12.650537634408602, "loss": 0.5124835968017578, "loss_ce": 3.239068610128015e-05, "loss_iou": 0.2421875, "loss_num": 0.0057373046875, "loss_xval": 0.51171875, "num_input_tokens_seen": 272014320, "step": 2353 }, { "epoch": 12.655913978494624, "grad_norm": 26.015396118164062, "learning_rate": 5e-07, "loss": 0.4318, "num_input_tokens_seen": 272131728, "step": 2354 }, { "epoch": 12.655913978494624, "loss": 0.5112684965133667, "loss_ce": 3.8064277759986e-05, "loss_iou": 0.21875, "loss_num": 0.01470947265625, "loss_xval": 0.51171875, "num_input_tokens_seen": 272131728, "step": 2354 }, { "epoch": 12.661290322580646, "grad_norm": 27.263025283813477, "learning_rate": 5e-07, "loss": 0.634, "num_input_tokens_seen": 272248004, "step": 2355 }, { "epoch": 12.661290322580646, "loss": 0.5682248473167419, "loss_ce": 0.000109605782199651, "loss_iou": 0.24609375, "loss_num": 0.014892578125, "loss_xval": 0.56640625, "num_input_tokens_seen": 272248004, "step": 2355 }, { "epoch": 12.666666666666666, "grad_norm": 29.218286514282227, "learning_rate": 5e-07, "loss": 0.7238, "num_input_tokens_seen": 272365660, "step": 2356 }, { "epoch": 12.666666666666666, "loss": 0.8011777400970459, "loss_ce": 0.0001523843820905313, "loss_iou": 0.359375, "loss_num": 0.016357421875, "loss_xval": 0.80078125, "num_input_tokens_seen": 272365660, "step": 2356 }, { "epoch": 12.672043010752688, "grad_norm": 22.104671478271484, "learning_rate": 5e-07, "loss": 0.5689, "num_input_tokens_seen": 272484172, "step": 2357 }, { "epoch": 12.672043010752688, "loss": 0.3827322721481323, "loss_ce": 4.185340731055476e-05, "loss_iou": 0.1689453125, "loss_num": 0.009033203125, "loss_xval": 0.3828125, "num_input_tokens_seen": 272484172, "step": 2357 }, { "epoch": 12.67741935483871, "grad_norm": 24.52880096435547, "learning_rate": 5e-07, "loss": 0.6847, "num_input_tokens_seen": 272601060, "step": 2358 }, { "epoch": 12.67741935483871, "loss": 0.5478663444519043, "loss_ce": 1.4798510164837353e-05, "loss_iou": 0.2353515625, "loss_num": 0.01556396484375, "loss_xval": 0.546875, "num_input_tokens_seen": 272601060, "step": 2358 }, { "epoch": 12.682795698924732, "grad_norm": 20.191743850708008, "learning_rate": 5e-07, "loss": 0.6712, "num_input_tokens_seen": 272713388, "step": 2359 }, { "epoch": 12.682795698924732, "loss": 0.6866030693054199, "loss_ce": 7.963559619383886e-05, "loss_iou": 0.310546875, "loss_num": 0.0130615234375, "loss_xval": 0.6875, "num_input_tokens_seen": 272713388, "step": 2359 }, { "epoch": 12.688172043010752, "grad_norm": 20.626728057861328, "learning_rate": 5e-07, "loss": 0.7719, "num_input_tokens_seen": 272827836, "step": 2360 }, { "epoch": 12.688172043010752, "loss": 0.6323812007904053, "loss_ce": 5.6970424338942394e-05, "loss_iou": 0.291015625, "loss_num": 0.010009765625, "loss_xval": 0.6328125, "num_input_tokens_seen": 272827836, "step": 2360 }, { "epoch": 12.693548387096774, "grad_norm": 19.954971313476562, "learning_rate": 5e-07, "loss": 0.8425, "num_input_tokens_seen": 272945360, "step": 2361 }, { "epoch": 12.693548387096774, "loss": 1.1463333368301392, "loss_ce": 9.309982124250382e-05, "loss_iou": 0.50390625, "loss_num": 0.02734375, "loss_xval": 1.1484375, "num_input_tokens_seen": 272945360, "step": 2361 }, { "epoch": 12.698924731182796, "grad_norm": 22.0068359375, "learning_rate": 5e-07, "loss": 0.6823, "num_input_tokens_seen": 273060676, "step": 2362 }, { "epoch": 12.698924731182796, "loss": 0.6120867729187012, "loss_ce": 2.6221314328722656e-05, "loss_iou": 0.259765625, "loss_num": 0.01806640625, "loss_xval": 0.61328125, "num_input_tokens_seen": 273060676, "step": 2362 }, { "epoch": 12.704301075268818, "grad_norm": 32.02793502807617, "learning_rate": 5e-07, "loss": 0.6733, "num_input_tokens_seen": 273175864, "step": 2363 }, { "epoch": 12.704301075268818, "loss": 0.7192939519882202, "loss_ce": 5.5669435823801905e-05, "loss_iou": 0.3203125, "loss_num": 0.01611328125, "loss_xval": 0.71875, "num_input_tokens_seen": 273175864, "step": 2363 }, { "epoch": 12.709677419354838, "grad_norm": 42.35188674926758, "learning_rate": 5e-07, "loss": 0.6451, "num_input_tokens_seen": 273291252, "step": 2364 }, { "epoch": 12.709677419354838, "loss": 0.6531250476837158, "loss_ce": 4.884602458332665e-05, "loss_iou": 0.2890625, "loss_num": 0.0150146484375, "loss_xval": 0.65234375, "num_input_tokens_seen": 273291252, "step": 2364 }, { "epoch": 12.71505376344086, "grad_norm": 40.288108825683594, "learning_rate": 5e-07, "loss": 0.7289, "num_input_tokens_seen": 273409396, "step": 2365 }, { "epoch": 12.71505376344086, "loss": 0.6906930208206177, "loss_ce": 1.922904630191624e-05, "loss_iou": 0.3046875, "loss_num": 0.0164794921875, "loss_xval": 0.69140625, "num_input_tokens_seen": 273409396, "step": 2365 }, { "epoch": 12.720430107526882, "grad_norm": 21.798227310180664, "learning_rate": 5e-07, "loss": 0.5941, "num_input_tokens_seen": 273523424, "step": 2366 }, { "epoch": 12.720430107526882, "loss": 0.5826611518859863, "loss_ce": 1.9541814253898337e-05, "loss_iou": 0.21875, "loss_num": 0.0289306640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 273523424, "step": 2366 }, { "epoch": 12.725806451612904, "grad_norm": 23.442214965820312, "learning_rate": 5e-07, "loss": 0.6815, "num_input_tokens_seen": 273641596, "step": 2367 }, { "epoch": 12.725806451612904, "loss": 0.9107047915458679, "loss_ce": 6.026080882293172e-05, "loss_iou": 0.37109375, "loss_num": 0.033447265625, "loss_xval": 0.91015625, "num_input_tokens_seen": 273641596, "step": 2367 }, { "epoch": 12.731182795698924, "grad_norm": 22.769460678100586, "learning_rate": 5e-07, "loss": 0.7076, "num_input_tokens_seen": 273756188, "step": 2368 }, { "epoch": 12.731182795698924, "loss": 0.7425985336303711, "loss_ce": 4.4823085772804916e-05, "loss_iou": 0.330078125, "loss_num": 0.0166015625, "loss_xval": 0.7421875, "num_input_tokens_seen": 273756188, "step": 2368 }, { "epoch": 12.736559139784946, "grad_norm": 30.907005310058594, "learning_rate": 5e-07, "loss": 0.6046, "num_input_tokens_seen": 273870768, "step": 2369 }, { "epoch": 12.736559139784946, "loss": 0.7595614194869995, "loss_ce": 3.9964863390196115e-05, "loss_iou": 0.3203125, "loss_num": 0.0234375, "loss_xval": 0.7578125, "num_input_tokens_seen": 273870768, "step": 2369 }, { "epoch": 12.741935483870968, "grad_norm": 39.21873092651367, "learning_rate": 5e-07, "loss": 0.664, "num_input_tokens_seen": 273984972, "step": 2370 }, { "epoch": 12.741935483870968, "loss": 0.474386066198349, "loss_ce": 2.083701838273555e-05, "loss_iou": 0.2119140625, "loss_num": 0.010009765625, "loss_xval": 0.474609375, "num_input_tokens_seen": 273984972, "step": 2370 }, { "epoch": 12.74731182795699, "grad_norm": 35.1690559387207, "learning_rate": 5e-07, "loss": 0.6523, "num_input_tokens_seen": 274102100, "step": 2371 }, { "epoch": 12.74731182795699, "loss": 0.5786153078079224, "loss_ce": 0.0001241002173628658, "loss_iou": 0.263671875, "loss_num": 0.010498046875, "loss_xval": 0.578125, "num_input_tokens_seen": 274102100, "step": 2371 }, { "epoch": 12.75268817204301, "grad_norm": 25.055419921875, "learning_rate": 5e-07, "loss": 0.6128, "num_input_tokens_seen": 274215628, "step": 2372 }, { "epoch": 12.75268817204301, "loss": 0.6067582368850708, "loss_ce": 6.878601561766118e-05, "loss_iou": 0.248046875, "loss_num": 0.02197265625, "loss_xval": 0.60546875, "num_input_tokens_seen": 274215628, "step": 2372 }, { "epoch": 12.758064516129032, "grad_norm": 24.348304748535156, "learning_rate": 5e-07, "loss": 0.6391, "num_input_tokens_seen": 274329948, "step": 2373 }, { "epoch": 12.758064516129032, "loss": 0.45257437229156494, "loss_ce": 5.971597056486644e-05, "loss_iou": 0.1982421875, "loss_num": 0.0113525390625, "loss_xval": 0.453125, "num_input_tokens_seen": 274329948, "step": 2373 }, { "epoch": 12.763440860215054, "grad_norm": 29.740068435668945, "learning_rate": 5e-07, "loss": 0.6762, "num_input_tokens_seen": 274446048, "step": 2374 }, { "epoch": 12.763440860215054, "loss": 0.6216009855270386, "loss_ce": 1.895854074973613e-05, "loss_iou": 0.271484375, "loss_num": 0.015869140625, "loss_xval": 0.62109375, "num_input_tokens_seen": 274446048, "step": 2374 }, { "epoch": 12.768817204301076, "grad_norm": 15.3117094039917, "learning_rate": 5e-07, "loss": 0.7081, "num_input_tokens_seen": 274563844, "step": 2375 }, { "epoch": 12.768817204301076, "loss": 0.7331908941268921, "loss_ce": 3.66115418728441e-05, "loss_iou": 0.29296875, "loss_num": 0.029296875, "loss_xval": 0.734375, "num_input_tokens_seen": 274563844, "step": 2375 }, { "epoch": 12.774193548387096, "grad_norm": 26.83132553100586, "learning_rate": 5e-07, "loss": 0.5987, "num_input_tokens_seen": 274677076, "step": 2376 }, { "epoch": 12.774193548387096, "loss": 0.4765869379043579, "loss_ce": 2.4417538952548057e-05, "loss_iou": 0.21484375, "loss_num": 0.00958251953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 274677076, "step": 2376 }, { "epoch": 12.779569892473118, "grad_norm": 23.115692138671875, "learning_rate": 5e-07, "loss": 0.6704, "num_input_tokens_seen": 274795564, "step": 2377 }, { "epoch": 12.779569892473118, "loss": 0.6782448291778564, "loss_ce": 2.2184041881700978e-05, "loss_iou": 0.3046875, "loss_num": 0.01397705078125, "loss_xval": 0.6796875, "num_input_tokens_seen": 274795564, "step": 2377 }, { "epoch": 12.78494623655914, "grad_norm": 25.182861328125, "learning_rate": 5e-07, "loss": 0.826, "num_input_tokens_seen": 274909016, "step": 2378 }, { "epoch": 12.78494623655914, "loss": 0.34530484676361084, "loss_ce": 2.897354897868354e-05, "loss_iou": 0.1474609375, "loss_num": 0.010009765625, "loss_xval": 0.345703125, "num_input_tokens_seen": 274909016, "step": 2378 }, { "epoch": 12.790322580645162, "grad_norm": 27.991804122924805, "learning_rate": 5e-07, "loss": 0.6883, "num_input_tokens_seen": 275026456, "step": 2379 }, { "epoch": 12.790322580645162, "loss": 0.5630207061767578, "loss_ce": 3.2442971132695675e-05, "loss_iou": 0.234375, "loss_num": 0.0185546875, "loss_xval": 0.5625, "num_input_tokens_seen": 275026456, "step": 2379 }, { "epoch": 12.795698924731182, "grad_norm": 21.7269287109375, "learning_rate": 5e-07, "loss": 0.6249, "num_input_tokens_seen": 275141884, "step": 2380 }, { "epoch": 12.795698924731182, "loss": 0.6176972389221191, "loss_ce": 2.145596045011189e-05, "loss_iou": 0.275390625, "loss_num": 0.0130615234375, "loss_xval": 0.6171875, "num_input_tokens_seen": 275141884, "step": 2380 }, { "epoch": 12.801075268817204, "grad_norm": 76.99066162109375, "learning_rate": 5e-07, "loss": 0.6334, "num_input_tokens_seen": 275255512, "step": 2381 }, { "epoch": 12.801075268817204, "loss": 0.43462225794792175, "loss_ce": 5.195967241888866e-05, "loss_iou": 0.2001953125, "loss_num": 0.0068359375, "loss_xval": 0.43359375, "num_input_tokens_seen": 275255512, "step": 2381 }, { "epoch": 12.806451612903226, "grad_norm": 28.970130920410156, "learning_rate": 5e-07, "loss": 0.6554, "num_input_tokens_seen": 275369544, "step": 2382 }, { "epoch": 12.806451612903226, "loss": 0.5600073337554932, "loss_ce": 7.077934424160048e-05, "loss_iou": 0.2451171875, "loss_num": 0.01409912109375, "loss_xval": 0.55859375, "num_input_tokens_seen": 275369544, "step": 2382 }, { "epoch": 12.811827956989248, "grad_norm": 49.09086608886719, "learning_rate": 5e-07, "loss": 0.6859, "num_input_tokens_seen": 275488348, "step": 2383 }, { "epoch": 12.811827956989248, "loss": 0.6905863881111145, "loss_ce": 3.462989116087556e-05, "loss_iou": 0.310546875, "loss_num": 0.0140380859375, "loss_xval": 0.69140625, "num_input_tokens_seen": 275488348, "step": 2383 }, { "epoch": 12.817204301075268, "grad_norm": 36.64726257324219, "learning_rate": 5e-07, "loss": 0.677, "num_input_tokens_seen": 275607408, "step": 2384 }, { "epoch": 12.817204301075268, "loss": 0.5962605476379395, "loss_ce": 6.916257552802563e-05, "loss_iou": 0.2578125, "loss_num": 0.015869140625, "loss_xval": 0.59765625, "num_input_tokens_seen": 275607408, "step": 2384 }, { "epoch": 12.82258064516129, "grad_norm": 26.35330581665039, "learning_rate": 5e-07, "loss": 0.6999, "num_input_tokens_seen": 275725912, "step": 2385 }, { "epoch": 12.82258064516129, "loss": 0.9670602083206177, "loss_ce": 0.0002633411204442382, "loss_iou": 0.41796875, "loss_num": 0.0262451171875, "loss_xval": 0.96875, "num_input_tokens_seen": 275725912, "step": 2385 }, { "epoch": 12.827956989247312, "grad_norm": 22.412446975708008, "learning_rate": 5e-07, "loss": 0.5316, "num_input_tokens_seen": 275838744, "step": 2386 }, { "epoch": 12.827956989247312, "loss": 0.4885474741458893, "loss_ce": 2.2089443518780172e-05, "loss_iou": 0.2138671875, "loss_num": 0.01202392578125, "loss_xval": 0.48828125, "num_input_tokens_seen": 275838744, "step": 2386 }, { "epoch": 12.833333333333334, "grad_norm": 19.52828025817871, "learning_rate": 5e-07, "loss": 0.5682, "num_input_tokens_seen": 275954928, "step": 2387 }, { "epoch": 12.833333333333334, "loss": 0.4441108703613281, "loss_ce": 1.909524144139141e-05, "loss_iou": 0.197265625, "loss_num": 0.01007080078125, "loss_xval": 0.443359375, "num_input_tokens_seen": 275954928, "step": 2387 }, { "epoch": 12.838709677419354, "grad_norm": 39.787147521972656, "learning_rate": 5e-07, "loss": 0.6649, "num_input_tokens_seen": 276071632, "step": 2388 }, { "epoch": 12.838709677419354, "loss": 0.5062512159347534, "loss_ce": 2.564715759945102e-05, "loss_iou": 0.2255859375, "loss_num": 0.01080322265625, "loss_xval": 0.5078125, "num_input_tokens_seen": 276071632, "step": 2388 }, { "epoch": 12.844086021505376, "grad_norm": 34.06643295288086, "learning_rate": 5e-07, "loss": 0.638, "num_input_tokens_seen": 276189924, "step": 2389 }, { "epoch": 12.844086021505376, "loss": 0.5457077622413635, "loss_ce": 5.347228579921648e-05, "loss_iou": 0.228515625, "loss_num": 0.0179443359375, "loss_xval": 0.546875, "num_input_tokens_seen": 276189924, "step": 2389 }, { "epoch": 12.849462365591398, "grad_norm": 25.588470458984375, "learning_rate": 5e-07, "loss": 0.7423, "num_input_tokens_seen": 276304340, "step": 2390 }, { "epoch": 12.849462365591398, "loss": 0.6771670579910278, "loss_ce": 4.298529529478401e-05, "loss_iou": 0.306640625, "loss_num": 0.0123291015625, "loss_xval": 0.67578125, "num_input_tokens_seen": 276304340, "step": 2390 }, { "epoch": 12.85483870967742, "grad_norm": 26.858821868896484, "learning_rate": 5e-07, "loss": 0.6985, "num_input_tokens_seen": 276418252, "step": 2391 }, { "epoch": 12.85483870967742, "loss": 0.6162527799606323, "loss_ce": 4.180593168712221e-05, "loss_iou": 0.251953125, "loss_num": 0.02197265625, "loss_xval": 0.6171875, "num_input_tokens_seen": 276418252, "step": 2391 }, { "epoch": 12.86021505376344, "grad_norm": 30.14354705810547, "learning_rate": 5e-07, "loss": 0.6138, "num_input_tokens_seen": 276534808, "step": 2392 }, { "epoch": 12.86021505376344, "loss": 0.7698413133621216, "loss_ce": 6.591062265215442e-05, "loss_iou": 0.333984375, "loss_num": 0.020263671875, "loss_xval": 0.76953125, "num_input_tokens_seen": 276534808, "step": 2392 }, { "epoch": 12.865591397849462, "grad_norm": 60.43046951293945, "learning_rate": 5e-07, "loss": 0.6377, "num_input_tokens_seen": 276650748, "step": 2393 }, { "epoch": 12.865591397849462, "loss": 0.7465649843215942, "loss_ce": 0.00022710917983204126, "loss_iou": 0.328125, "loss_num": 0.01806640625, "loss_xval": 0.74609375, "num_input_tokens_seen": 276650748, "step": 2393 }, { "epoch": 12.870967741935484, "grad_norm": 25.047733306884766, "learning_rate": 5e-07, "loss": 0.6208, "num_input_tokens_seen": 276762340, "step": 2394 }, { "epoch": 12.870967741935484, "loss": 0.6770286560058594, "loss_ce": 2.672764821909368e-05, "loss_iou": 0.302734375, "loss_num": 0.01470947265625, "loss_xval": 0.67578125, "num_input_tokens_seen": 276762340, "step": 2394 }, { "epoch": 12.876344086021506, "grad_norm": 35.95089340209961, "learning_rate": 5e-07, "loss": 0.5064, "num_input_tokens_seen": 276880120, "step": 2395 }, { "epoch": 12.876344086021506, "loss": 0.5232229232788086, "loss_ce": 2.9578797693829983e-05, "loss_iou": 0.20703125, "loss_num": 0.021728515625, "loss_xval": 0.5234375, "num_input_tokens_seen": 276880120, "step": 2395 }, { "epoch": 12.881720430107526, "grad_norm": 25.952255249023438, "learning_rate": 5e-07, "loss": 0.6594, "num_input_tokens_seen": 276997128, "step": 2396 }, { "epoch": 12.881720430107526, "loss": 0.512472927570343, "loss_ce": 2.1736870621680282e-05, "loss_iou": 0.232421875, "loss_num": 0.00921630859375, "loss_xval": 0.51171875, "num_input_tokens_seen": 276997128, "step": 2396 }, { "epoch": 12.887096774193548, "grad_norm": 21.203060150146484, "learning_rate": 5e-07, "loss": 0.4504, "num_input_tokens_seen": 277116020, "step": 2397 }, { "epoch": 12.887096774193548, "loss": 0.374484658241272, "loss_ce": 3.396697866264731e-05, "loss_iou": 0.150390625, "loss_num": 0.0150146484375, "loss_xval": 0.375, "num_input_tokens_seen": 277116020, "step": 2397 }, { "epoch": 12.89247311827957, "grad_norm": 18.92160987854004, "learning_rate": 5e-07, "loss": 0.6323, "num_input_tokens_seen": 277228092, "step": 2398 }, { "epoch": 12.89247311827957, "loss": 0.656042218208313, "loss_ce": 3.640342765720561e-05, "loss_iou": 0.26171875, "loss_num": 0.0263671875, "loss_xval": 0.65625, "num_input_tokens_seen": 277228092, "step": 2398 }, { "epoch": 12.897849462365592, "grad_norm": 32.57687759399414, "learning_rate": 5e-07, "loss": 0.7912, "num_input_tokens_seen": 277343868, "step": 2399 }, { "epoch": 12.897849462365592, "loss": 0.8471947908401489, "loss_ce": 2.683072307263501e-05, "loss_iou": 0.3828125, "loss_num": 0.015869140625, "loss_xval": 0.84765625, "num_input_tokens_seen": 277343868, "step": 2399 }, { "epoch": 12.903225806451612, "grad_norm": 34.42566680908203, "learning_rate": 5e-07, "loss": 0.7379, "num_input_tokens_seen": 277456300, "step": 2400 }, { "epoch": 12.903225806451612, "loss": 0.713402509689331, "loss_ce": 0.0002676989242900163, "loss_iou": 0.3203125, "loss_num": 0.0140380859375, "loss_xval": 0.71484375, "num_input_tokens_seen": 277456300, "step": 2400 }, { "epoch": 12.908602150537634, "grad_norm": 24.845335006713867, "learning_rate": 5e-07, "loss": 0.5681, "num_input_tokens_seen": 277573828, "step": 2401 }, { "epoch": 12.908602150537634, "loss": 0.49697065353393555, "loss_ce": 2.241071342723444e-05, "loss_iou": 0.205078125, "loss_num": 0.0172119140625, "loss_xval": 0.49609375, "num_input_tokens_seen": 277573828, "step": 2401 }, { "epoch": 12.913978494623656, "grad_norm": 31.33653450012207, "learning_rate": 5e-07, "loss": 0.7336, "num_input_tokens_seen": 277691968, "step": 2402 }, { "epoch": 12.913978494623656, "loss": 0.5469178557395935, "loss_ce": 4.281860310584307e-05, "loss_iou": 0.251953125, "loss_num": 0.00823974609375, "loss_xval": 0.546875, "num_input_tokens_seen": 277691968, "step": 2402 }, { "epoch": 12.919354838709678, "grad_norm": 39.03513717651367, "learning_rate": 5e-07, "loss": 0.5394, "num_input_tokens_seen": 277803960, "step": 2403 }, { "epoch": 12.919354838709678, "loss": 0.6695781350135803, "loss_ce": 2.2487827664008364e-05, "loss_iou": 0.27734375, "loss_num": 0.023193359375, "loss_xval": 0.66796875, "num_input_tokens_seen": 277803960, "step": 2403 }, { "epoch": 12.924731182795698, "grad_norm": 27.04831886291504, "learning_rate": 5e-07, "loss": 0.6011, "num_input_tokens_seen": 277921928, "step": 2404 }, { "epoch": 12.924731182795698, "loss": 0.48841845989227295, "loss_ce": 1.5137948139454238e-05, "loss_iou": 0.2060546875, "loss_num": 0.01519775390625, "loss_xval": 0.48828125, "num_input_tokens_seen": 277921928, "step": 2404 }, { "epoch": 12.93010752688172, "grad_norm": 22.048460006713867, "learning_rate": 5e-07, "loss": 0.6159, "num_input_tokens_seen": 278039368, "step": 2405 }, { "epoch": 12.93010752688172, "loss": 0.7823178768157959, "loss_ce": 9.132907871389762e-05, "loss_iou": 0.337890625, "loss_num": 0.0213623046875, "loss_xval": 0.78125, "num_input_tokens_seen": 278039368, "step": 2405 }, { "epoch": 12.935483870967742, "grad_norm": 24.108299255371094, "learning_rate": 5e-07, "loss": 0.65, "num_input_tokens_seen": 278152320, "step": 2406 }, { "epoch": 12.935483870967742, "loss": 0.637966513633728, "loss_ce": 2.709183536353521e-05, "loss_iou": 0.27734375, "loss_num": 0.0166015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 278152320, "step": 2406 }, { "epoch": 12.940860215053764, "grad_norm": 29.363046646118164, "learning_rate": 5e-07, "loss": 0.6243, "num_input_tokens_seen": 278269024, "step": 2407 }, { "epoch": 12.940860215053764, "loss": 0.6559014320373535, "loss_ce": 1.7658518117968924e-05, "loss_iou": 0.275390625, "loss_num": 0.020751953125, "loss_xval": 0.65625, "num_input_tokens_seen": 278269024, "step": 2407 }, { "epoch": 12.946236559139784, "grad_norm": 26.145671844482422, "learning_rate": 5e-07, "loss": 0.6596, "num_input_tokens_seen": 278384884, "step": 2408 }, { "epoch": 12.946236559139784, "loss": 0.38590672612190247, "loss_ce": 4.246303433319554e-05, "loss_iou": 0.1767578125, "loss_num": 0.006500244140625, "loss_xval": 0.38671875, "num_input_tokens_seen": 278384884, "step": 2408 }, { "epoch": 12.951612903225806, "grad_norm": 29.99488067626953, "learning_rate": 5e-07, "loss": 0.8386, "num_input_tokens_seen": 278499384, "step": 2409 }, { "epoch": 12.951612903225806, "loss": 0.8270717859268188, "loss_ce": 0.0002895886136684567, "loss_iou": 0.369140625, "loss_num": 0.0181884765625, "loss_xval": 0.828125, "num_input_tokens_seen": 278499384, "step": 2409 }, { "epoch": 12.956989247311828, "grad_norm": 32.72203826904297, "learning_rate": 5e-07, "loss": 0.6864, "num_input_tokens_seen": 278616220, "step": 2410 }, { "epoch": 12.956989247311828, "loss": 0.6845347881317139, "loss_ce": 8.653230906929821e-05, "loss_iou": 0.3125, "loss_num": 0.01226806640625, "loss_xval": 0.68359375, "num_input_tokens_seen": 278616220, "step": 2410 }, { "epoch": 12.96236559139785, "grad_norm": 37.22199249267578, "learning_rate": 5e-07, "loss": 0.6352, "num_input_tokens_seen": 278732472, "step": 2411 }, { "epoch": 12.96236559139785, "loss": 0.6018410921096802, "loss_ce": 3.44239997502882e-05, "loss_iou": 0.27734375, "loss_num": 0.00921630859375, "loss_xval": 0.6015625, "num_input_tokens_seen": 278732472, "step": 2411 }, { "epoch": 12.967741935483872, "grad_norm": 17.917814254760742, "learning_rate": 5e-07, "loss": 0.6393, "num_input_tokens_seen": 278847212, "step": 2412 }, { "epoch": 12.967741935483872, "loss": 0.5089500546455383, "loss_ce": 3.891855521942489e-05, "loss_iou": 0.216796875, "loss_num": 0.01519775390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 278847212, "step": 2412 }, { "epoch": 12.973118279569892, "grad_norm": 24.89435386657715, "learning_rate": 5e-07, "loss": 0.7117, "num_input_tokens_seen": 278963496, "step": 2413 }, { "epoch": 12.973118279569892, "loss": 0.677300214767456, "loss_ce": 5.4116328101372346e-05, "loss_iou": 0.287109375, "loss_num": 0.0201416015625, "loss_xval": 0.67578125, "num_input_tokens_seen": 278963496, "step": 2413 }, { "epoch": 12.978494623655914, "grad_norm": 27.26348114013672, "learning_rate": 5e-07, "loss": 0.6599, "num_input_tokens_seen": 279078488, "step": 2414 }, { "epoch": 12.978494623655914, "loss": 0.6602356433868408, "loss_ce": 1.836452429415658e-05, "loss_iou": 0.28515625, "loss_num": 0.017822265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 279078488, "step": 2414 }, { "epoch": 12.983870967741936, "grad_norm": 26.08987808227539, "learning_rate": 5e-07, "loss": 0.6064, "num_input_tokens_seen": 279193312, "step": 2415 }, { "epoch": 12.983870967741936, "loss": 0.6898607611656189, "loss_ce": 0.00016349978977814317, "loss_iou": 0.287109375, "loss_num": 0.0230712890625, "loss_xval": 0.69140625, "num_input_tokens_seen": 279193312, "step": 2415 }, { "epoch": 12.989247311827956, "grad_norm": 17.7944278717041, "learning_rate": 5e-07, "loss": 0.5219, "num_input_tokens_seen": 279308980, "step": 2416 }, { "epoch": 12.989247311827956, "loss": 0.3495557904243469, "loss_ce": 6.847198528703302e-05, "loss_iou": 0.150390625, "loss_num": 0.00994873046875, "loss_xval": 0.349609375, "num_input_tokens_seen": 279308980, "step": 2416 }, { "epoch": 12.994623655913978, "grad_norm": 18.531997680664062, "learning_rate": 5e-07, "loss": 0.5349, "num_input_tokens_seen": 279423232, "step": 2417 }, { "epoch": 12.994623655913978, "loss": 0.6481245756149292, "loss_ce": 5.328474071575329e-05, "loss_iou": 0.2890625, "loss_num": 0.01361083984375, "loss_xval": 0.6484375, "num_input_tokens_seen": 279423232, "step": 2417 }, { "epoch": 13.0, "grad_norm": 35.27280044555664, "learning_rate": 5e-07, "loss": 0.5363, "num_input_tokens_seen": 279536812, "step": 2418 }, { "epoch": 13.0, "loss": 0.5784327387809753, "loss_ce": 6.35931282886304e-05, "loss_iou": 0.2490234375, "loss_num": 0.016357421875, "loss_xval": 0.578125, "num_input_tokens_seen": 279536812, "step": 2418 }, { "epoch": 13.005376344086022, "grad_norm": 28.325178146362305, "learning_rate": 5e-07, "loss": 0.9011, "num_input_tokens_seen": 279650156, "step": 2419 }, { "epoch": 13.005376344086022, "loss": 0.6184859871864319, "loss_ce": 1.6729878552723676e-05, "loss_iou": 0.26953125, "loss_num": 0.01611328125, "loss_xval": 0.6171875, "num_input_tokens_seen": 279650156, "step": 2419 }, { "epoch": 13.010752688172044, "grad_norm": 29.076814651489258, "learning_rate": 5e-07, "loss": 0.5273, "num_input_tokens_seen": 279764508, "step": 2420 }, { "epoch": 13.010752688172044, "loss": 0.6543402671813965, "loss_ce": 4.342583270044997e-05, "loss_iou": 0.296875, "loss_num": 0.01239013671875, "loss_xval": 0.65625, "num_input_tokens_seen": 279764508, "step": 2420 }, { "epoch": 13.016129032258064, "grad_norm": 29.789609909057617, "learning_rate": 5e-07, "loss": 0.7147, "num_input_tokens_seen": 279878104, "step": 2421 }, { "epoch": 13.016129032258064, "loss": 0.7691026926040649, "loss_ce": 5.9764497564174235e-05, "loss_iou": 0.353515625, "loss_num": 0.0126953125, "loss_xval": 0.76953125, "num_input_tokens_seen": 279878104, "step": 2421 }, { "epoch": 13.021505376344086, "grad_norm": 25.561729431152344, "learning_rate": 5e-07, "loss": 0.7103, "num_input_tokens_seen": 279997208, "step": 2422 }, { "epoch": 13.021505376344086, "loss": 0.6880226135253906, "loss_ce": 3.430702054174617e-05, "loss_iou": 0.296875, "loss_num": 0.0189208984375, "loss_xval": 0.6875, "num_input_tokens_seen": 279997208, "step": 2422 }, { "epoch": 13.026881720430108, "grad_norm": 19.765581130981445, "learning_rate": 5e-07, "loss": 0.5248, "num_input_tokens_seen": 280112280, "step": 2423 }, { "epoch": 13.026881720430108, "loss": 0.6930219531059265, "loss_ce": 2.876198232115712e-05, "loss_iou": 0.31640625, "loss_num": 0.011962890625, "loss_xval": 0.69140625, "num_input_tokens_seen": 280112280, "step": 2423 }, { "epoch": 13.03225806451613, "grad_norm": 22.653156280517578, "learning_rate": 5e-07, "loss": 0.6697, "num_input_tokens_seen": 280229040, "step": 2424 }, { "epoch": 13.03225806451613, "loss": 0.5320698022842407, "loss_ce": 0.00033150595845654607, "loss_iou": 0.2470703125, "loss_num": 0.00762939453125, "loss_xval": 0.53125, "num_input_tokens_seen": 280229040, "step": 2424 }, { "epoch": 13.03763440860215, "grad_norm": 20.41098403930664, "learning_rate": 5e-07, "loss": 0.6281, "num_input_tokens_seen": 280344788, "step": 2425 }, { "epoch": 13.03763440860215, "loss": 0.6012188196182251, "loss_ce": 2.25046715058852e-05, "loss_iou": 0.267578125, "loss_num": 0.01312255859375, "loss_xval": 0.6015625, "num_input_tokens_seen": 280344788, "step": 2425 }, { "epoch": 13.043010752688172, "grad_norm": 18.198305130004883, "learning_rate": 5e-07, "loss": 0.6504, "num_input_tokens_seen": 280462404, "step": 2426 }, { "epoch": 13.043010752688172, "loss": 0.7799479961395264, "loss_ce": 4.0811923099681735e-05, "loss_iou": 0.34765625, "loss_num": 0.0172119140625, "loss_xval": 0.78125, "num_input_tokens_seen": 280462404, "step": 2426 }, { "epoch": 13.048387096774194, "grad_norm": 19.189712524414062, "learning_rate": 5e-07, "loss": 0.6315, "num_input_tokens_seen": 280579716, "step": 2427 }, { "epoch": 13.048387096774194, "loss": 0.6546276807785034, "loss_ce": 8.67121561896056e-05, "loss_iou": 0.2734375, "loss_num": 0.0213623046875, "loss_xval": 0.65625, "num_input_tokens_seen": 280579716, "step": 2427 }, { "epoch": 13.053763440860216, "grad_norm": 33.366939544677734, "learning_rate": 5e-07, "loss": 0.6654, "num_input_tokens_seen": 280695372, "step": 2428 }, { "epoch": 13.053763440860216, "loss": 0.7383052110671997, "loss_ce": 2.399874028924387e-05, "loss_iou": 0.310546875, "loss_num": 0.02392578125, "loss_xval": 0.73828125, "num_input_tokens_seen": 280695372, "step": 2428 }, { "epoch": 13.059139784946236, "grad_norm": 27.31283950805664, "learning_rate": 5e-07, "loss": 0.6282, "num_input_tokens_seen": 280812288, "step": 2429 }, { "epoch": 13.059139784946236, "loss": 0.8696932792663574, "loss_ce": 6.432902591768652e-05, "loss_iou": 0.375, "loss_num": 0.0234375, "loss_xval": 0.87109375, "num_input_tokens_seen": 280812288, "step": 2429 }, { "epoch": 13.064516129032258, "grad_norm": 25.990888595581055, "learning_rate": 5e-07, "loss": 0.7953, "num_input_tokens_seen": 280924340, "step": 2430 }, { "epoch": 13.064516129032258, "loss": 0.9292159676551819, "loss_ce": 1.6704347217455506e-05, "loss_iou": 0.40234375, "loss_num": 0.0252685546875, "loss_xval": 0.9296875, "num_input_tokens_seen": 280924340, "step": 2430 }, { "epoch": 13.06989247311828, "grad_norm": 25.50885581970215, "learning_rate": 5e-07, "loss": 0.638, "num_input_tokens_seen": 281039632, "step": 2431 }, { "epoch": 13.06989247311828, "loss": 0.6275405883789062, "loss_ce": 9.91934139165096e-05, "loss_iou": 0.265625, "loss_num": 0.0194091796875, "loss_xval": 0.62890625, "num_input_tokens_seen": 281039632, "step": 2431 }, { "epoch": 13.075268817204302, "grad_norm": 27.539762496948242, "learning_rate": 5e-07, "loss": 0.5581, "num_input_tokens_seen": 281151428, "step": 2432 }, { "epoch": 13.075268817204302, "loss": 0.55742347240448, "loss_ce": 0.0005387014243751764, "loss_iou": 0.23046875, "loss_num": 0.019287109375, "loss_xval": 0.55859375, "num_input_tokens_seen": 281151428, "step": 2432 }, { "epoch": 13.080645161290322, "grad_norm": 17.16065216064453, "learning_rate": 5e-07, "loss": 0.6454, "num_input_tokens_seen": 281267544, "step": 2433 }, { "epoch": 13.080645161290322, "loss": 0.5653469562530518, "loss_ce": 3.9351958548650146e-05, "loss_iou": 0.25390625, "loss_num": 0.01153564453125, "loss_xval": 0.56640625, "num_input_tokens_seen": 281267544, "step": 2433 }, { "epoch": 13.086021505376344, "grad_norm": 33.639347076416016, "learning_rate": 5e-07, "loss": 0.4676, "num_input_tokens_seen": 281383400, "step": 2434 }, { "epoch": 13.086021505376344, "loss": 0.5871920585632324, "loss_ce": 3.383941657375544e-05, "loss_iou": 0.259765625, "loss_num": 0.01336669921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 281383400, "step": 2434 }, { "epoch": 13.091397849462366, "grad_norm": 38.126930236816406, "learning_rate": 5e-07, "loss": 0.4934, "num_input_tokens_seen": 281500840, "step": 2435 }, { "epoch": 13.091397849462366, "loss": 0.35707104206085205, "loss_ce": 1.5362878912128508e-05, "loss_iou": 0.1474609375, "loss_num": 0.01220703125, "loss_xval": 0.357421875, "num_input_tokens_seen": 281500840, "step": 2435 }, { "epoch": 13.096774193548388, "grad_norm": 19.33639907836914, "learning_rate": 5e-07, "loss": 0.6952, "num_input_tokens_seen": 281617220, "step": 2436 }, { "epoch": 13.096774193548388, "loss": 0.5642238855361938, "loss_ce": 1.4942927009542473e-05, "loss_iou": 0.244140625, "loss_num": 0.01507568359375, "loss_xval": 0.5625, "num_input_tokens_seen": 281617220, "step": 2436 }, { "epoch": 13.102150537634408, "grad_norm": 57.10881042480469, "learning_rate": 5e-07, "loss": 0.6179, "num_input_tokens_seen": 281734656, "step": 2437 }, { "epoch": 13.102150537634408, "loss": 0.6180188655853271, "loss_ce": 3.78987897420302e-05, "loss_iou": 0.271484375, "loss_num": 0.01531982421875, "loss_xval": 0.6171875, "num_input_tokens_seen": 281734656, "step": 2437 }, { "epoch": 13.10752688172043, "grad_norm": 45.02981185913086, "learning_rate": 5e-07, "loss": 0.7257, "num_input_tokens_seen": 281847200, "step": 2438 }, { "epoch": 13.10752688172043, "loss": 0.8991976380348206, "loss_ce": 2.771963772829622e-05, "loss_iou": 0.3984375, "loss_num": 0.0203857421875, "loss_xval": 0.8984375, "num_input_tokens_seen": 281847200, "step": 2438 }, { "epoch": 13.112903225806452, "grad_norm": 94.75301361083984, "learning_rate": 5e-07, "loss": 0.7067, "num_input_tokens_seen": 281961692, "step": 2439 }, { "epoch": 13.112903225806452, "loss": 0.6846203804016113, "loss_ce": 5.0044345698552206e-05, "loss_iou": 0.31640625, "loss_num": 0.0106201171875, "loss_xval": 0.68359375, "num_input_tokens_seen": 281961692, "step": 2439 }, { "epoch": 13.118279569892474, "grad_norm": 23.2861328125, "learning_rate": 5e-07, "loss": 0.7184, "num_input_tokens_seen": 282075960, "step": 2440 }, { "epoch": 13.118279569892474, "loss": 0.7556655406951904, "loss_ce": 5.0270311476197094e-05, "loss_iou": 0.337890625, "loss_num": 0.01611328125, "loss_xval": 0.75390625, "num_input_tokens_seen": 282075960, "step": 2440 }, { "epoch": 13.123655913978494, "grad_norm": 19.874910354614258, "learning_rate": 5e-07, "loss": 0.5959, "num_input_tokens_seen": 282192436, "step": 2441 }, { "epoch": 13.123655913978494, "loss": 0.5125407576560974, "loss_ce": 8.958037506090477e-05, "loss_iou": 0.220703125, "loss_num": 0.01422119140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 282192436, "step": 2441 }, { "epoch": 13.129032258064516, "grad_norm": 30.731569290161133, "learning_rate": 5e-07, "loss": 0.4924, "num_input_tokens_seen": 282307944, "step": 2442 }, { "epoch": 13.129032258064516, "loss": 0.40872418880462646, "loss_ce": 3.280625242041424e-05, "loss_iou": 0.1640625, "loss_num": 0.016357421875, "loss_xval": 0.408203125, "num_input_tokens_seen": 282307944, "step": 2442 }, { "epoch": 13.134408602150538, "grad_norm": 22.787931442260742, "learning_rate": 5e-07, "loss": 0.6728, "num_input_tokens_seen": 282424192, "step": 2443 }, { "epoch": 13.134408602150538, "loss": 0.5108994245529175, "loss_ce": 3.517290679155849e-05, "loss_iou": 0.23046875, "loss_num": 0.010009765625, "loss_xval": 0.51171875, "num_input_tokens_seen": 282424192, "step": 2443 }, { "epoch": 13.13978494623656, "grad_norm": 18.967737197875977, "learning_rate": 5e-07, "loss": 0.5478, "num_input_tokens_seen": 282539516, "step": 2444 }, { "epoch": 13.13978494623656, "loss": 0.5053308010101318, "loss_ce": 2.0716443032142706e-05, "loss_iou": 0.2216796875, "loss_num": 0.012451171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 282539516, "step": 2444 }, { "epoch": 13.14516129032258, "grad_norm": 21.640892028808594, "learning_rate": 5e-07, "loss": 0.641, "num_input_tokens_seen": 282654896, "step": 2445 }, { "epoch": 13.14516129032258, "loss": 0.423473984003067, "loss_ce": 1.2058897482347675e-05, "loss_iou": 0.1630859375, "loss_num": 0.019287109375, "loss_xval": 0.423828125, "num_input_tokens_seen": 282654896, "step": 2445 }, { "epoch": 13.150537634408602, "grad_norm": 39.838409423828125, "learning_rate": 5e-07, "loss": 0.5961, "num_input_tokens_seen": 282772708, "step": 2446 }, { "epoch": 13.150537634408602, "loss": 0.5937898755073547, "loss_ce": 3.986540832556784e-05, "loss_iou": 0.259765625, "loss_num": 0.01483154296875, "loss_xval": 0.59375, "num_input_tokens_seen": 282772708, "step": 2446 }, { "epoch": 13.155913978494624, "grad_norm": 36.4556770324707, "learning_rate": 5e-07, "loss": 0.7073, "num_input_tokens_seen": 282887056, "step": 2447 }, { "epoch": 13.155913978494624, "loss": 0.860413134098053, "loss_ce": 6.156326708151028e-05, "loss_iou": 0.349609375, "loss_num": 0.031982421875, "loss_xval": 0.859375, "num_input_tokens_seen": 282887056, "step": 2447 }, { "epoch": 13.161290322580646, "grad_norm": 30.16617774963379, "learning_rate": 5e-07, "loss": 0.6746, "num_input_tokens_seen": 283003672, "step": 2448 }, { "epoch": 13.161290322580646, "loss": 0.7561399340629578, "loss_ce": 3.6421271943254396e-05, "loss_iou": 0.34765625, "loss_num": 0.01190185546875, "loss_xval": 0.7578125, "num_input_tokens_seen": 283003672, "step": 2448 }, { "epoch": 13.166666666666666, "grad_norm": 22.959880828857422, "learning_rate": 5e-07, "loss": 0.6046, "num_input_tokens_seen": 283119836, "step": 2449 }, { "epoch": 13.166666666666666, "loss": 0.563983678817749, "loss_ce": 1.8871653082896955e-05, "loss_iou": 0.251953125, "loss_num": 0.01214599609375, "loss_xval": 0.5625, "num_input_tokens_seen": 283119836, "step": 2449 }, { "epoch": 13.172043010752688, "grad_norm": 21.91883659362793, "learning_rate": 5e-07, "loss": 0.5822, "num_input_tokens_seen": 283233944, "step": 2450 }, { "epoch": 13.172043010752688, "loss": 0.624302864074707, "loss_ce": 3.5304830817040056e-05, "loss_iou": 0.26953125, "loss_num": 0.0172119140625, "loss_xval": 0.625, "num_input_tokens_seen": 283233944, "step": 2450 }, { "epoch": 13.17741935483871, "grad_norm": 26.28732681274414, "learning_rate": 5e-07, "loss": 0.6385, "num_input_tokens_seen": 283345868, "step": 2451 }, { "epoch": 13.17741935483871, "loss": 0.5993865728378296, "loss_ce": 2.1302912500686944e-05, "loss_iou": 0.248046875, "loss_num": 0.0203857421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 283345868, "step": 2451 }, { "epoch": 13.182795698924732, "grad_norm": 22.357635498046875, "learning_rate": 5e-07, "loss": 0.6418, "num_input_tokens_seen": 283455448, "step": 2452 }, { "epoch": 13.182795698924732, "loss": 0.7444062232971191, "loss_ce": 2.146919723600149e-05, "loss_iou": 0.31640625, "loss_num": 0.022216796875, "loss_xval": 0.74609375, "num_input_tokens_seen": 283455448, "step": 2452 }, { "epoch": 13.188172043010752, "grad_norm": 36.56707000732422, "learning_rate": 5e-07, "loss": 0.559, "num_input_tokens_seen": 283567736, "step": 2453 }, { "epoch": 13.188172043010752, "loss": 0.7319467067718506, "loss_ce": 0.00037930585676804185, "loss_iou": 0.314453125, "loss_num": 0.02001953125, "loss_xval": 0.73046875, "num_input_tokens_seen": 283567736, "step": 2453 }, { "epoch": 13.193548387096774, "grad_norm": 26.967491149902344, "learning_rate": 5e-07, "loss": 0.7003, "num_input_tokens_seen": 283682812, "step": 2454 }, { "epoch": 13.193548387096774, "loss": 0.4526544213294983, "loss_ce": 1.7711639884510078e-05, "loss_iou": 0.189453125, "loss_num": 0.01483154296875, "loss_xval": 0.453125, "num_input_tokens_seen": 283682812, "step": 2454 }, { "epoch": 13.198924731182796, "grad_norm": 20.011240005493164, "learning_rate": 5e-07, "loss": 0.6722, "num_input_tokens_seen": 283794708, "step": 2455 }, { "epoch": 13.198924731182796, "loss": 0.6020373106002808, "loss_ce": 0.00010863247007364407, "loss_iou": 0.275390625, "loss_num": 0.0106201171875, "loss_xval": 0.6015625, "num_input_tokens_seen": 283794708, "step": 2455 }, { "epoch": 13.204301075268818, "grad_norm": 32.58604431152344, "learning_rate": 5e-07, "loss": 0.6938, "num_input_tokens_seen": 283910088, "step": 2456 }, { "epoch": 13.204301075268818, "loss": 0.6816740036010742, "loss_ce": 3.3397147490177304e-05, "loss_iou": 0.28125, "loss_num": 0.024169921875, "loss_xval": 0.6796875, "num_input_tokens_seen": 283910088, "step": 2456 }, { "epoch": 13.209677419354838, "grad_norm": 28.718692779541016, "learning_rate": 5e-07, "loss": 0.7895, "num_input_tokens_seen": 284025276, "step": 2457 }, { "epoch": 13.209677419354838, "loss": 0.6694498062133789, "loss_ce": 1.6209789464483038e-05, "loss_iou": 0.28515625, "loss_num": 0.0194091796875, "loss_xval": 0.66796875, "num_input_tokens_seen": 284025276, "step": 2457 }, { "epoch": 13.21505376344086, "grad_norm": 23.380224227905273, "learning_rate": 5e-07, "loss": 0.4542, "num_input_tokens_seen": 284139956, "step": 2458 }, { "epoch": 13.21505376344086, "loss": 0.4575364291667938, "loss_ce": 1.6890937331481837e-05, "loss_iou": 0.173828125, "loss_num": 0.02197265625, "loss_xval": 0.45703125, "num_input_tokens_seen": 284139956, "step": 2458 }, { "epoch": 13.220430107526882, "grad_norm": 21.808101654052734, "learning_rate": 5e-07, "loss": 0.574, "num_input_tokens_seen": 284256328, "step": 2459 }, { "epoch": 13.220430107526882, "loss": 0.5115116238594055, "loss_ce": 3.7029072700534016e-05, "loss_iou": 0.232421875, "loss_num": 0.0091552734375, "loss_xval": 0.51171875, "num_input_tokens_seen": 284256328, "step": 2459 }, { "epoch": 13.225806451612904, "grad_norm": 25.838125228881836, "learning_rate": 5e-07, "loss": 0.5643, "num_input_tokens_seen": 284372516, "step": 2460 }, { "epoch": 13.225806451612904, "loss": 0.46598953008651733, "loss_ce": 4.7136101784417406e-05, "loss_iou": 0.20703125, "loss_num": 0.01019287109375, "loss_xval": 0.466796875, "num_input_tokens_seen": 284372516, "step": 2460 }, { "epoch": 13.231182795698924, "grad_norm": 26.6071720123291, "learning_rate": 5e-07, "loss": 0.7148, "num_input_tokens_seen": 284487168, "step": 2461 }, { "epoch": 13.231182795698924, "loss": 0.8694133758544922, "loss_ce": 2.862181281670928e-05, "loss_iou": 0.384765625, "loss_num": 0.02001953125, "loss_xval": 0.87109375, "num_input_tokens_seen": 284487168, "step": 2461 }, { "epoch": 13.236559139784946, "grad_norm": 19.88571548461914, "learning_rate": 5e-07, "loss": 0.7283, "num_input_tokens_seen": 284605180, "step": 2462 }, { "epoch": 13.236559139784946, "loss": 1.1571279764175415, "loss_ce": 2.342602238059044e-05, "loss_iou": 0.482421875, "loss_num": 0.038818359375, "loss_xval": 1.15625, "num_input_tokens_seen": 284605180, "step": 2462 }, { "epoch": 13.241935483870968, "grad_norm": 26.839691162109375, "learning_rate": 5e-07, "loss": 0.898, "num_input_tokens_seen": 284714288, "step": 2463 }, { "epoch": 13.241935483870968, "loss": 0.9194896817207336, "loss_ce": 5.608405263046734e-05, "loss_iou": 0.41015625, "loss_num": 0.0198974609375, "loss_xval": 0.91796875, "num_input_tokens_seen": 284714288, "step": 2463 }, { "epoch": 13.24731182795699, "grad_norm": 35.79433059692383, "learning_rate": 5e-07, "loss": 0.6535, "num_input_tokens_seen": 284829892, "step": 2464 }, { "epoch": 13.24731182795699, "loss": 0.5273603796958923, "loss_ce": 1.659284498600755e-05, "loss_iou": 0.2255859375, "loss_num": 0.01513671875, "loss_xval": 0.52734375, "num_input_tokens_seen": 284829892, "step": 2464 }, { "epoch": 13.25268817204301, "grad_norm": 19.881174087524414, "learning_rate": 5e-07, "loss": 0.7339, "num_input_tokens_seen": 284947528, "step": 2465 }, { "epoch": 13.25268817204301, "loss": 0.46290892362594604, "loss_ce": 1.831108602345921e-05, "loss_iou": 0.19921875, "loss_num": 0.0126953125, "loss_xval": 0.462890625, "num_input_tokens_seen": 284947528, "step": 2465 }, { "epoch": 13.258064516129032, "grad_norm": 33.592071533203125, "learning_rate": 5e-07, "loss": 0.5973, "num_input_tokens_seen": 285060780, "step": 2466 }, { "epoch": 13.258064516129032, "loss": 0.3331465721130371, "loss_ce": 1.669579796725884e-05, "loss_iou": 0.138671875, "loss_num": 0.01116943359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 285060780, "step": 2466 }, { "epoch": 13.263440860215054, "grad_norm": 60.1846923828125, "learning_rate": 5e-07, "loss": 0.6044, "num_input_tokens_seen": 285179316, "step": 2467 }, { "epoch": 13.263440860215054, "loss": 0.730101466178894, "loss_ce": 5.994434468448162e-05, "loss_iou": 0.314453125, "loss_num": 0.0203857421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 285179316, "step": 2467 }, { "epoch": 13.268817204301076, "grad_norm": 26.330095291137695, "learning_rate": 5e-07, "loss": 0.7123, "num_input_tokens_seen": 285292268, "step": 2468 }, { "epoch": 13.268817204301076, "loss": 0.8850434422492981, "loss_ce": 3.367519093444571e-05, "loss_iou": 0.369140625, "loss_num": 0.0291748046875, "loss_xval": 0.88671875, "num_input_tokens_seen": 285292268, "step": 2468 }, { "epoch": 13.274193548387096, "grad_norm": 17.194459915161133, "learning_rate": 5e-07, "loss": 0.6955, "num_input_tokens_seen": 285406776, "step": 2469 }, { "epoch": 13.274193548387096, "loss": 0.804366946220398, "loss_ce": 4.557733336696401e-05, "loss_iou": 0.34375, "loss_num": 0.0234375, "loss_xval": 0.8046875, "num_input_tokens_seen": 285406776, "step": 2469 }, { "epoch": 13.279569892473118, "grad_norm": 22.75674057006836, "learning_rate": 5e-07, "loss": 0.667, "num_input_tokens_seen": 285521148, "step": 2470 }, { "epoch": 13.279569892473118, "loss": 0.6914327144622803, "loss_ce": 2.6485427952138707e-05, "loss_iou": 0.3046875, "loss_num": 0.0164794921875, "loss_xval": 0.69140625, "num_input_tokens_seen": 285521148, "step": 2470 }, { "epoch": 13.28494623655914, "grad_norm": 19.744768142700195, "learning_rate": 5e-07, "loss": 0.4362, "num_input_tokens_seen": 285636992, "step": 2471 }, { "epoch": 13.28494623655914, "loss": 0.3957745432853699, "loss_ce": 2.2591284505324438e-05, "loss_iou": 0.1611328125, "loss_num": 0.01458740234375, "loss_xval": 0.396484375, "num_input_tokens_seen": 285636992, "step": 2471 }, { "epoch": 13.290322580645162, "grad_norm": 24.33632469177246, "learning_rate": 5e-07, "loss": 0.4728, "num_input_tokens_seen": 285757340, "step": 2472 }, { "epoch": 13.290322580645162, "loss": 0.5276155471801758, "loss_ce": 2.7663394575938582e-05, "loss_iou": 0.228515625, "loss_num": 0.013916015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 285757340, "step": 2472 }, { "epoch": 13.295698924731182, "grad_norm": 27.438880920410156, "learning_rate": 5e-07, "loss": 0.5864, "num_input_tokens_seen": 285874852, "step": 2473 }, { "epoch": 13.295698924731182, "loss": 0.5268900394439697, "loss_ce": 3.454327816143632e-05, "loss_iou": 0.2392578125, "loss_num": 0.00994873046875, "loss_xval": 0.52734375, "num_input_tokens_seen": 285874852, "step": 2473 }, { "epoch": 13.301075268817204, "grad_norm": 33.016361236572266, "learning_rate": 5e-07, "loss": 0.7022, "num_input_tokens_seen": 285990532, "step": 2474 }, { "epoch": 13.301075268817204, "loss": 0.7234176397323608, "loss_ce": 2.8941383789060637e-05, "loss_iou": 0.318359375, "loss_num": 0.017578125, "loss_xval": 0.72265625, "num_input_tokens_seen": 285990532, "step": 2474 }, { "epoch": 13.306451612903226, "grad_norm": 20.78963279724121, "learning_rate": 5e-07, "loss": 0.7939, "num_input_tokens_seen": 286100976, "step": 2475 }, { "epoch": 13.306451612903226, "loss": 0.3661181926727295, "loss_ce": 2.9328324671951123e-05, "loss_iou": 0.1435546875, "loss_num": 0.0159912109375, "loss_xval": 0.365234375, "num_input_tokens_seen": 286100976, "step": 2475 }, { "epoch": 13.311827956989248, "grad_norm": 20.6458683013916, "learning_rate": 5e-07, "loss": 0.3988, "num_input_tokens_seen": 286218536, "step": 2476 }, { "epoch": 13.311827956989248, "loss": 0.4473004639148712, "loss_ce": 3.482997271930799e-05, "loss_iou": 0.1953125, "loss_num": 0.011474609375, "loss_xval": 0.447265625, "num_input_tokens_seen": 286218536, "step": 2476 }, { "epoch": 13.317204301075268, "grad_norm": 33.32045364379883, "learning_rate": 5e-07, "loss": 0.6043, "num_input_tokens_seen": 286334092, "step": 2477 }, { "epoch": 13.317204301075268, "loss": 0.9737314581871033, "loss_ce": 0.00022077260655350983, "loss_iou": 0.443359375, "loss_num": 0.017578125, "loss_xval": 0.97265625, "num_input_tokens_seen": 286334092, "step": 2477 }, { "epoch": 13.32258064516129, "grad_norm": 22.077220916748047, "learning_rate": 5e-07, "loss": 0.8509, "num_input_tokens_seen": 286448512, "step": 2478 }, { "epoch": 13.32258064516129, "loss": 1.2105075120925903, "loss_ce": 5.829078872920945e-05, "loss_iou": 0.546875, "loss_num": 0.0234375, "loss_xval": 1.2109375, "num_input_tokens_seen": 286448512, "step": 2478 }, { "epoch": 13.327956989247312, "grad_norm": 23.210134506225586, "learning_rate": 5e-07, "loss": 0.6939, "num_input_tokens_seen": 286560888, "step": 2479 }, { "epoch": 13.327956989247312, "loss": 0.5260206460952759, "loss_ce": 1.9639768652268685e-05, "loss_iou": 0.2294921875, "loss_num": 0.0133056640625, "loss_xval": 0.52734375, "num_input_tokens_seen": 286560888, "step": 2479 }, { "epoch": 13.333333333333334, "grad_norm": 28.19485855102539, "learning_rate": 5e-07, "loss": 0.5816, "num_input_tokens_seen": 286680932, "step": 2480 }, { "epoch": 13.333333333333334, "loss": 0.5729188919067383, "loss_ce": 4.290726064937189e-05, "loss_iou": 0.23828125, "loss_num": 0.0191650390625, "loss_xval": 0.57421875, "num_input_tokens_seen": 286680932, "step": 2480 }, { "epoch": 13.338709677419354, "grad_norm": 35.67665481567383, "learning_rate": 5e-07, "loss": 0.6038, "num_input_tokens_seen": 286796084, "step": 2481 }, { "epoch": 13.338709677419354, "loss": 0.6261131763458252, "loss_ce": 1.4587799341825303e-05, "loss_iou": 0.27734375, "loss_num": 0.0140380859375, "loss_xval": 0.625, "num_input_tokens_seen": 286796084, "step": 2481 }, { "epoch": 13.344086021505376, "grad_norm": 39.490211486816406, "learning_rate": 5e-07, "loss": 0.6299, "num_input_tokens_seen": 286909724, "step": 2482 }, { "epoch": 13.344086021505376, "loss": 0.6902192234992981, "loss_ce": 3.366703458596021e-05, "loss_iou": 0.27734375, "loss_num": 0.0272216796875, "loss_xval": 0.69140625, "num_input_tokens_seen": 286909724, "step": 2482 }, { "epoch": 13.349462365591398, "grad_norm": 42.74925231933594, "learning_rate": 5e-07, "loss": 0.6085, "num_input_tokens_seen": 287026004, "step": 2483 }, { "epoch": 13.349462365591398, "loss": 0.7578749656677246, "loss_ce": 0.0001845101360231638, "loss_iou": 0.296875, "loss_num": 0.03271484375, "loss_xval": 0.7578125, "num_input_tokens_seen": 287026004, "step": 2483 }, { "epoch": 13.35483870967742, "grad_norm": 50.05965042114258, "learning_rate": 5e-07, "loss": 0.5846, "num_input_tokens_seen": 287142184, "step": 2484 }, { "epoch": 13.35483870967742, "loss": 0.6687197685241699, "loss_ce": 1.8559419913799502e-05, "loss_iou": 0.30078125, "loss_num": 0.01361083984375, "loss_xval": 0.66796875, "num_input_tokens_seen": 287142184, "step": 2484 }, { "epoch": 13.36021505376344, "grad_norm": 33.210941314697266, "learning_rate": 5e-07, "loss": 0.5614, "num_input_tokens_seen": 287258524, "step": 2485 }, { "epoch": 13.36021505376344, "loss": 0.6106216311454773, "loss_ce": 2.5907185772666708e-05, "loss_iou": 0.27734375, "loss_num": 0.01141357421875, "loss_xval": 0.609375, "num_input_tokens_seen": 287258524, "step": 2485 }, { "epoch": 13.365591397849462, "grad_norm": 29.722524642944336, "learning_rate": 5e-07, "loss": 0.6047, "num_input_tokens_seen": 287371532, "step": 2486 }, { "epoch": 13.365591397849462, "loss": 0.6132251024246216, "loss_ce": 6.591931014554575e-05, "loss_iou": 0.279296875, "loss_num": 0.01104736328125, "loss_xval": 0.61328125, "num_input_tokens_seen": 287371532, "step": 2486 }, { "epoch": 13.370967741935484, "grad_norm": 34.61610794067383, "learning_rate": 5e-07, "loss": 0.5353, "num_input_tokens_seen": 287484160, "step": 2487 }, { "epoch": 13.370967741935484, "loss": 0.4707207977771759, "loss_ce": 1.7680376913631335e-05, "loss_iou": 0.1826171875, "loss_num": 0.0208740234375, "loss_xval": 0.470703125, "num_input_tokens_seen": 287484160, "step": 2487 }, { "epoch": 13.376344086021506, "grad_norm": 34.58983612060547, "learning_rate": 5e-07, "loss": 0.6963, "num_input_tokens_seen": 287598684, "step": 2488 }, { "epoch": 13.376344086021506, "loss": 1.1206505298614502, "loss_ce": 4.49656872660853e-05, "loss_iou": 0.486328125, "loss_num": 0.029296875, "loss_xval": 1.1171875, "num_input_tokens_seen": 287598684, "step": 2488 }, { "epoch": 13.381720430107526, "grad_norm": 26.5360164642334, "learning_rate": 5e-07, "loss": 0.543, "num_input_tokens_seen": 287716196, "step": 2489 }, { "epoch": 13.381720430107526, "loss": 0.4858664274215698, "loss_ce": 2.656466676853597e-05, "loss_iou": 0.2119140625, "loss_num": 0.01239013671875, "loss_xval": 0.486328125, "num_input_tokens_seen": 287716196, "step": 2489 }, { "epoch": 13.387096774193548, "grad_norm": 16.507413864135742, "learning_rate": 5e-07, "loss": 0.5163, "num_input_tokens_seen": 287830704, "step": 2490 }, { "epoch": 13.387096774193548, "loss": 0.46928495168685913, "loss_ce": 4.668791007134132e-05, "loss_iou": 0.2099609375, "loss_num": 0.00994873046875, "loss_xval": 0.46875, "num_input_tokens_seen": 287830704, "step": 2490 }, { "epoch": 13.39247311827957, "grad_norm": 22.70148277282715, "learning_rate": 5e-07, "loss": 0.7459, "num_input_tokens_seen": 287946700, "step": 2491 }, { "epoch": 13.39247311827957, "loss": 1.2422629594802856, "loss_ce": 7.541846571257338e-05, "loss_iou": 0.5390625, "loss_num": 0.03271484375, "loss_xval": 1.2421875, "num_input_tokens_seen": 287946700, "step": 2491 }, { "epoch": 13.397849462365592, "grad_norm": 37.73140335083008, "learning_rate": 5e-07, "loss": 0.8218, "num_input_tokens_seen": 288059412, "step": 2492 }, { "epoch": 13.397849462365592, "loss": 0.9383212327957153, "loss_ce": 8.880232053343207e-05, "loss_iou": 0.412109375, "loss_num": 0.0228271484375, "loss_xval": 0.9375, "num_input_tokens_seen": 288059412, "step": 2492 }, { "epoch": 13.403225806451612, "grad_norm": 29.78910255432129, "learning_rate": 5e-07, "loss": 0.6707, "num_input_tokens_seen": 288176944, "step": 2493 }, { "epoch": 13.403225806451612, "loss": 0.5669336318969727, "loss_ce": 3.909537917934358e-05, "loss_iou": 0.228515625, "loss_num": 0.022216796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 288176944, "step": 2493 }, { "epoch": 13.408602150537634, "grad_norm": 21.623117446899414, "learning_rate": 5e-07, "loss": 0.632, "num_input_tokens_seen": 288287476, "step": 2494 }, { "epoch": 13.408602150537634, "loss": 0.6780394315719604, "loss_ce": 6.0922156990272924e-05, "loss_iou": 0.29296875, "loss_num": 0.0181884765625, "loss_xval": 0.6796875, "num_input_tokens_seen": 288287476, "step": 2494 }, { "epoch": 13.413978494623656, "grad_norm": 25.19701385498047, "learning_rate": 5e-07, "loss": 0.6303, "num_input_tokens_seen": 288404632, "step": 2495 }, { "epoch": 13.413978494623656, "loss": 0.4355798661708832, "loss_ce": 3.297832518001087e-05, "loss_iou": 0.2041015625, "loss_num": 0.00531005859375, "loss_xval": 0.435546875, "num_input_tokens_seen": 288404632, "step": 2495 }, { "epoch": 13.419354838709678, "grad_norm": 21.738935470581055, "learning_rate": 5e-07, "loss": 0.7571, "num_input_tokens_seen": 288517284, "step": 2496 }, { "epoch": 13.419354838709678, "loss": 0.8315666317939758, "loss_ce": 2.369540379731916e-05, "loss_iou": 0.318359375, "loss_num": 0.0390625, "loss_xval": 0.83203125, "num_input_tokens_seen": 288517284, "step": 2496 }, { "epoch": 13.424731182795698, "grad_norm": 37.551273345947266, "learning_rate": 5e-07, "loss": 0.5872, "num_input_tokens_seen": 288635060, "step": 2497 }, { "epoch": 13.424731182795698, "loss": 0.4878951907157898, "loss_ce": 4.1168968891724944e-05, "loss_iou": 0.201171875, "loss_num": 0.01708984375, "loss_xval": 0.48828125, "num_input_tokens_seen": 288635060, "step": 2497 }, { "epoch": 13.43010752688172, "grad_norm": 46.61618423461914, "learning_rate": 5e-07, "loss": 0.5579, "num_input_tokens_seen": 288748976, "step": 2498 }, { "epoch": 13.43010752688172, "loss": 0.43743568658828735, "loss_ce": 0.00017985817976295948, "loss_iou": 0.1904296875, "loss_num": 0.0113525390625, "loss_xval": 0.4375, "num_input_tokens_seen": 288748976, "step": 2498 }, { "epoch": 13.435483870967742, "grad_norm": 25.60638999938965, "learning_rate": 5e-07, "loss": 0.6414, "num_input_tokens_seen": 288866868, "step": 2499 }, { "epoch": 13.435483870967742, "loss": 0.8645281791687012, "loss_ce": 2.6276295102434233e-05, "loss_iou": 0.38671875, "loss_num": 0.017822265625, "loss_xval": 0.86328125, "num_input_tokens_seen": 288866868, "step": 2499 }, { "epoch": 13.440860215053764, "grad_norm": 28.596269607543945, "learning_rate": 5e-07, "loss": 0.6875, "num_input_tokens_seen": 288979832, "step": 2500 }, { "epoch": 13.440860215053764, "eval_icons_CIoU": 0.1546672210097313, "eval_icons_GIoU": 0.12311464548110962, "eval_icons_IoU": 0.3026292026042938, "eval_icons_MAE_all": 0.032523443922400475, "eval_icons_MAE_h": 0.03337849210947752, "eval_icons_MAE_w": 0.05580953322350979, "eval_icons_MAE_x_boxes": 0.05253669619560242, "eval_icons_MAE_y_boxes": 0.03400277905166149, "eval_icons_NUM_probability": 0.9985814392566681, "eval_icons_inside_bbox": 0.6215277910232544, "eval_icons_loss": 1.8959285020828247, "eval_icons_loss_ce": 0.0002599588333396241, "eval_icons_loss_iou": 0.86474609375, "eval_icons_loss_num": 0.03275299072265625, "eval_icons_loss_xval": 1.892578125, "eval_icons_runtime": 40.3204, "eval_icons_samples_per_second": 1.24, "eval_icons_steps_per_second": 0.05, "num_input_tokens_seen": 288979832, "step": 2500 }, { "epoch": 13.440860215053764, "eval_screenspot_CIoU": 0.33289235830307007, "eval_screenspot_GIoU": 0.32408996919790906, "eval_screenspot_IoU": 0.4294418493906657, "eval_screenspot_MAE_all": 0.05491895601153374, "eval_screenspot_MAE_h": 0.04989652646084627, "eval_screenspot_MAE_w": 0.07426385829846065, "eval_screenspot_MAE_x_boxes": 0.07462216168642044, "eval_screenspot_MAE_y_boxes": 0.038392458111047745, "eval_screenspot_NUM_probability": 0.9998427629470825, "eval_screenspot_inside_bbox": 0.7620833317438761, "eval_screenspot_loss": 1.6909090280532837, "eval_screenspot_loss_ce": 6.962361961389736e-05, "eval_screenspot_loss_iou": 0.7295735677083334, "eval_screenspot_loss_num": 0.06203460693359375, "eval_screenspot_loss_xval": 1.7682291666666667, "eval_screenspot_runtime": 91.0803, "eval_screenspot_samples_per_second": 0.977, "eval_screenspot_steps_per_second": 0.033, "num_input_tokens_seen": 288979832, "step": 2500 }, { "epoch": 13.440860215053764, "loss": 1.6284483671188354, "loss_ce": 3.0342491299961694e-05, "loss_iou": 0.69921875, "loss_num": 0.046875, "loss_xval": 1.625, "num_input_tokens_seen": 288979832, "step": 2500 }, { "epoch": 13.446236559139784, "grad_norm": 25.326881408691406, "learning_rate": 5e-07, "loss": 0.7335, "num_input_tokens_seen": 289095048, "step": 2501 }, { "epoch": 13.446236559139784, "loss": 0.8636436462402344, "loss_ce": 0.00011823703243862838, "loss_iou": 0.375, "loss_num": 0.02294921875, "loss_xval": 0.86328125, "num_input_tokens_seen": 289095048, "step": 2501 }, { "epoch": 13.451612903225806, "grad_norm": 40.608211517333984, "learning_rate": 5e-07, "loss": 0.6013, "num_input_tokens_seen": 289211448, "step": 2502 }, { "epoch": 13.451612903225806, "loss": 0.5947459936141968, "loss_ce": 1.941678419825621e-05, "loss_iou": 0.259765625, "loss_num": 0.0147705078125, "loss_xval": 0.59375, "num_input_tokens_seen": 289211448, "step": 2502 }, { "epoch": 13.456989247311828, "grad_norm": 64.88700866699219, "learning_rate": 5e-07, "loss": 0.5954, "num_input_tokens_seen": 289327052, "step": 2503 }, { "epoch": 13.456989247311828, "loss": 0.4180024266242981, "loss_ce": 3.367955650901422e-05, "loss_iou": 0.1875, "loss_num": 0.0086669921875, "loss_xval": 0.41796875, "num_input_tokens_seen": 289327052, "step": 2503 }, { "epoch": 13.46236559139785, "grad_norm": 30.84988021850586, "learning_rate": 5e-07, "loss": 0.7408, "num_input_tokens_seen": 289441100, "step": 2504 }, { "epoch": 13.46236559139785, "loss": 0.7583461403846741, "loss_ce": 4.534732943284325e-05, "loss_iou": 0.3359375, "loss_num": 0.017333984375, "loss_xval": 0.7578125, "num_input_tokens_seen": 289441100, "step": 2504 }, { "epoch": 13.46774193548387, "grad_norm": 26.2672176361084, "learning_rate": 5e-07, "loss": 0.7147, "num_input_tokens_seen": 289554168, "step": 2505 }, { "epoch": 13.46774193548387, "loss": 0.9548584818840027, "loss_ce": 2.4480974389007315e-05, "loss_iou": 0.4140625, "loss_num": 0.0250244140625, "loss_xval": 0.953125, "num_input_tokens_seen": 289554168, "step": 2505 }, { "epoch": 13.473118279569892, "grad_norm": 17.35026741027832, "learning_rate": 5e-07, "loss": 0.5027, "num_input_tokens_seen": 289672824, "step": 2506 }, { "epoch": 13.473118279569892, "loss": 0.5293300747871399, "loss_ce": 3.32286290358752e-05, "loss_iou": 0.2236328125, "loss_num": 0.016357421875, "loss_xval": 0.53125, "num_input_tokens_seen": 289672824, "step": 2506 }, { "epoch": 13.478494623655914, "grad_norm": 27.798070907592773, "learning_rate": 5e-07, "loss": 0.6226, "num_input_tokens_seen": 289788300, "step": 2507 }, { "epoch": 13.478494623655914, "loss": 0.6411705017089844, "loss_ce": 5.728574615204707e-05, "loss_iou": 0.28125, "loss_num": 0.0159912109375, "loss_xval": 0.640625, "num_input_tokens_seen": 289788300, "step": 2507 }, { "epoch": 13.483870967741936, "grad_norm": 38.61562728881836, "learning_rate": 5e-07, "loss": 0.6495, "num_input_tokens_seen": 289903404, "step": 2508 }, { "epoch": 13.483870967741936, "loss": 0.8276797533035278, "loss_ce": 4.300909859011881e-05, "loss_iou": 0.373046875, "loss_num": 0.016357421875, "loss_xval": 0.828125, "num_input_tokens_seen": 289903404, "step": 2508 }, { "epoch": 13.489247311827956, "grad_norm": 30.189035415649414, "learning_rate": 5e-07, "loss": 0.6146, "num_input_tokens_seen": 290020992, "step": 2509 }, { "epoch": 13.489247311827956, "loss": 0.5287920236587524, "loss_ce": 0.000227605109103024, "loss_iou": 0.2275390625, "loss_num": 0.01458740234375, "loss_xval": 0.52734375, "num_input_tokens_seen": 290020992, "step": 2509 }, { "epoch": 13.494623655913978, "grad_norm": 22.182958602905273, "learning_rate": 5e-07, "loss": 0.5649, "num_input_tokens_seen": 290139248, "step": 2510 }, { "epoch": 13.494623655913978, "loss": 0.5738892555236816, "loss_ce": 3.669026409625076e-05, "loss_iou": 0.25, "loss_num": 0.01470947265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 290139248, "step": 2510 }, { "epoch": 13.5, "grad_norm": 18.842700958251953, "learning_rate": 5e-07, "loss": 0.5169, "num_input_tokens_seen": 290256364, "step": 2511 }, { "epoch": 13.5, "loss": 0.48817700147628784, "loss_ce": 1.781773244147189e-05, "loss_iou": 0.203125, "loss_num": 0.016357421875, "loss_xval": 0.48828125, "num_input_tokens_seen": 290256364, "step": 2511 }, { "epoch": 13.505376344086022, "grad_norm": 24.01421356201172, "learning_rate": 5e-07, "loss": 0.6307, "num_input_tokens_seen": 290373556, "step": 2512 }, { "epoch": 13.505376344086022, "loss": 0.7417179942131042, "loss_ce": 1.8740533050731756e-05, "loss_iou": 0.328125, "loss_num": 0.0172119140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 290373556, "step": 2512 }, { "epoch": 13.510752688172044, "grad_norm": 29.17827796936035, "learning_rate": 5e-07, "loss": 0.5759, "num_input_tokens_seen": 290489400, "step": 2513 }, { "epoch": 13.510752688172044, "loss": 0.5867021083831787, "loss_ce": 3.22010564559605e-05, "loss_iou": 0.259765625, "loss_num": 0.01361083984375, "loss_xval": 0.5859375, "num_input_tokens_seen": 290489400, "step": 2513 }, { "epoch": 13.516129032258064, "grad_norm": 20.575931549072266, "learning_rate": 5e-07, "loss": 0.6767, "num_input_tokens_seen": 290605788, "step": 2514 }, { "epoch": 13.516129032258064, "loss": 0.5134655833244324, "loss_ce": 3.782342537306249e-05, "loss_iou": 0.2236328125, "loss_num": 0.0133056640625, "loss_xval": 0.51171875, "num_input_tokens_seen": 290605788, "step": 2514 }, { "epoch": 13.521505376344086, "grad_norm": 26.51527214050293, "learning_rate": 5e-07, "loss": 0.9361, "num_input_tokens_seen": 290724180, "step": 2515 }, { "epoch": 13.521505376344086, "loss": 0.7450828552246094, "loss_ce": 8.77656348166056e-05, "loss_iou": 0.3203125, "loss_num": 0.0206298828125, "loss_xval": 0.74609375, "num_input_tokens_seen": 290724180, "step": 2515 }, { "epoch": 13.526881720430108, "grad_norm": 28.38813591003418, "learning_rate": 5e-07, "loss": 0.7698, "num_input_tokens_seen": 290840080, "step": 2516 }, { "epoch": 13.526881720430108, "loss": 0.8501167893409729, "loss_ce": 1.910745595523622e-05, "loss_iou": 0.353515625, "loss_num": 0.02880859375, "loss_xval": 0.8515625, "num_input_tokens_seen": 290840080, "step": 2516 }, { "epoch": 13.532258064516128, "grad_norm": 19.946636199951172, "learning_rate": 5e-07, "loss": 0.5027, "num_input_tokens_seen": 290955620, "step": 2517 }, { "epoch": 13.532258064516128, "loss": 0.6031797528266907, "loss_ce": 3.0332888854900375e-05, "loss_iou": 0.267578125, "loss_num": 0.01348876953125, "loss_xval": 0.6015625, "num_input_tokens_seen": 290955620, "step": 2517 }, { "epoch": 13.53763440860215, "grad_norm": 20.1138858795166, "learning_rate": 5e-07, "loss": 0.6806, "num_input_tokens_seen": 291072748, "step": 2518 }, { "epoch": 13.53763440860215, "loss": 0.4336535334587097, "loss_ce": 5.9795846027554944e-05, "loss_iou": 0.1953125, "loss_num": 0.0086669921875, "loss_xval": 0.43359375, "num_input_tokens_seen": 291072748, "step": 2518 }, { "epoch": 13.543010752688172, "grad_norm": 16.92995834350586, "learning_rate": 5e-07, "loss": 0.572, "num_input_tokens_seen": 291188276, "step": 2519 }, { "epoch": 13.543010752688172, "loss": 0.6520004868507385, "loss_ce": 2.294894147780724e-05, "loss_iou": 0.296875, "loss_num": 0.01171875, "loss_xval": 0.65234375, "num_input_tokens_seen": 291188276, "step": 2519 }, { "epoch": 13.548387096774194, "grad_norm": 31.199901580810547, "learning_rate": 5e-07, "loss": 0.5976, "num_input_tokens_seen": 291303500, "step": 2520 }, { "epoch": 13.548387096774194, "loss": 0.45838940143585205, "loss_ce": 1.537629941594787e-05, "loss_iou": 0.1962890625, "loss_num": 0.01318359375, "loss_xval": 0.458984375, "num_input_tokens_seen": 291303500, "step": 2520 }, { "epoch": 13.553763440860216, "grad_norm": 32.491050720214844, "learning_rate": 5e-07, "loss": 0.5758, "num_input_tokens_seen": 291420440, "step": 2521 }, { "epoch": 13.553763440860216, "loss": 0.4260627031326294, "loss_ce": 3.73035654774867e-05, "loss_iou": 0.1787109375, "loss_num": 0.01385498046875, "loss_xval": 0.42578125, "num_input_tokens_seen": 291420440, "step": 2521 }, { "epoch": 13.559139784946236, "grad_norm": 26.56205940246582, "learning_rate": 5e-07, "loss": 0.5688, "num_input_tokens_seen": 291537700, "step": 2522 }, { "epoch": 13.559139784946236, "loss": 0.6283172965049744, "loss_ce": 2.13896873901831e-05, "loss_iou": 0.2734375, "loss_num": 0.0164794921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 291537700, "step": 2522 }, { "epoch": 13.564516129032258, "grad_norm": 23.035907745361328, "learning_rate": 5e-07, "loss": 0.6082, "num_input_tokens_seen": 291654148, "step": 2523 }, { "epoch": 13.564516129032258, "loss": 0.7616535425186157, "loss_ce": 5.686750228051096e-05, "loss_iou": 0.33203125, "loss_num": 0.019775390625, "loss_xval": 0.76171875, "num_input_tokens_seen": 291654148, "step": 2523 }, { "epoch": 13.56989247311828, "grad_norm": 31.577442169189453, "learning_rate": 5e-07, "loss": 0.731, "num_input_tokens_seen": 291769404, "step": 2524 }, { "epoch": 13.56989247311828, "loss": 0.71424800157547, "loss_ce": 0.00013668650353793055, "loss_iou": 0.322265625, "loss_num": 0.0137939453125, "loss_xval": 0.71484375, "num_input_tokens_seen": 291769404, "step": 2524 }, { "epoch": 13.575268817204302, "grad_norm": 23.12339210510254, "learning_rate": 5e-07, "loss": 0.6875, "num_input_tokens_seen": 291882700, "step": 2525 }, { "epoch": 13.575268817204302, "loss": 0.5103921890258789, "loss_ce": 1.6166070054168813e-05, "loss_iou": 0.21875, "loss_num": 0.01434326171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 291882700, "step": 2525 }, { "epoch": 13.580645161290322, "grad_norm": 31.67677116394043, "learning_rate": 5e-07, "loss": 0.5434, "num_input_tokens_seen": 292000088, "step": 2526 }, { "epoch": 13.580645161290322, "loss": 0.5510612726211548, "loss_ce": 3.5895005566999316e-05, "loss_iou": 0.251953125, "loss_num": 0.00982666015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 292000088, "step": 2526 }, { "epoch": 13.586021505376344, "grad_norm": 24.617795944213867, "learning_rate": 5e-07, "loss": 0.6457, "num_input_tokens_seen": 292110968, "step": 2527 }, { "epoch": 13.586021505376344, "loss": 0.5290980935096741, "loss_ce": 4.535272455541417e-05, "loss_iou": 0.2255859375, "loss_num": 0.015380859375, "loss_xval": 0.52734375, "num_input_tokens_seen": 292110968, "step": 2527 }, { "epoch": 13.591397849462366, "grad_norm": 23.813730239868164, "learning_rate": 5e-07, "loss": 0.5845, "num_input_tokens_seen": 292227544, "step": 2528 }, { "epoch": 13.591397849462366, "loss": 0.4671890139579773, "loss_ce": 2.5949715563911013e-05, "loss_iou": 0.1904296875, "loss_num": 0.01708984375, "loss_xval": 0.466796875, "num_input_tokens_seen": 292227544, "step": 2528 }, { "epoch": 13.596774193548388, "grad_norm": 23.580978393554688, "learning_rate": 5e-07, "loss": 0.6112, "num_input_tokens_seen": 292342776, "step": 2529 }, { "epoch": 13.596774193548388, "loss": 0.6768202781677246, "loss_ce": 6.245569238672033e-05, "loss_iou": 0.275390625, "loss_num": 0.025146484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 292342776, "step": 2529 }, { "epoch": 13.602150537634408, "grad_norm": 32.6282958984375, "learning_rate": 5e-07, "loss": 0.764, "num_input_tokens_seen": 292458976, "step": 2530 }, { "epoch": 13.602150537634408, "loss": 0.7294522523880005, "loss_ce": 5.165791662875563e-05, "loss_iou": 0.33203125, "loss_num": 0.01312255859375, "loss_xval": 0.73046875, "num_input_tokens_seen": 292458976, "step": 2530 }, { "epoch": 13.60752688172043, "grad_norm": 25.71038818359375, "learning_rate": 5e-07, "loss": 0.5144, "num_input_tokens_seen": 292575048, "step": 2531 }, { "epoch": 13.60752688172043, "loss": 0.477323979139328, "loss_ce": 2.905000110331457e-05, "loss_iou": 0.205078125, "loss_num": 0.01336669921875, "loss_xval": 0.4765625, "num_input_tokens_seen": 292575048, "step": 2531 }, { "epoch": 13.612903225806452, "grad_norm": 25.689437866210938, "learning_rate": 5e-07, "loss": 0.8233, "num_input_tokens_seen": 292689280, "step": 2532 }, { "epoch": 13.612903225806452, "loss": 0.7636659145355225, "loss_ce": 0.00048234270070679486, "loss_iou": 0.333984375, "loss_num": 0.0194091796875, "loss_xval": 0.76171875, "num_input_tokens_seen": 292689280, "step": 2532 }, { "epoch": 13.618279569892474, "grad_norm": 40.29010009765625, "learning_rate": 5e-07, "loss": 0.6672, "num_input_tokens_seen": 292804552, "step": 2533 }, { "epoch": 13.618279569892474, "loss": 0.5107659697532654, "loss_ce": 2.377413329668343e-05, "loss_iou": 0.2177734375, "loss_num": 0.0150146484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 292804552, "step": 2533 }, { "epoch": 13.623655913978494, "grad_norm": 31.505332946777344, "learning_rate": 5e-07, "loss": 0.5574, "num_input_tokens_seen": 292920132, "step": 2534 }, { "epoch": 13.623655913978494, "loss": 0.6494301557540894, "loss_ce": 1.6116387996589765e-05, "loss_iou": 0.28125, "loss_num": 0.0172119140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 292920132, "step": 2534 }, { "epoch": 13.629032258064516, "grad_norm": 29.2274227142334, "learning_rate": 5e-07, "loss": 0.5464, "num_input_tokens_seen": 293035872, "step": 2535 }, { "epoch": 13.629032258064516, "loss": 0.5592712759971619, "loss_ce": 6.716309144394472e-05, "loss_iou": 0.2197265625, "loss_num": 0.02392578125, "loss_xval": 0.55859375, "num_input_tokens_seen": 293035872, "step": 2535 }, { "epoch": 13.634408602150538, "grad_norm": 22.145404815673828, "learning_rate": 5e-07, "loss": 0.6288, "num_input_tokens_seen": 293154556, "step": 2536 }, { "epoch": 13.634408602150538, "loss": 0.5649685263633728, "loss_ce": 2.709717591642402e-05, "loss_iou": 0.248046875, "loss_num": 0.01373291015625, "loss_xval": 0.56640625, "num_input_tokens_seen": 293154556, "step": 2536 }, { "epoch": 13.63978494623656, "grad_norm": 25.531131744384766, "learning_rate": 5e-07, "loss": 0.5028, "num_input_tokens_seen": 293272828, "step": 2537 }, { "epoch": 13.63978494623656, "loss": 0.4607120156288147, "loss_ce": 0.0002628109068609774, "loss_iou": 0.201171875, "loss_num": 0.01171875, "loss_xval": 0.4609375, "num_input_tokens_seen": 293272828, "step": 2537 }, { "epoch": 13.64516129032258, "grad_norm": 34.849365234375, "learning_rate": 5e-07, "loss": 0.4645, "num_input_tokens_seen": 293388752, "step": 2538 }, { "epoch": 13.64516129032258, "loss": 0.4435041546821594, "loss_ce": 2.2715432351105846e-05, "loss_iou": 0.19921875, "loss_num": 0.00921630859375, "loss_xval": 0.443359375, "num_input_tokens_seen": 293388752, "step": 2538 }, { "epoch": 13.650537634408602, "grad_norm": 24.237472534179688, "learning_rate": 5e-07, "loss": 0.6563, "num_input_tokens_seen": 293506508, "step": 2539 }, { "epoch": 13.650537634408602, "loss": 0.6572475433349609, "loss_ce": 2.0982213754905388e-05, "loss_iou": 0.294921875, "loss_num": 0.0133056640625, "loss_xval": 0.65625, "num_input_tokens_seen": 293506508, "step": 2539 }, { "epoch": 13.655913978494624, "grad_norm": 26.350067138671875, "learning_rate": 5e-07, "loss": 0.5701, "num_input_tokens_seen": 293621964, "step": 2540 }, { "epoch": 13.655913978494624, "loss": 0.5919977426528931, "loss_ce": 1.7730666513671167e-05, "loss_iou": 0.267578125, "loss_num": 0.01153564453125, "loss_xval": 0.59375, "num_input_tokens_seen": 293621964, "step": 2540 }, { "epoch": 13.661290322580646, "grad_norm": 24.18915367126465, "learning_rate": 5e-07, "loss": 0.8375, "num_input_tokens_seen": 293732420, "step": 2541 }, { "epoch": 13.661290322580646, "loss": 1.1255332231521606, "loss_ce": 4.490727951633744e-05, "loss_iou": 0.494140625, "loss_num": 0.0269775390625, "loss_xval": 1.125, "num_input_tokens_seen": 293732420, "step": 2541 }, { "epoch": 13.666666666666666, "grad_norm": 24.44210433959961, "learning_rate": 5e-07, "loss": 0.7017, "num_input_tokens_seen": 293849628, "step": 2542 }, { "epoch": 13.666666666666666, "loss": 0.7488250732421875, "loss_ce": 4.578612424666062e-05, "loss_iou": 0.32421875, "loss_num": 0.019775390625, "loss_xval": 0.75, "num_input_tokens_seen": 293849628, "step": 2542 }, { "epoch": 13.672043010752688, "grad_norm": 24.338830947875977, "learning_rate": 5e-07, "loss": 0.7232, "num_input_tokens_seen": 293962940, "step": 2543 }, { "epoch": 13.672043010752688, "loss": 0.6127004623413086, "loss_ce": 2.9574195650639012e-05, "loss_iou": 0.26953125, "loss_num": 0.014404296875, "loss_xval": 0.61328125, "num_input_tokens_seen": 293962940, "step": 2543 }, { "epoch": 13.67741935483871, "grad_norm": 30.768301010131836, "learning_rate": 5e-07, "loss": 0.6449, "num_input_tokens_seen": 294078640, "step": 2544 }, { "epoch": 13.67741935483871, "loss": 0.5914455652236938, "loss_ce": 1.4867855497868732e-05, "loss_iou": 0.26171875, "loss_num": 0.01336669921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 294078640, "step": 2544 }, { "epoch": 13.682795698924732, "grad_norm": 45.30036544799805, "learning_rate": 5e-07, "loss": 0.6248, "num_input_tokens_seen": 294195860, "step": 2545 }, { "epoch": 13.682795698924732, "loss": 0.640479564666748, "loss_ce": 9.871491056401283e-05, "loss_iou": 0.28515625, "loss_num": 0.01416015625, "loss_xval": 0.640625, "num_input_tokens_seen": 294195860, "step": 2545 }, { "epoch": 13.688172043010752, "grad_norm": 20.05927085876465, "learning_rate": 5e-07, "loss": 0.654, "num_input_tokens_seen": 294313232, "step": 2546 }, { "epoch": 13.688172043010752, "loss": 0.5002864599227905, "loss_ce": 4.2309387936256826e-05, "loss_iou": 0.2138671875, "loss_num": 0.01458740234375, "loss_xval": 0.5, "num_input_tokens_seen": 294313232, "step": 2546 }, { "epoch": 13.693548387096774, "grad_norm": 24.81466293334961, "learning_rate": 5e-07, "loss": 0.6531, "num_input_tokens_seen": 294430168, "step": 2547 }, { "epoch": 13.693548387096774, "loss": 0.6355212330818176, "loss_ce": 2.3210948711493984e-05, "loss_iou": 0.275390625, "loss_num": 0.0169677734375, "loss_xval": 0.63671875, "num_input_tokens_seen": 294430168, "step": 2547 }, { "epoch": 13.698924731182796, "grad_norm": 20.546619415283203, "learning_rate": 5e-07, "loss": 0.7141, "num_input_tokens_seen": 294545096, "step": 2548 }, { "epoch": 13.698924731182796, "loss": 0.6331888437271118, "loss_ce": 1.0163238584937062e-05, "loss_iou": 0.2578125, "loss_num": 0.023681640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 294545096, "step": 2548 }, { "epoch": 13.704301075268818, "grad_norm": 22.283771514892578, "learning_rate": 5e-07, "loss": 0.6269, "num_input_tokens_seen": 294661380, "step": 2549 }, { "epoch": 13.704301075268818, "loss": 0.5988517999649048, "loss_ce": 3.587070750654675e-05, "loss_iou": 0.267578125, "loss_num": 0.0128173828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 294661380, "step": 2549 }, { "epoch": 13.709677419354838, "grad_norm": 29.940053939819336, "learning_rate": 5e-07, "loss": 0.53, "num_input_tokens_seen": 294777996, "step": 2550 }, { "epoch": 13.709677419354838, "loss": 0.5713807344436646, "loss_ce": 3.066683711949736e-05, "loss_iou": 0.2578125, "loss_num": 0.0107421875, "loss_xval": 0.5703125, "num_input_tokens_seen": 294777996, "step": 2550 }, { "epoch": 13.71505376344086, "grad_norm": 38.19960021972656, "learning_rate": 5e-07, "loss": 0.5167, "num_input_tokens_seen": 294896488, "step": 2551 }, { "epoch": 13.71505376344086, "loss": 0.6450206637382507, "loss_ce": 0.0002452480548527092, "loss_iou": 0.291015625, "loss_num": 0.01214599609375, "loss_xval": 0.64453125, "num_input_tokens_seen": 294896488, "step": 2551 }, { "epoch": 13.720430107526882, "grad_norm": 36.40572738647461, "learning_rate": 5e-07, "loss": 0.8543, "num_input_tokens_seen": 295013352, "step": 2552 }, { "epoch": 13.720430107526882, "loss": 0.8796977996826172, "loss_ce": 5.915380097576417e-05, "loss_iou": 0.39453125, "loss_num": 0.017822265625, "loss_xval": 0.87890625, "num_input_tokens_seen": 295013352, "step": 2552 }, { "epoch": 13.725806451612904, "grad_norm": 29.71619415283203, "learning_rate": 5e-07, "loss": 0.6151, "num_input_tokens_seen": 295128760, "step": 2553 }, { "epoch": 13.725806451612904, "loss": 0.4883086085319519, "loss_ce": 2.737803151831031e-05, "loss_iou": 0.2041015625, "loss_num": 0.0162353515625, "loss_xval": 0.48828125, "num_input_tokens_seen": 295128760, "step": 2553 }, { "epoch": 13.731182795698924, "grad_norm": 30.68787384033203, "learning_rate": 5e-07, "loss": 0.6759, "num_input_tokens_seen": 295241304, "step": 2554 }, { "epoch": 13.731182795698924, "loss": 0.6414411067962646, "loss_ce": 2.2666157747153193e-05, "loss_iou": 0.28125, "loss_num": 0.0157470703125, "loss_xval": 0.640625, "num_input_tokens_seen": 295241304, "step": 2554 }, { "epoch": 13.736559139784946, "grad_norm": 17.795299530029297, "learning_rate": 5e-07, "loss": 0.5441, "num_input_tokens_seen": 295355688, "step": 2555 }, { "epoch": 13.736559139784946, "loss": 0.39907100796699524, "loss_ce": 2.3166738174040802e-05, "loss_iou": 0.1484375, "loss_num": 0.0205078125, "loss_xval": 0.3984375, "num_input_tokens_seen": 295355688, "step": 2555 }, { "epoch": 13.741935483870968, "grad_norm": 15.667884826660156, "learning_rate": 5e-07, "loss": 0.6237, "num_input_tokens_seen": 295474660, "step": 2556 }, { "epoch": 13.741935483870968, "loss": 0.7676105499267578, "loss_ce": 3.23952772305347e-05, "loss_iou": 0.33984375, "loss_num": 0.017578125, "loss_xval": 0.765625, "num_input_tokens_seen": 295474660, "step": 2556 }, { "epoch": 13.74731182795699, "grad_norm": 23.121461868286133, "learning_rate": 5e-07, "loss": 0.6402, "num_input_tokens_seen": 295589664, "step": 2557 }, { "epoch": 13.74731182795699, "loss": 0.4535638093948364, "loss_ce": 7.261614518938586e-05, "loss_iou": 0.1962890625, "loss_num": 0.01239013671875, "loss_xval": 0.453125, "num_input_tokens_seen": 295589664, "step": 2557 }, { "epoch": 13.75268817204301, "grad_norm": 49.83491897583008, "learning_rate": 5e-07, "loss": 0.5925, "num_input_tokens_seen": 295705316, "step": 2558 }, { "epoch": 13.75268817204301, "loss": 0.5364000797271729, "loss_ce": 2.3108845198294148e-05, "loss_iou": 0.2265625, "loss_num": 0.016845703125, "loss_xval": 0.53515625, "num_input_tokens_seen": 295705316, "step": 2558 }, { "epoch": 13.758064516129032, "grad_norm": 52.12287902832031, "learning_rate": 5e-07, "loss": 0.6477, "num_input_tokens_seen": 295821136, "step": 2559 }, { "epoch": 13.758064516129032, "loss": 0.6321223974227905, "loss_ce": 4.236626045894809e-05, "loss_iou": 0.291015625, "loss_num": 0.009765625, "loss_xval": 0.6328125, "num_input_tokens_seen": 295821136, "step": 2559 }, { "epoch": 13.763440860215054, "grad_norm": 39.25777053833008, "learning_rate": 5e-07, "loss": 0.7496, "num_input_tokens_seen": 295935256, "step": 2560 }, { "epoch": 13.763440860215054, "loss": 0.5861601829528809, "loss_ce": 0.00010059003398055211, "loss_iou": 0.267578125, "loss_num": 0.01055908203125, "loss_xval": 0.5859375, "num_input_tokens_seen": 295935256, "step": 2560 }, { "epoch": 13.768817204301076, "grad_norm": 24.345653533935547, "learning_rate": 5e-07, "loss": 0.6414, "num_input_tokens_seen": 296049996, "step": 2561 }, { "epoch": 13.768817204301076, "loss": 0.48689329624176025, "loss_ce": 7.690103666391224e-05, "loss_iou": 0.2138671875, "loss_num": 0.011962890625, "loss_xval": 0.486328125, "num_input_tokens_seen": 296049996, "step": 2561 }, { "epoch": 13.774193548387096, "grad_norm": 22.54024314880371, "learning_rate": 5e-07, "loss": 0.6236, "num_input_tokens_seen": 296164784, "step": 2562 }, { "epoch": 13.774193548387096, "loss": 0.6070727109909058, "loss_ce": 1.7029007722157985e-05, "loss_iou": 0.2431640625, "loss_num": 0.024169921875, "loss_xval": 0.60546875, "num_input_tokens_seen": 296164784, "step": 2562 }, { "epoch": 13.779569892473118, "grad_norm": 22.229637145996094, "learning_rate": 5e-07, "loss": 0.6075, "num_input_tokens_seen": 296281232, "step": 2563 }, { "epoch": 13.779569892473118, "loss": 0.7541815638542175, "loss_ce": 3.116788502666168e-05, "loss_iou": 0.328125, "loss_num": 0.01953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 296281232, "step": 2563 }, { "epoch": 13.78494623655914, "grad_norm": 33.20132064819336, "learning_rate": 5e-07, "loss": 0.6514, "num_input_tokens_seen": 296399480, "step": 2564 }, { "epoch": 13.78494623655914, "loss": 0.6575390696525574, "loss_ce": 6.837672844994813e-05, "loss_iou": 0.28125, "loss_num": 0.01904296875, "loss_xval": 0.65625, "num_input_tokens_seen": 296399480, "step": 2564 }, { "epoch": 13.790322580645162, "grad_norm": 29.355192184448242, "learning_rate": 5e-07, "loss": 0.6966, "num_input_tokens_seen": 296512612, "step": 2565 }, { "epoch": 13.790322580645162, "loss": 1.335474967956543, "loss_ce": 2.5625238777138293e-05, "loss_iou": 0.62109375, "loss_num": 0.018798828125, "loss_xval": 1.3359375, "num_input_tokens_seen": 296512612, "step": 2565 }, { "epoch": 13.795698924731182, "grad_norm": 24.638086318969727, "learning_rate": 5e-07, "loss": 0.5938, "num_input_tokens_seen": 296629688, "step": 2566 }, { "epoch": 13.795698924731182, "loss": 0.4814647138118744, "loss_ce": 1.938858622452244e-05, "loss_iou": 0.21875, "loss_num": 0.0087890625, "loss_xval": 0.48046875, "num_input_tokens_seen": 296629688, "step": 2566 }, { "epoch": 13.801075268817204, "grad_norm": 24.18471336364746, "learning_rate": 5e-07, "loss": 0.6644, "num_input_tokens_seen": 296746876, "step": 2567 }, { "epoch": 13.801075268817204, "loss": 0.5674116015434265, "loss_ce": 2.8783058951376006e-05, "loss_iou": 0.248046875, "loss_num": 0.0140380859375, "loss_xval": 0.56640625, "num_input_tokens_seen": 296746876, "step": 2567 }, { "epoch": 13.806451612903226, "grad_norm": 86.52061462402344, "learning_rate": 5e-07, "loss": 0.4661, "num_input_tokens_seen": 296864460, "step": 2568 }, { "epoch": 13.806451612903226, "loss": 0.5517987012863159, "loss_ce": 4.0833278035279363e-05, "loss_iou": 0.240234375, "loss_num": 0.01416015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 296864460, "step": 2568 }, { "epoch": 13.811827956989248, "grad_norm": 37.01811981201172, "learning_rate": 5e-07, "loss": 0.5847, "num_input_tokens_seen": 296978020, "step": 2569 }, { "epoch": 13.811827956989248, "loss": 0.6079394817352295, "loss_ce": 2.9301041649887338e-05, "loss_iou": 0.265625, "loss_num": 0.0150146484375, "loss_xval": 0.609375, "num_input_tokens_seen": 296978020, "step": 2569 }, { "epoch": 13.817204301075268, "grad_norm": 28.852991104125977, "learning_rate": 5e-07, "loss": 0.6483, "num_input_tokens_seen": 297095396, "step": 2570 }, { "epoch": 13.817204301075268, "loss": 0.677509605884552, "loss_ce": 1.9336297555128112e-05, "loss_iou": 0.298828125, "loss_num": 0.01611328125, "loss_xval": 0.67578125, "num_input_tokens_seen": 297095396, "step": 2570 }, { "epoch": 13.82258064516129, "grad_norm": 19.012346267700195, "learning_rate": 5e-07, "loss": 0.5336, "num_input_tokens_seen": 297207712, "step": 2571 }, { "epoch": 13.82258064516129, "loss": 0.674596905708313, "loss_ce": 3.635775647126138e-05, "loss_iou": 0.302734375, "loss_num": 0.01361083984375, "loss_xval": 0.67578125, "num_input_tokens_seen": 297207712, "step": 2571 }, { "epoch": 13.827956989247312, "grad_norm": 94.27461242675781, "learning_rate": 5e-07, "loss": 0.7594, "num_input_tokens_seen": 297322160, "step": 2572 }, { "epoch": 13.827956989247312, "loss": 0.865889310836792, "loss_ce": 4.459727642824873e-05, "loss_iou": 0.388671875, "loss_num": 0.017822265625, "loss_xval": 0.8671875, "num_input_tokens_seen": 297322160, "step": 2572 }, { "epoch": 13.833333333333334, "grad_norm": 19.457304000854492, "learning_rate": 5e-07, "loss": 0.5265, "num_input_tokens_seen": 297439732, "step": 2573 }, { "epoch": 13.833333333333334, "loss": 0.4293368458747864, "loss_ce": 1.5558392988168634e-05, "loss_iou": 0.1943359375, "loss_num": 0.0081787109375, "loss_xval": 0.4296875, "num_input_tokens_seen": 297439732, "step": 2573 }, { "epoch": 13.838709677419354, "grad_norm": 24.237655639648438, "learning_rate": 5e-07, "loss": 0.7112, "num_input_tokens_seen": 297555664, "step": 2574 }, { "epoch": 13.838709677419354, "loss": 0.9839116334915161, "loss_ce": 2.4879002012312412e-05, "loss_iou": 0.4296875, "loss_num": 0.02490234375, "loss_xval": 0.984375, "num_input_tokens_seen": 297555664, "step": 2574 }, { "epoch": 13.844086021505376, "grad_norm": 18.30708885192871, "learning_rate": 5e-07, "loss": 0.6079, "num_input_tokens_seen": 297670116, "step": 2575 }, { "epoch": 13.844086021505376, "loss": 0.45619338750839233, "loss_ce": 1.6615937056485564e-05, "loss_iou": 0.1875, "loss_num": 0.0162353515625, "loss_xval": 0.45703125, "num_input_tokens_seen": 297670116, "step": 2575 }, { "epoch": 13.849462365591398, "grad_norm": 21.009523391723633, "learning_rate": 5e-07, "loss": 0.6821, "num_input_tokens_seen": 297784360, "step": 2576 }, { "epoch": 13.849462365591398, "loss": 0.6025570631027222, "loss_ce": 1.7970251064980403e-05, "loss_iou": 0.263671875, "loss_num": 0.0147705078125, "loss_xval": 0.6015625, "num_input_tokens_seen": 297784360, "step": 2576 }, { "epoch": 13.85483870967742, "grad_norm": 35.226165771484375, "learning_rate": 5e-07, "loss": 0.5783, "num_input_tokens_seen": 297899504, "step": 2577 }, { "epoch": 13.85483870967742, "loss": 0.5569347143173218, "loss_ce": 0.0002941153070423752, "loss_iou": 0.244140625, "loss_num": 0.013671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 297899504, "step": 2577 }, { "epoch": 13.86021505376344, "grad_norm": 35.031856536865234, "learning_rate": 5e-07, "loss": 0.6982, "num_input_tokens_seen": 298015744, "step": 2578 }, { "epoch": 13.86021505376344, "loss": 0.6675378084182739, "loss_ce": 5.735519516747445e-05, "loss_iou": 0.29296875, "loss_num": 0.0159912109375, "loss_xval": 0.66796875, "num_input_tokens_seen": 298015744, "step": 2578 }, { "epoch": 13.865591397849462, "grad_norm": 26.34271240234375, "learning_rate": 5e-07, "loss": 0.4622, "num_input_tokens_seen": 298130520, "step": 2579 }, { "epoch": 13.865591397849462, "loss": 0.6536095142364502, "loss_ce": 0.0017234937986359, "loss_iou": 0.275390625, "loss_num": 0.02001953125, "loss_xval": 0.65234375, "num_input_tokens_seen": 298130520, "step": 2579 }, { "epoch": 13.870967741935484, "grad_norm": 18.67284393310547, "learning_rate": 5e-07, "loss": 0.6135, "num_input_tokens_seen": 298241652, "step": 2580 }, { "epoch": 13.870967741935484, "loss": 0.5895169377326965, "loss_ce": 3.938339068554342e-05, "loss_iou": 0.26171875, "loss_num": 0.0135498046875, "loss_xval": 0.58984375, "num_input_tokens_seen": 298241652, "step": 2580 }, { "epoch": 13.876344086021506, "grad_norm": 29.317914962768555, "learning_rate": 5e-07, "loss": 0.5594, "num_input_tokens_seen": 298359120, "step": 2581 }, { "epoch": 13.876344086021506, "loss": 0.49089890718460083, "loss_ce": 5.420306479209103e-05, "loss_iou": 0.2138671875, "loss_num": 0.01263427734375, "loss_xval": 0.490234375, "num_input_tokens_seen": 298359120, "step": 2581 }, { "epoch": 13.881720430107526, "grad_norm": 31.162921905517578, "learning_rate": 5e-07, "loss": 0.7214, "num_input_tokens_seen": 298474216, "step": 2582 }, { "epoch": 13.881720430107526, "loss": 0.5360285639762878, "loss_ce": 1.7829563148552552e-05, "loss_iou": 0.21875, "loss_num": 0.019775390625, "loss_xval": 0.53515625, "num_input_tokens_seen": 298474216, "step": 2582 }, { "epoch": 13.887096774193548, "grad_norm": 24.125083923339844, "learning_rate": 5e-07, "loss": 0.56, "num_input_tokens_seen": 298590628, "step": 2583 }, { "epoch": 13.887096774193548, "loss": 0.546055257320404, "loss_ce": 3.478935832390562e-05, "loss_iou": 0.2421875, "loss_num": 0.01220703125, "loss_xval": 0.546875, "num_input_tokens_seen": 298590628, "step": 2583 }, { "epoch": 13.89247311827957, "grad_norm": 28.446683883666992, "learning_rate": 5e-07, "loss": 0.5325, "num_input_tokens_seen": 298709188, "step": 2584 }, { "epoch": 13.89247311827957, "loss": 0.5427574515342712, "loss_ce": 3.28592759615276e-05, "loss_iou": 0.2373046875, "loss_num": 0.01373291015625, "loss_xval": 0.54296875, "num_input_tokens_seen": 298709188, "step": 2584 }, { "epoch": 13.897849462365592, "grad_norm": 21.949172973632812, "learning_rate": 5e-07, "loss": 0.5838, "num_input_tokens_seen": 298824832, "step": 2585 }, { "epoch": 13.897849462365592, "loss": 0.5009878873825073, "loss_ce": 1.1325928426231258e-05, "loss_iou": 0.212890625, "loss_num": 0.01495361328125, "loss_xval": 0.5, "num_input_tokens_seen": 298824832, "step": 2585 }, { "epoch": 13.903225806451612, "grad_norm": 23.402414321899414, "learning_rate": 5e-07, "loss": 0.6371, "num_input_tokens_seen": 298943348, "step": 2586 }, { "epoch": 13.903225806451612, "loss": 1.0894129276275635, "loss_ce": 5.760321073466912e-05, "loss_iou": 0.478515625, "loss_num": 0.0263671875, "loss_xval": 1.0859375, "num_input_tokens_seen": 298943348, "step": 2586 }, { "epoch": 13.908602150537634, "grad_norm": 21.662628173828125, "learning_rate": 5e-07, "loss": 0.6117, "num_input_tokens_seen": 299061184, "step": 2587 }, { "epoch": 13.908602150537634, "loss": 0.4439086616039276, "loss_ce": 6.101187318563461e-05, "loss_iou": 0.185546875, "loss_num": 0.01458740234375, "loss_xval": 0.443359375, "num_input_tokens_seen": 299061184, "step": 2587 }, { "epoch": 13.913978494623656, "grad_norm": 23.154430389404297, "learning_rate": 5e-07, "loss": 0.7874, "num_input_tokens_seen": 299181168, "step": 2588 }, { "epoch": 13.913978494623656, "loss": 0.8827579021453857, "loss_ce": 0.0001896216708701104, "loss_iou": 0.39453125, "loss_num": 0.0189208984375, "loss_xval": 0.8828125, "num_input_tokens_seen": 299181168, "step": 2588 }, { "epoch": 13.919354838709678, "grad_norm": 25.42271614074707, "learning_rate": 5e-07, "loss": 0.6568, "num_input_tokens_seen": 299295280, "step": 2589 }, { "epoch": 13.919354838709678, "loss": 0.7803006172180176, "loss_ce": 2.7186935767531395e-05, "loss_iou": 0.345703125, "loss_num": 0.017822265625, "loss_xval": 0.78125, "num_input_tokens_seen": 299295280, "step": 2589 }, { "epoch": 13.924731182795698, "grad_norm": 21.444555282592773, "learning_rate": 5e-07, "loss": 0.5382, "num_input_tokens_seen": 299412480, "step": 2590 }, { "epoch": 13.924731182795698, "loss": 0.5303003787994385, "loss_ce": 2.6941443138639443e-05, "loss_iou": 0.23828125, "loss_num": 0.0108642578125, "loss_xval": 0.53125, "num_input_tokens_seen": 299412480, "step": 2590 }, { "epoch": 13.93010752688172, "grad_norm": 34.10324478149414, "learning_rate": 5e-07, "loss": 0.6011, "num_input_tokens_seen": 299529064, "step": 2591 }, { "epoch": 13.93010752688172, "loss": 0.45753782987594604, "loss_ce": 1.8312082829652354e-05, "loss_iou": 0.1953125, "loss_num": 0.01336669921875, "loss_xval": 0.45703125, "num_input_tokens_seen": 299529064, "step": 2591 }, { "epoch": 13.935483870967742, "grad_norm": 40.25286865234375, "learning_rate": 5e-07, "loss": 0.5879, "num_input_tokens_seen": 299646196, "step": 2592 }, { "epoch": 13.935483870967742, "loss": 0.7600297927856445, "loss_ce": 2.005645728786476e-05, "loss_iou": 0.314453125, "loss_num": 0.026123046875, "loss_xval": 0.76171875, "num_input_tokens_seen": 299646196, "step": 2592 }, { "epoch": 13.940860215053764, "grad_norm": 57.65624237060547, "learning_rate": 5e-07, "loss": 0.6202, "num_input_tokens_seen": 299760332, "step": 2593 }, { "epoch": 13.940860215053764, "loss": 0.485688179731369, "loss_ce": 3.1433919502887875e-05, "loss_iou": 0.2236328125, "loss_num": 0.007537841796875, "loss_xval": 0.486328125, "num_input_tokens_seen": 299760332, "step": 2593 }, { "epoch": 13.946236559139784, "grad_norm": 41.39457702636719, "learning_rate": 5e-07, "loss": 0.8396, "num_input_tokens_seen": 299871796, "step": 2594 }, { "epoch": 13.946236559139784, "loss": 0.7229481935501099, "loss_ce": 0.0001088875942514278, "loss_iou": 0.30859375, "loss_num": 0.0208740234375, "loss_xval": 0.72265625, "num_input_tokens_seen": 299871796, "step": 2594 }, { "epoch": 13.951612903225806, "grad_norm": 19.720666885375977, "learning_rate": 5e-07, "loss": 0.6014, "num_input_tokens_seen": 299988152, "step": 2595 }, { "epoch": 13.951612903225806, "loss": 0.8421996831893921, "loss_ce": 0.00015867594629526138, "loss_iou": 0.345703125, "loss_num": 0.02978515625, "loss_xval": 0.84375, "num_input_tokens_seen": 299988152, "step": 2595 }, { "epoch": 13.956989247311828, "grad_norm": 20.1818904876709, "learning_rate": 5e-07, "loss": 0.5624, "num_input_tokens_seen": 300105232, "step": 2596 }, { "epoch": 13.956989247311828, "loss": 0.8683013916015625, "loss_ce": 1.523455011920305e-05, "loss_iou": 0.3828125, "loss_num": 0.0203857421875, "loss_xval": 0.8671875, "num_input_tokens_seen": 300105232, "step": 2596 }, { "epoch": 13.96236559139785, "grad_norm": 31.9885196685791, "learning_rate": 5e-07, "loss": 0.7095, "num_input_tokens_seen": 300221508, "step": 2597 }, { "epoch": 13.96236559139785, "loss": 0.5473451614379883, "loss_ce": 0.00010396607831353322, "loss_iou": 0.251953125, "loss_num": 0.0089111328125, "loss_xval": 0.546875, "num_input_tokens_seen": 300221508, "step": 2597 }, { "epoch": 13.967741935483872, "grad_norm": 31.784080505371094, "learning_rate": 5e-07, "loss": 0.5726, "num_input_tokens_seen": 300336008, "step": 2598 }, { "epoch": 13.967741935483872, "loss": 0.6604338884353638, "loss_ce": 3.349269536556676e-05, "loss_iou": 0.302734375, "loss_num": 0.010986328125, "loss_xval": 0.66015625, "num_input_tokens_seen": 300336008, "step": 2598 }, { "epoch": 13.973118279569892, "grad_norm": 30.870710372924805, "learning_rate": 5e-07, "loss": 0.6199, "num_input_tokens_seen": 300450860, "step": 2599 }, { "epoch": 13.973118279569892, "loss": 0.46303996443748474, "loss_ce": 2.7281937946099788e-05, "loss_iou": 0.20703125, "loss_num": 0.00970458984375, "loss_xval": 0.462890625, "num_input_tokens_seen": 300450860, "step": 2599 }, { "epoch": 13.978494623655914, "grad_norm": 40.38347244262695, "learning_rate": 5e-07, "loss": 0.5778, "num_input_tokens_seen": 300566692, "step": 2600 }, { "epoch": 13.978494623655914, "loss": 0.5057529211044312, "loss_ce": 1.5667053958168253e-05, "loss_iou": 0.2060546875, "loss_num": 0.0186767578125, "loss_xval": 0.50390625, "num_input_tokens_seen": 300566692, "step": 2600 }, { "epoch": 13.983870967741936, "grad_norm": 36.82117462158203, "learning_rate": 5e-07, "loss": 0.5126, "num_input_tokens_seen": 300686112, "step": 2601 }, { "epoch": 13.983870967741936, "loss": 0.5534948110580444, "loss_ce": 2.7968226277153008e-05, "loss_iou": 0.23828125, "loss_num": 0.01544189453125, "loss_xval": 0.5546875, "num_input_tokens_seen": 300686112, "step": 2601 }, { "epoch": 13.989247311827956, "grad_norm": 26.23712158203125, "learning_rate": 5e-07, "loss": 0.5544, "num_input_tokens_seen": 300801240, "step": 2602 }, { "epoch": 13.989247311827956, "loss": 0.4850090444087982, "loss_ce": 2.3700717065366916e-05, "loss_iou": 0.212890625, "loss_num": 0.0118408203125, "loss_xval": 0.484375, "num_input_tokens_seen": 300801240, "step": 2602 }, { "epoch": 13.994623655913978, "grad_norm": 18.43267059326172, "learning_rate": 5e-07, "loss": 0.4985, "num_input_tokens_seen": 300918800, "step": 2603 }, { "epoch": 13.994623655913978, "loss": 0.4594985246658325, "loss_ce": 2.587187191238627e-05, "loss_iou": 0.205078125, "loss_num": 0.010009765625, "loss_xval": 0.458984375, "num_input_tokens_seen": 300918800, "step": 2603 }, { "epoch": 14.0, "grad_norm": 38.087276458740234, "learning_rate": 5e-07, "loss": 0.6671, "num_input_tokens_seen": 301030704, "step": 2604 }, { "epoch": 14.0, "loss": 0.8459658026695251, "loss_ce": 1.857326424214989e-05, "loss_iou": 0.376953125, "loss_num": 0.0185546875, "loss_xval": 0.84765625, "num_input_tokens_seen": 301030704, "step": 2604 }, { "epoch": 14.005376344086022, "grad_norm": 26.620786666870117, "learning_rate": 5e-07, "loss": 0.5606, "num_input_tokens_seen": 301144888, "step": 2605 }, { "epoch": 14.005376344086022, "loss": 0.6307680606842041, "loss_ce": 3.0738279747311026e-05, "loss_iou": 0.27734375, "loss_num": 0.0155029296875, "loss_xval": 0.62890625, "num_input_tokens_seen": 301144888, "step": 2605 }, { "epoch": 14.010752688172044, "grad_norm": 19.640344619750977, "learning_rate": 5e-07, "loss": 0.4133, "num_input_tokens_seen": 301261072, "step": 2606 }, { "epoch": 14.010752688172044, "loss": 0.340420663356781, "loss_ce": 2.7581994800129905e-05, "loss_iou": 0.1220703125, "loss_num": 0.0191650390625, "loss_xval": 0.33984375, "num_input_tokens_seen": 301261072, "step": 2606 }, { "epoch": 14.016129032258064, "grad_norm": 31.742563247680664, "learning_rate": 5e-07, "loss": 0.5852, "num_input_tokens_seen": 301374156, "step": 2607 }, { "epoch": 14.016129032258064, "loss": 0.8709917068481445, "loss_ce": 2.003874396905303e-05, "loss_iou": 0.39453125, "loss_num": 0.0164794921875, "loss_xval": 0.87109375, "num_input_tokens_seen": 301374156, "step": 2607 }, { "epoch": 14.021505376344086, "grad_norm": 16.96920394897461, "learning_rate": 5e-07, "loss": 0.5864, "num_input_tokens_seen": 301488884, "step": 2608 }, { "epoch": 14.021505376344086, "loss": 0.6220963597297668, "loss_ce": 2.601319647510536e-05, "loss_iou": 0.263671875, "loss_num": 0.0189208984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 301488884, "step": 2608 }, { "epoch": 14.026881720430108, "grad_norm": 17.111665725708008, "learning_rate": 5e-07, "loss": 0.5266, "num_input_tokens_seen": 301605704, "step": 2609 }, { "epoch": 14.026881720430108, "loss": 0.3995281457901001, "loss_ce": 0.00011406120756873861, "loss_iou": 0.171875, "loss_num": 0.01123046875, "loss_xval": 0.3984375, "num_input_tokens_seen": 301605704, "step": 2609 }, { "epoch": 14.03225806451613, "grad_norm": 19.37773323059082, "learning_rate": 5e-07, "loss": 0.6478, "num_input_tokens_seen": 301721360, "step": 2610 }, { "epoch": 14.03225806451613, "loss": 0.7220153212547302, "loss_ce": 9.151914855465293e-05, "loss_iou": 0.3203125, "loss_num": 0.0159912109375, "loss_xval": 0.72265625, "num_input_tokens_seen": 301721360, "step": 2610 }, { "epoch": 14.03763440860215, "grad_norm": 31.648256301879883, "learning_rate": 5e-07, "loss": 0.7132, "num_input_tokens_seen": 301836740, "step": 2611 }, { "epoch": 14.03763440860215, "loss": 0.6924258470535278, "loss_ce": 4.306806658860296e-05, "loss_iou": 0.30859375, "loss_num": 0.015380859375, "loss_xval": 0.69140625, "num_input_tokens_seen": 301836740, "step": 2611 }, { "epoch": 14.043010752688172, "grad_norm": 19.530874252319336, "learning_rate": 5e-07, "loss": 0.6895, "num_input_tokens_seen": 301950204, "step": 2612 }, { "epoch": 14.043010752688172, "loss": 1.1823934316635132, "loss_ce": 2.0400813809828833e-05, "loss_iou": 0.51171875, "loss_num": 0.032470703125, "loss_xval": 1.1796875, "num_input_tokens_seen": 301950204, "step": 2612 }, { "epoch": 14.048387096774194, "grad_norm": 21.489124298095703, "learning_rate": 5e-07, "loss": 0.6602, "num_input_tokens_seen": 302065416, "step": 2613 }, { "epoch": 14.048387096774194, "loss": 0.5613073110580444, "loss_ce": 2.8029971872456372e-05, "loss_iou": 0.2353515625, "loss_num": 0.0181884765625, "loss_xval": 0.5625, "num_input_tokens_seen": 302065416, "step": 2613 }, { "epoch": 14.053763440860216, "grad_norm": 30.761919021606445, "learning_rate": 5e-07, "loss": 0.692, "num_input_tokens_seen": 302177160, "step": 2614 }, { "epoch": 14.053763440860216, "loss": 0.8576942682266235, "loss_ce": 2.824088187480811e-05, "loss_iou": 0.38671875, "loss_num": 0.0167236328125, "loss_xval": 0.859375, "num_input_tokens_seen": 302177160, "step": 2614 }, { "epoch": 14.059139784946236, "grad_norm": 34.516239166259766, "learning_rate": 5e-07, "loss": 0.5794, "num_input_tokens_seen": 302292964, "step": 2615 }, { "epoch": 14.059139784946236, "loss": 0.5915735363960266, "loss_ce": 2.0783911168109626e-05, "loss_iou": 0.259765625, "loss_num": 0.01397705078125, "loss_xval": 0.58984375, "num_input_tokens_seen": 302292964, "step": 2615 }, { "epoch": 14.064516129032258, "grad_norm": 36.43149948120117, "learning_rate": 5e-07, "loss": 0.5758, "num_input_tokens_seen": 302407600, "step": 2616 }, { "epoch": 14.064516129032258, "loss": 0.745949923992157, "loss_ce": 0.00010031033161794767, "loss_iou": 0.33203125, "loss_num": 0.0166015625, "loss_xval": 0.74609375, "num_input_tokens_seen": 302407600, "step": 2616 }, { "epoch": 14.06989247311828, "grad_norm": 24.99992561340332, "learning_rate": 5e-07, "loss": 0.5589, "num_input_tokens_seen": 302523108, "step": 2617 }, { "epoch": 14.06989247311828, "loss": 0.4569284915924072, "loss_ce": 1.9306642570882104e-05, "loss_iou": 0.1962890625, "loss_num": 0.012939453125, "loss_xval": 0.45703125, "num_input_tokens_seen": 302523108, "step": 2617 }, { "epoch": 14.075268817204302, "grad_norm": 29.991527557373047, "learning_rate": 5e-07, "loss": 0.6084, "num_input_tokens_seen": 302638144, "step": 2618 }, { "epoch": 14.075268817204302, "loss": 0.5583734512329102, "loss_ce": 2.382554521318525e-05, "loss_iou": 0.2421875, "loss_num": 0.0147705078125, "loss_xval": 0.55859375, "num_input_tokens_seen": 302638144, "step": 2618 }, { "epoch": 14.080645161290322, "grad_norm": 40.70684051513672, "learning_rate": 5e-07, "loss": 0.5612, "num_input_tokens_seen": 302750512, "step": 2619 }, { "epoch": 14.080645161290322, "loss": 0.5066824555397034, "loss_ce": 9.068191866390407e-05, "loss_iou": 0.21484375, "loss_num": 0.01519775390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 302750512, "step": 2619 }, { "epoch": 14.086021505376344, "grad_norm": 26.846399307250977, "learning_rate": 5e-07, "loss": 0.645, "num_input_tokens_seen": 302861616, "step": 2620 }, { "epoch": 14.086021505376344, "loss": 0.6692028045654297, "loss_ce": 1.3390620551945176e-05, "loss_iou": 0.2890625, "loss_num": 0.017822265625, "loss_xval": 0.66796875, "num_input_tokens_seen": 302861616, "step": 2620 }, { "epoch": 14.091397849462366, "grad_norm": 25.165658950805664, "learning_rate": 5e-07, "loss": 0.5626, "num_input_tokens_seen": 302974612, "step": 2621 }, { "epoch": 14.091397849462366, "loss": 0.5432320237159729, "loss_ce": 1.9147342754877172e-05, "loss_iou": 0.2392578125, "loss_num": 0.01300048828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 302974612, "step": 2621 }, { "epoch": 14.096774193548388, "grad_norm": 31.73409652709961, "learning_rate": 5e-07, "loss": 0.5708, "num_input_tokens_seen": 303086032, "step": 2622 }, { "epoch": 14.096774193548388, "loss": 0.7001237869262695, "loss_ce": 5.053750646766275e-05, "loss_iou": 0.298828125, "loss_num": 0.0205078125, "loss_xval": 0.69921875, "num_input_tokens_seen": 303086032, "step": 2622 }, { "epoch": 14.102150537634408, "grad_norm": 21.486591339111328, "learning_rate": 5e-07, "loss": 0.7165, "num_input_tokens_seen": 303200296, "step": 2623 }, { "epoch": 14.102150537634408, "loss": 0.5517935156822205, "loss_ce": 9.673030581325293e-05, "loss_iou": 0.2392578125, "loss_num": 0.01470947265625, "loss_xval": 0.55078125, "num_input_tokens_seen": 303200296, "step": 2623 }, { "epoch": 14.10752688172043, "grad_norm": 27.214712142944336, "learning_rate": 5e-07, "loss": 0.5218, "num_input_tokens_seen": 303316820, "step": 2624 }, { "epoch": 14.10752688172043, "loss": 0.5818161368370056, "loss_ce": 2.899815808632411e-05, "loss_iou": 0.26171875, "loss_num": 0.01116943359375, "loss_xval": 0.58203125, "num_input_tokens_seen": 303316820, "step": 2624 }, { "epoch": 14.112903225806452, "grad_norm": 25.33843994140625, "learning_rate": 5e-07, "loss": 0.6381, "num_input_tokens_seen": 303432320, "step": 2625 }, { "epoch": 14.112903225806452, "loss": 0.6374286413192749, "loss_ce": 9.954857523553073e-05, "loss_iou": 0.251953125, "loss_num": 0.02685546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 303432320, "step": 2625 }, { "epoch": 14.118279569892474, "grad_norm": 20.86295509338379, "learning_rate": 5e-07, "loss": 0.5559, "num_input_tokens_seen": 303546276, "step": 2626 }, { "epoch": 14.118279569892474, "loss": 0.5537310242652893, "loss_ce": 2.0080226022400893e-05, "loss_iou": 0.2451171875, "loss_num": 0.0125732421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 303546276, "step": 2626 }, { "epoch": 14.123655913978494, "grad_norm": 30.183767318725586, "learning_rate": 5e-07, "loss": 0.6509, "num_input_tokens_seen": 303661444, "step": 2627 }, { "epoch": 14.123655913978494, "loss": 0.706322431564331, "loss_ce": 2.3565331503050402e-05, "loss_iou": 0.302734375, "loss_num": 0.0198974609375, "loss_xval": 0.70703125, "num_input_tokens_seen": 303661444, "step": 2627 }, { "epoch": 14.129032258064516, "grad_norm": 29.10590171813965, "learning_rate": 5e-07, "loss": 0.5152, "num_input_tokens_seen": 303779916, "step": 2628 }, { "epoch": 14.129032258064516, "loss": 0.6264896392822266, "loss_ce": 2.481847332092002e-05, "loss_iou": 0.26953125, "loss_num": 0.01708984375, "loss_xval": 0.625, "num_input_tokens_seen": 303779916, "step": 2628 }, { "epoch": 14.134408602150538, "grad_norm": 22.731889724731445, "learning_rate": 5e-07, "loss": 0.7376, "num_input_tokens_seen": 303895148, "step": 2629 }, { "epoch": 14.134408602150538, "loss": 0.6819372773170471, "loss_ce": 5.24798160768114e-05, "loss_iou": 0.279296875, "loss_num": 0.0247802734375, "loss_xval": 0.68359375, "num_input_tokens_seen": 303895148, "step": 2629 }, { "epoch": 14.13978494623656, "grad_norm": 28.279008865356445, "learning_rate": 5e-07, "loss": 0.6224, "num_input_tokens_seen": 304009160, "step": 2630 }, { "epoch": 14.13978494623656, "loss": 0.5739990472793579, "loss_ce": 2.4453436708427034e-05, "loss_iou": 0.2431640625, "loss_num": 0.0174560546875, "loss_xval": 0.57421875, "num_input_tokens_seen": 304009160, "step": 2630 }, { "epoch": 14.14516129032258, "grad_norm": 21.035612106323242, "learning_rate": 5e-07, "loss": 0.6248, "num_input_tokens_seen": 304124436, "step": 2631 }, { "epoch": 14.14516129032258, "loss": 0.8031457662582397, "loss_ce": 4.5169559598434716e-05, "loss_iou": 0.3515625, "loss_num": 0.02001953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 304124436, "step": 2631 }, { "epoch": 14.150537634408602, "grad_norm": 39.626075744628906, "learning_rate": 5e-07, "loss": 0.585, "num_input_tokens_seen": 304238612, "step": 2632 }, { "epoch": 14.150537634408602, "loss": 0.3216211497783661, "loss_ce": 2.6907386200036854e-05, "loss_iou": 0.140625, "loss_num": 0.0078125, "loss_xval": 0.322265625, "num_input_tokens_seen": 304238612, "step": 2632 }, { "epoch": 14.155913978494624, "grad_norm": 27.276639938354492, "learning_rate": 5e-07, "loss": 0.5389, "num_input_tokens_seen": 304357728, "step": 2633 }, { "epoch": 14.155913978494624, "loss": 0.4525451064109802, "loss_ce": 3.044782170036342e-05, "loss_iou": 0.1865234375, "loss_num": 0.015869140625, "loss_xval": 0.453125, "num_input_tokens_seen": 304357728, "step": 2633 }, { "epoch": 14.161290322580646, "grad_norm": 41.56672668457031, "learning_rate": 5e-07, "loss": 0.5368, "num_input_tokens_seen": 304472888, "step": 2634 }, { "epoch": 14.161290322580646, "loss": 0.537490963935852, "loss_ce": 1.541575875307899e-05, "loss_iou": 0.2421875, "loss_num": 0.010498046875, "loss_xval": 0.5390625, "num_input_tokens_seen": 304472888, "step": 2634 }, { "epoch": 14.166666666666666, "grad_norm": 40.31985855102539, "learning_rate": 5e-07, "loss": 0.5359, "num_input_tokens_seen": 304589512, "step": 2635 }, { "epoch": 14.166666666666666, "loss": 0.35220181941986084, "loss_ce": 2.8946851671207696e-05, "loss_iou": 0.146484375, "loss_num": 0.011962890625, "loss_xval": 0.3515625, "num_input_tokens_seen": 304589512, "step": 2635 }, { "epoch": 14.172043010752688, "grad_norm": 46.7147331237793, "learning_rate": 5e-07, "loss": 0.7054, "num_input_tokens_seen": 304704568, "step": 2636 }, { "epoch": 14.172043010752688, "loss": 0.8072394132614136, "loss_ce": 0.00011047557200072333, "loss_iou": 0.33984375, "loss_num": 0.025634765625, "loss_xval": 0.80859375, "num_input_tokens_seen": 304704568, "step": 2636 }, { "epoch": 14.17741935483871, "grad_norm": 39.297645568847656, "learning_rate": 5e-07, "loss": 0.7128, "num_input_tokens_seen": 304819036, "step": 2637 }, { "epoch": 14.17741935483871, "loss": 0.6301968097686768, "loss_ce": 6.985070649534464e-05, "loss_iou": 0.279296875, "loss_num": 0.01446533203125, "loss_xval": 0.62890625, "num_input_tokens_seen": 304819036, "step": 2637 }, { "epoch": 14.182795698924732, "grad_norm": 25.148231506347656, "learning_rate": 5e-07, "loss": 0.5721, "num_input_tokens_seen": 304930304, "step": 2638 }, { "epoch": 14.182795698924732, "loss": 0.4526507556438446, "loss_ce": 1.4029104931978509e-05, "loss_iou": 0.1962890625, "loss_num": 0.01190185546875, "loss_xval": 0.453125, "num_input_tokens_seen": 304930304, "step": 2638 }, { "epoch": 14.188172043010752, "grad_norm": 22.15574836730957, "learning_rate": 5e-07, "loss": 0.6312, "num_input_tokens_seen": 305046988, "step": 2639 }, { "epoch": 14.188172043010752, "loss": 0.5741203427314758, "loss_ce": 2.3669581423746422e-05, "loss_iou": 0.236328125, "loss_num": 0.020263671875, "loss_xval": 0.57421875, "num_input_tokens_seen": 305046988, "step": 2639 }, { "epoch": 14.193548387096774, "grad_norm": 20.2773494720459, "learning_rate": 5e-07, "loss": 0.5594, "num_input_tokens_seen": 305163040, "step": 2640 }, { "epoch": 14.193548387096774, "loss": 0.5539984703063965, "loss_ce": 4.344797707744874e-05, "loss_iou": 0.240234375, "loss_num": 0.01483154296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 305163040, "step": 2640 }, { "epoch": 14.198924731182796, "grad_norm": 23.902570724487305, "learning_rate": 5e-07, "loss": 0.4738, "num_input_tokens_seen": 305280768, "step": 2641 }, { "epoch": 14.198924731182796, "loss": 0.4160303473472595, "loss_ce": 1.4738702702743467e-05, "loss_iou": 0.181640625, "loss_num": 0.0103759765625, "loss_xval": 0.416015625, "num_input_tokens_seen": 305280768, "step": 2641 }, { "epoch": 14.204301075268818, "grad_norm": 20.942472457885742, "learning_rate": 5e-07, "loss": 0.615, "num_input_tokens_seen": 305400028, "step": 2642 }, { "epoch": 14.204301075268818, "loss": 0.7709689140319824, "loss_ce": 9.488904470345005e-05, "loss_iou": 0.333984375, "loss_num": 0.0206298828125, "loss_xval": 0.76953125, "num_input_tokens_seen": 305400028, "step": 2642 }, { "epoch": 14.209677419354838, "grad_norm": 44.13407897949219, "learning_rate": 5e-07, "loss": 0.6643, "num_input_tokens_seen": 305514064, "step": 2643 }, { "epoch": 14.209677419354838, "loss": 0.8237589597702026, "loss_ce": 2.8487313102232292e-05, "loss_iou": 0.37109375, "loss_num": 0.01611328125, "loss_xval": 0.82421875, "num_input_tokens_seen": 305514064, "step": 2643 }, { "epoch": 14.21505376344086, "grad_norm": 20.18628692626953, "learning_rate": 5e-07, "loss": 0.5941, "num_input_tokens_seen": 305631188, "step": 2644 }, { "epoch": 14.21505376344086, "loss": 0.6880202293395996, "loss_ce": 3.194841701770201e-05, "loss_iou": 0.275390625, "loss_num": 0.0274658203125, "loss_xval": 0.6875, "num_input_tokens_seen": 305631188, "step": 2644 }, { "epoch": 14.220430107526882, "grad_norm": 37.821834564208984, "learning_rate": 5e-07, "loss": 0.7525, "num_input_tokens_seen": 305748164, "step": 2645 }, { "epoch": 14.220430107526882, "loss": 1.122368574142456, "loss_ce": 5.422619142336771e-05, "loss_iou": 0.466796875, "loss_num": 0.03759765625, "loss_xval": 1.125, "num_input_tokens_seen": 305748164, "step": 2645 }, { "epoch": 14.225806451612904, "grad_norm": 60.60845947265625, "learning_rate": 5e-07, "loss": 0.5735, "num_input_tokens_seen": 305864224, "step": 2646 }, { "epoch": 14.225806451612904, "loss": 0.4975912570953369, "loss_ce": 3.2670148357283324e-05, "loss_iou": 0.2275390625, "loss_num": 0.00860595703125, "loss_xval": 0.498046875, "num_input_tokens_seen": 305864224, "step": 2646 }, { "epoch": 14.231182795698924, "grad_norm": 37.256744384765625, "learning_rate": 5e-07, "loss": 0.6205, "num_input_tokens_seen": 305977280, "step": 2647 }, { "epoch": 14.231182795698924, "loss": 0.7121850848197937, "loss_ce": 2.688412496354431e-05, "loss_iou": 0.31640625, "loss_num": 0.015625, "loss_xval": 0.7109375, "num_input_tokens_seen": 305977280, "step": 2647 }, { "epoch": 14.236559139784946, "grad_norm": 28.383094787597656, "learning_rate": 5e-07, "loss": 0.6359, "num_input_tokens_seen": 306093884, "step": 2648 }, { "epoch": 14.236559139784946, "loss": 0.6534903049468994, "loss_ce": 4.796894427272491e-05, "loss_iou": 0.28515625, "loss_num": 0.0162353515625, "loss_xval": 0.65234375, "num_input_tokens_seen": 306093884, "step": 2648 }, { "epoch": 14.241935483870968, "grad_norm": 38.50673294067383, "learning_rate": 5e-07, "loss": 0.4704, "num_input_tokens_seen": 306209896, "step": 2649 }, { "epoch": 14.241935483870968, "loss": 0.35988733172416687, "loss_ce": 2.404485348961316e-05, "loss_iou": 0.1455078125, "loss_num": 0.0137939453125, "loss_xval": 0.359375, "num_input_tokens_seen": 306209896, "step": 2649 }, { "epoch": 14.24731182795699, "grad_norm": 34.18161392211914, "learning_rate": 5e-07, "loss": 0.5812, "num_input_tokens_seen": 306324704, "step": 2650 }, { "epoch": 14.24731182795699, "loss": 0.5696050524711609, "loss_ce": 2.4979943191283382e-05, "loss_iou": 0.25, "loss_num": 0.0137939453125, "loss_xval": 0.5703125, "num_input_tokens_seen": 306324704, "step": 2650 }, { "epoch": 14.25268817204301, "grad_norm": 26.88008689880371, "learning_rate": 5e-07, "loss": 0.7077, "num_input_tokens_seen": 306442272, "step": 2651 }, { "epoch": 14.25268817204301, "loss": 0.4661078453063965, "loss_ce": 4.337133213994093e-05, "loss_iou": 0.201171875, "loss_num": 0.0126953125, "loss_xval": 0.466796875, "num_input_tokens_seen": 306442272, "step": 2651 }, { "epoch": 14.258064516129032, "grad_norm": 23.555049896240234, "learning_rate": 5e-07, "loss": 0.5504, "num_input_tokens_seen": 306558556, "step": 2652 }, { "epoch": 14.258064516129032, "loss": 0.5410707592964172, "loss_ce": 5.511197377927601e-05, "loss_iou": 0.234375, "loss_num": 0.01434326171875, "loss_xval": 0.5390625, "num_input_tokens_seen": 306558556, "step": 2652 }, { "epoch": 14.263440860215054, "grad_norm": 23.758560180664062, "learning_rate": 5e-07, "loss": 0.6185, "num_input_tokens_seen": 306673620, "step": 2653 }, { "epoch": 14.263440860215054, "loss": 0.6113477945327759, "loss_ce": 1.967620846698992e-05, "loss_iou": 0.2470703125, "loss_num": 0.0234375, "loss_xval": 0.609375, "num_input_tokens_seen": 306673620, "step": 2653 }, { "epoch": 14.268817204301076, "grad_norm": 25.50014305114746, "learning_rate": 5e-07, "loss": 0.6357, "num_input_tokens_seen": 306789764, "step": 2654 }, { "epoch": 14.268817204301076, "loss": 0.3635830581188202, "loss_ce": 2.716407470870763e-05, "loss_iou": 0.1513671875, "loss_num": 0.012451171875, "loss_xval": 0.36328125, "num_input_tokens_seen": 306789764, "step": 2654 }, { "epoch": 14.274193548387096, "grad_norm": 23.37663459777832, "learning_rate": 5e-07, "loss": 0.6012, "num_input_tokens_seen": 306906824, "step": 2655 }, { "epoch": 14.274193548387096, "loss": 0.4973624348640442, "loss_ce": 4.7987679863581434e-05, "loss_iou": 0.212890625, "loss_num": 0.01416015625, "loss_xval": 0.498046875, "num_input_tokens_seen": 306906824, "step": 2655 }, { "epoch": 14.279569892473118, "grad_norm": 29.67287826538086, "learning_rate": 5e-07, "loss": 0.5314, "num_input_tokens_seen": 307022532, "step": 2656 }, { "epoch": 14.279569892473118, "loss": 0.3931393027305603, "loss_ce": 7.289298810064793e-05, "loss_iou": 0.1650390625, "loss_num": 0.01251220703125, "loss_xval": 0.392578125, "num_input_tokens_seen": 307022532, "step": 2656 }, { "epoch": 14.28494623655914, "grad_norm": 29.95015525817871, "learning_rate": 5e-07, "loss": 0.6134, "num_input_tokens_seen": 307140988, "step": 2657 }, { "epoch": 14.28494623655914, "loss": 0.696448802947998, "loss_ce": 3.770484181586653e-05, "loss_iou": 0.318359375, "loss_num": 0.01171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 307140988, "step": 2657 }, { "epoch": 14.290322580645162, "grad_norm": 16.877843856811523, "learning_rate": 5e-07, "loss": 0.495, "num_input_tokens_seen": 307259532, "step": 2658 }, { "epoch": 14.290322580645162, "loss": 0.3793574571609497, "loss_ce": 2.3965876607690006e-05, "loss_iou": 0.1640625, "loss_num": 0.01019287109375, "loss_xval": 0.37890625, "num_input_tokens_seen": 307259532, "step": 2658 }, { "epoch": 14.295698924731182, "grad_norm": 25.69105339050293, "learning_rate": 5e-07, "loss": 0.7116, "num_input_tokens_seen": 307376088, "step": 2659 }, { "epoch": 14.295698924731182, "loss": 0.7939882278442383, "loss_ce": 4.289212665753439e-05, "loss_iou": 0.341796875, "loss_num": 0.0216064453125, "loss_xval": 0.79296875, "num_input_tokens_seen": 307376088, "step": 2659 }, { "epoch": 14.301075268817204, "grad_norm": 25.648027420043945, "learning_rate": 5e-07, "loss": 0.4629, "num_input_tokens_seen": 307493648, "step": 2660 }, { "epoch": 14.301075268817204, "loss": 0.5823050737380981, "loss_ce": 2.967080035887193e-05, "loss_iou": 0.267578125, "loss_num": 0.00921630859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 307493648, "step": 2660 }, { "epoch": 14.306451612903226, "grad_norm": 24.10832405090332, "learning_rate": 5e-07, "loss": 0.6544, "num_input_tokens_seen": 307608832, "step": 2661 }, { "epoch": 14.306451612903226, "loss": 0.4676707983016968, "loss_ce": 1.9407072613830678e-05, "loss_iou": 0.1943359375, "loss_num": 0.0157470703125, "loss_xval": 0.466796875, "num_input_tokens_seen": 307608832, "step": 2661 }, { "epoch": 14.311827956989248, "grad_norm": 33.626251220703125, "learning_rate": 5e-07, "loss": 0.5627, "num_input_tokens_seen": 307727012, "step": 2662 }, { "epoch": 14.311827956989248, "loss": 0.6500843167304993, "loss_ce": 5.9922975196968764e-05, "loss_iou": 0.2890625, "loss_num": 0.0145263671875, "loss_xval": 0.6484375, "num_input_tokens_seen": 307727012, "step": 2662 }, { "epoch": 14.317204301075268, "grad_norm": 21.74756622314453, "learning_rate": 5e-07, "loss": 0.4576, "num_input_tokens_seen": 307843200, "step": 2663 }, { "epoch": 14.317204301075268, "loss": 0.4150693118572235, "loss_ce": 3.0257844628067687e-05, "loss_iou": 0.1826171875, "loss_num": 0.0098876953125, "loss_xval": 0.4140625, "num_input_tokens_seen": 307843200, "step": 2663 }, { "epoch": 14.32258064516129, "grad_norm": 26.282560348510742, "learning_rate": 5e-07, "loss": 0.543, "num_input_tokens_seen": 307962980, "step": 2664 }, { "epoch": 14.32258064516129, "loss": 0.6734893321990967, "loss_ce": 2.7439989935373887e-05, "loss_iou": 0.265625, "loss_num": 0.0284423828125, "loss_xval": 0.671875, "num_input_tokens_seen": 307962980, "step": 2664 }, { "epoch": 14.327956989247312, "grad_norm": 26.98470687866211, "learning_rate": 5e-07, "loss": 0.7663, "num_input_tokens_seen": 308079692, "step": 2665 }, { "epoch": 14.327956989247312, "loss": 0.6296675205230713, "loss_ce": 2.8860627935500816e-05, "loss_iou": 0.267578125, "loss_num": 0.0185546875, "loss_xval": 0.62890625, "num_input_tokens_seen": 308079692, "step": 2665 }, { "epoch": 14.333333333333334, "grad_norm": 21.34450340270996, "learning_rate": 5e-07, "loss": 0.5839, "num_input_tokens_seen": 308195400, "step": 2666 }, { "epoch": 14.333333333333334, "loss": 0.574963390827179, "loss_ce": 0.0002563492162153125, "loss_iou": 0.236328125, "loss_num": 0.0205078125, "loss_xval": 0.57421875, "num_input_tokens_seen": 308195400, "step": 2666 }, { "epoch": 14.338709677419354, "grad_norm": 31.75386619567871, "learning_rate": 5e-07, "loss": 0.6636, "num_input_tokens_seen": 308307924, "step": 2667 }, { "epoch": 14.338709677419354, "loss": 0.8244810104370117, "loss_ce": 1.8179336620960385e-05, "loss_iou": 0.3515625, "loss_num": 0.0240478515625, "loss_xval": 0.82421875, "num_input_tokens_seen": 308307924, "step": 2667 }, { "epoch": 14.344086021505376, "grad_norm": 23.675111770629883, "learning_rate": 5e-07, "loss": 0.6161, "num_input_tokens_seen": 308424412, "step": 2668 }, { "epoch": 14.344086021505376, "loss": 0.4217812716960907, "loss_ce": 2.8348506020847708e-05, "loss_iou": 0.1748046875, "loss_num": 0.01458740234375, "loss_xval": 0.421875, "num_input_tokens_seen": 308424412, "step": 2668 }, { "epoch": 14.349462365591398, "grad_norm": 21.725683212280273, "learning_rate": 5e-07, "loss": 0.6673, "num_input_tokens_seen": 308538632, "step": 2669 }, { "epoch": 14.349462365591398, "loss": 0.8912753462791443, "loss_ce": 3.998930333182216e-05, "loss_iou": 0.40625, "loss_num": 0.01556396484375, "loss_xval": 0.890625, "num_input_tokens_seen": 308538632, "step": 2669 }, { "epoch": 14.35483870967742, "grad_norm": 19.05655288696289, "learning_rate": 5e-07, "loss": 0.677, "num_input_tokens_seen": 308652764, "step": 2670 }, { "epoch": 14.35483870967742, "loss": 0.6755796670913696, "loss_ce": 4.2551211663521826e-05, "loss_iou": 0.27734375, "loss_num": 0.0244140625, "loss_xval": 0.67578125, "num_input_tokens_seen": 308652764, "step": 2670 }, { "epoch": 14.36021505376344, "grad_norm": 23.127845764160156, "learning_rate": 5e-07, "loss": 0.7092, "num_input_tokens_seen": 308766168, "step": 2671 }, { "epoch": 14.36021505376344, "loss": 0.9680647253990173, "loss_ce": 4.71427156298887e-05, "loss_iou": 0.4375, "loss_num": 0.0186767578125, "loss_xval": 0.96875, "num_input_tokens_seen": 308766168, "step": 2671 }, { "epoch": 14.365591397849462, "grad_norm": 27.453533172607422, "learning_rate": 5e-07, "loss": 0.5684, "num_input_tokens_seen": 308881192, "step": 2672 }, { "epoch": 14.365591397849462, "loss": 0.3719685971736908, "loss_ce": 0.0001424226356903091, "loss_iou": 0.162109375, "loss_num": 0.00927734375, "loss_xval": 0.37109375, "num_input_tokens_seen": 308881192, "step": 2672 }, { "epoch": 14.370967741935484, "grad_norm": 22.73462677001953, "learning_rate": 5e-07, "loss": 0.5935, "num_input_tokens_seen": 308998112, "step": 2673 }, { "epoch": 14.370967741935484, "loss": 0.5069280862808228, "loss_ce": 3.114617720711976e-05, "loss_iou": 0.228515625, "loss_num": 0.010009765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 308998112, "step": 2673 }, { "epoch": 14.376344086021506, "grad_norm": 28.364765167236328, "learning_rate": 5e-07, "loss": 0.6977, "num_input_tokens_seen": 309115600, "step": 2674 }, { "epoch": 14.376344086021506, "loss": 0.40407511591911316, "loss_ce": 2.2367355995811522e-05, "loss_iou": 0.1826171875, "loss_num": 0.00787353515625, "loss_xval": 0.404296875, "num_input_tokens_seen": 309115600, "step": 2674 }, { "epoch": 14.381720430107526, "grad_norm": 23.64194107055664, "learning_rate": 5e-07, "loss": 0.5594, "num_input_tokens_seen": 309231840, "step": 2675 }, { "epoch": 14.381720430107526, "loss": 0.7428675889968872, "loss_ce": 6.971001857891679e-05, "loss_iou": 0.314453125, "loss_num": 0.022705078125, "loss_xval": 0.7421875, "num_input_tokens_seen": 309231840, "step": 2675 }, { "epoch": 14.387096774193548, "grad_norm": 25.00940704345703, "learning_rate": 5e-07, "loss": 0.612, "num_input_tokens_seen": 309347396, "step": 2676 }, { "epoch": 14.387096774193548, "loss": 0.6345821619033813, "loss_ce": 6.072260657674633e-05, "loss_iou": 0.267578125, "loss_num": 0.02001953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 309347396, "step": 2676 }, { "epoch": 14.39247311827957, "grad_norm": 25.717674255371094, "learning_rate": 5e-07, "loss": 0.6721, "num_input_tokens_seen": 309461148, "step": 2677 }, { "epoch": 14.39247311827957, "loss": 0.6638381481170654, "loss_ce": 1.980084562092088e-05, "loss_iou": 0.291015625, "loss_num": 0.0159912109375, "loss_xval": 0.6640625, "num_input_tokens_seen": 309461148, "step": 2677 }, { "epoch": 14.397849462365592, "grad_norm": 26.315641403198242, "learning_rate": 5e-07, "loss": 0.5285, "num_input_tokens_seen": 309573440, "step": 2678 }, { "epoch": 14.397849462365592, "loss": 0.5268697142601013, "loss_ce": 1.422414970875252e-05, "loss_iou": 0.2392578125, "loss_num": 0.00958251953125, "loss_xval": 0.52734375, "num_input_tokens_seen": 309573440, "step": 2678 }, { "epoch": 14.403225806451612, "grad_norm": 24.074363708496094, "learning_rate": 5e-07, "loss": 0.5004, "num_input_tokens_seen": 309691484, "step": 2679 }, { "epoch": 14.403225806451612, "loss": 0.5169985294342041, "loss_ce": 3.07756636175327e-05, "loss_iou": 0.208984375, "loss_num": 0.019775390625, "loss_xval": 0.515625, "num_input_tokens_seen": 309691484, "step": 2679 }, { "epoch": 14.408602150537634, "grad_norm": 18.32600975036621, "learning_rate": 5e-07, "loss": 0.6087, "num_input_tokens_seen": 309809756, "step": 2680 }, { "epoch": 14.408602150537634, "loss": 0.5596756339073181, "loss_ce": 0.00010533238673815504, "loss_iou": 0.251953125, "loss_num": 0.0113525390625, "loss_xval": 0.55859375, "num_input_tokens_seen": 309809756, "step": 2680 }, { "epoch": 14.413978494623656, "grad_norm": 18.918241500854492, "learning_rate": 5e-07, "loss": 0.647, "num_input_tokens_seen": 309925160, "step": 2681 }, { "epoch": 14.413978494623656, "loss": 0.546250581741333, "loss_ce": 4.70006198156625e-05, "loss_iou": 0.248046875, "loss_num": 0.0098876953125, "loss_xval": 0.546875, "num_input_tokens_seen": 309925160, "step": 2681 }, { "epoch": 14.419354838709678, "grad_norm": 25.616985321044922, "learning_rate": 5e-07, "loss": 0.5931, "num_input_tokens_seen": 310041316, "step": 2682 }, { "epoch": 14.419354838709678, "loss": 0.3603745102882385, "loss_ce": 2.29242468776647e-05, "loss_iou": 0.15234375, "loss_num": 0.010986328125, "loss_xval": 0.359375, "num_input_tokens_seen": 310041316, "step": 2682 }, { "epoch": 14.424731182795698, "grad_norm": 27.106124877929688, "learning_rate": 5e-07, "loss": 0.5469, "num_input_tokens_seen": 310157212, "step": 2683 }, { "epoch": 14.424731182795698, "loss": 0.5176635980606079, "loss_ce": 8.551800419809297e-05, "loss_iou": 0.228515625, "loss_num": 0.011962890625, "loss_xval": 0.515625, "num_input_tokens_seen": 310157212, "step": 2683 }, { "epoch": 14.43010752688172, "grad_norm": 35.411537170410156, "learning_rate": 5e-07, "loss": 0.5771, "num_input_tokens_seen": 310274988, "step": 2684 }, { "epoch": 14.43010752688172, "loss": 0.6710640788078308, "loss_ce": 4.3577063479460776e-05, "loss_iou": 0.302734375, "loss_num": 0.013427734375, "loss_xval": 0.671875, "num_input_tokens_seen": 310274988, "step": 2684 }, { "epoch": 14.435483870967742, "grad_norm": 96.99394226074219, "learning_rate": 5e-07, "loss": 0.5886, "num_input_tokens_seen": 310393120, "step": 2685 }, { "epoch": 14.435483870967742, "loss": 0.4921734929084778, "loss_ce": 4.7000616177683696e-05, "loss_iou": 0.2119140625, "loss_num": 0.01361083984375, "loss_xval": 0.4921875, "num_input_tokens_seen": 310393120, "step": 2685 }, { "epoch": 14.440860215053764, "grad_norm": 31.67730140686035, "learning_rate": 5e-07, "loss": 0.5598, "num_input_tokens_seen": 310508764, "step": 2686 }, { "epoch": 14.440860215053764, "loss": 0.6096851229667664, "loss_ce": 6.598873005714267e-05, "loss_iou": 0.267578125, "loss_num": 0.01483154296875, "loss_xval": 0.609375, "num_input_tokens_seen": 310508764, "step": 2686 }, { "epoch": 14.446236559139784, "grad_norm": 39.57780838012695, "learning_rate": 5e-07, "loss": 0.6062, "num_input_tokens_seen": 310624464, "step": 2687 }, { "epoch": 14.446236559139784, "loss": 0.8867671489715576, "loss_ce": 4.8371002776548266e-05, "loss_iou": 0.388671875, "loss_num": 0.021728515625, "loss_xval": 0.88671875, "num_input_tokens_seen": 310624464, "step": 2687 }, { "epoch": 14.451612903225806, "grad_norm": 35.02458190917969, "learning_rate": 5e-07, "loss": 0.5984, "num_input_tokens_seen": 310742856, "step": 2688 }, { "epoch": 14.451612903225806, "loss": 0.5507296323776245, "loss_ce": 7.049087435007095e-05, "loss_iou": 0.2275390625, "loss_num": 0.01904296875, "loss_xval": 0.55078125, "num_input_tokens_seen": 310742856, "step": 2688 }, { "epoch": 14.456989247311828, "grad_norm": 39.629608154296875, "learning_rate": 5e-07, "loss": 0.565, "num_input_tokens_seen": 310859880, "step": 2689 }, { "epoch": 14.456989247311828, "loss": 0.7525149583816528, "loss_ce": 7.358418952208012e-05, "loss_iou": 0.328125, "loss_num": 0.0189208984375, "loss_xval": 0.75390625, "num_input_tokens_seen": 310859880, "step": 2689 }, { "epoch": 14.46236559139785, "grad_norm": 22.569332122802734, "learning_rate": 5e-07, "loss": 0.5965, "num_input_tokens_seen": 310974824, "step": 2690 }, { "epoch": 14.46236559139785, "loss": 0.5536268949508667, "loss_ce": 3.8034748286008835e-05, "loss_iou": 0.2578125, "loss_num": 0.007568359375, "loss_xval": 0.5546875, "num_input_tokens_seen": 310974824, "step": 2690 }, { "epoch": 14.46774193548387, "grad_norm": 21.994482040405273, "learning_rate": 5e-07, "loss": 0.6191, "num_input_tokens_seen": 311091016, "step": 2691 }, { "epoch": 14.46774193548387, "loss": 0.946312427520752, "loss_ce": 2.3380333004752174e-05, "loss_iou": 0.396484375, "loss_num": 0.0308837890625, "loss_xval": 0.9453125, "num_input_tokens_seen": 311091016, "step": 2691 }, { "epoch": 14.473118279569892, "grad_norm": 17.6896915435791, "learning_rate": 5e-07, "loss": 0.5265, "num_input_tokens_seen": 311209692, "step": 2692 }, { "epoch": 14.473118279569892, "loss": 0.5246795415878296, "loss_ce": 2.134691567334812e-05, "loss_iou": 0.234375, "loss_num": 0.01116943359375, "loss_xval": 0.5234375, "num_input_tokens_seen": 311209692, "step": 2692 }, { "epoch": 14.478494623655914, "grad_norm": 21.580158233642578, "learning_rate": 5e-07, "loss": 0.561, "num_input_tokens_seen": 311328660, "step": 2693 }, { "epoch": 14.478494623655914, "loss": 0.5158265233039856, "loss_ce": 1.847729436121881e-05, "loss_iou": 0.2275390625, "loss_num": 0.01190185546875, "loss_xval": 0.515625, "num_input_tokens_seen": 311328660, "step": 2693 }, { "epoch": 14.483870967741936, "grad_norm": 26.10664176940918, "learning_rate": 5e-07, "loss": 0.6167, "num_input_tokens_seen": 311444136, "step": 2694 }, { "epoch": 14.483870967741936, "loss": 0.8761751651763916, "loss_ce": 0.00044271486694924533, "loss_iou": 0.375, "loss_num": 0.0255126953125, "loss_xval": 0.875, "num_input_tokens_seen": 311444136, "step": 2694 }, { "epoch": 14.489247311827956, "grad_norm": 31.533649444580078, "learning_rate": 5e-07, "loss": 0.5382, "num_input_tokens_seen": 311560812, "step": 2695 }, { "epoch": 14.489247311827956, "loss": 0.6047812104225159, "loss_ce": 4.486276884563267e-05, "loss_iou": 0.2734375, "loss_num": 0.01165771484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 311560812, "step": 2695 }, { "epoch": 14.494623655913978, "grad_norm": 22.642648696899414, "learning_rate": 5e-07, "loss": 0.4968, "num_input_tokens_seen": 311676180, "step": 2696 }, { "epoch": 14.494623655913978, "loss": 0.4787825345993042, "loss_ce": 2.2776246623834595e-05, "loss_iou": 0.212890625, "loss_num": 0.0106201171875, "loss_xval": 0.478515625, "num_input_tokens_seen": 311676180, "step": 2696 }, { "epoch": 14.5, "grad_norm": 39.80891799926758, "learning_rate": 5e-07, "loss": 0.5928, "num_input_tokens_seen": 311791656, "step": 2697 }, { "epoch": 14.5, "loss": 0.33388060331344604, "loss_ce": 1.8293514585820958e-05, "loss_iou": 0.150390625, "loss_num": 0.0068359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 311791656, "step": 2697 }, { "epoch": 14.505376344086022, "grad_norm": 31.04887580871582, "learning_rate": 5e-07, "loss": 0.5827, "num_input_tokens_seen": 311909912, "step": 2698 }, { "epoch": 14.505376344086022, "loss": 0.3380884528160095, "loss_ce": 1.474449618399376e-05, "loss_iou": 0.140625, "loss_num": 0.0113525390625, "loss_xval": 0.337890625, "num_input_tokens_seen": 311909912, "step": 2698 }, { "epoch": 14.510752688172044, "grad_norm": 26.78239631652832, "learning_rate": 5e-07, "loss": 0.6147, "num_input_tokens_seen": 312024856, "step": 2699 }, { "epoch": 14.510752688172044, "loss": 0.6953606009483337, "loss_ce": 4.8101443098858e-05, "loss_iou": 0.31640625, "loss_num": 0.01202392578125, "loss_xval": 0.6953125, "num_input_tokens_seen": 312024856, "step": 2699 }, { "epoch": 14.516129032258064, "grad_norm": 22.188800811767578, "learning_rate": 5e-07, "loss": 0.5841, "num_input_tokens_seen": 312141464, "step": 2700 }, { "epoch": 14.516129032258064, "loss": 0.5252071022987366, "loss_ce": 6.059199949959293e-05, "loss_iou": 0.2353515625, "loss_num": 0.0107421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 312141464, "step": 2700 }, { "epoch": 14.521505376344086, "grad_norm": 31.08066749572754, "learning_rate": 5e-07, "loss": 0.4933, "num_input_tokens_seen": 312257688, "step": 2701 }, { "epoch": 14.521505376344086, "loss": 0.5290652513504028, "loss_ce": 1.2522281394922175e-05, "loss_iou": 0.22265625, "loss_num": 0.016845703125, "loss_xval": 0.52734375, "num_input_tokens_seen": 312257688, "step": 2701 }, { "epoch": 14.526881720430108, "grad_norm": 27.99073028564453, "learning_rate": 5e-07, "loss": 0.5617, "num_input_tokens_seen": 312373272, "step": 2702 }, { "epoch": 14.526881720430108, "loss": 0.34627366065979004, "loss_ce": 2.123817102983594e-05, "loss_iou": 0.1435546875, "loss_num": 0.0118408203125, "loss_xval": 0.345703125, "num_input_tokens_seen": 312373272, "step": 2702 }, { "epoch": 14.532258064516128, "grad_norm": 20.825773239135742, "learning_rate": 5e-07, "loss": 0.5706, "num_input_tokens_seen": 312487928, "step": 2703 }, { "epoch": 14.532258064516128, "loss": 0.38728803396224976, "loss_ce": 1.9989642169093713e-05, "loss_iou": 0.1611328125, "loss_num": 0.01300048828125, "loss_xval": 0.38671875, "num_input_tokens_seen": 312487928, "step": 2703 }, { "epoch": 14.53763440860215, "grad_norm": 22.480144500732422, "learning_rate": 5e-07, "loss": 0.5688, "num_input_tokens_seen": 312602804, "step": 2704 }, { "epoch": 14.53763440860215, "loss": 0.49245473742485046, "loss_ce": 2.3124826839193702e-05, "loss_iou": 0.21484375, "loss_num": 0.0125732421875, "loss_xval": 0.4921875, "num_input_tokens_seen": 312602804, "step": 2704 }, { "epoch": 14.543010752688172, "grad_norm": 26.619142532348633, "learning_rate": 5e-07, "loss": 0.724, "num_input_tokens_seen": 312722124, "step": 2705 }, { "epoch": 14.543010752688172, "loss": 0.4883055090904236, "loss_ce": 2.4259476049337536e-05, "loss_iou": 0.212890625, "loss_num": 0.01263427734375, "loss_xval": 0.48828125, "num_input_tokens_seen": 312722124, "step": 2705 }, { "epoch": 14.548387096774194, "grad_norm": 44.15766525268555, "learning_rate": 5e-07, "loss": 0.7395, "num_input_tokens_seen": 312836104, "step": 2706 }, { "epoch": 14.548387096774194, "loss": 0.7407541871070862, "loss_ce": 3.154145088046789e-05, "loss_iou": 0.30859375, "loss_num": 0.0250244140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 312836104, "step": 2706 }, { "epoch": 14.553763440860216, "grad_norm": 38.20620346069336, "learning_rate": 5e-07, "loss": 0.7328, "num_input_tokens_seen": 312952188, "step": 2707 }, { "epoch": 14.553763440860216, "loss": 0.5188316702842712, "loss_ce": 3.287008439656347e-05, "loss_iou": 0.23828125, "loss_num": 0.00848388671875, "loss_xval": 0.51953125, "num_input_tokens_seen": 312952188, "step": 2707 }, { "epoch": 14.559139784946236, "grad_norm": 37.589473724365234, "learning_rate": 5e-07, "loss": 0.6734, "num_input_tokens_seen": 313065676, "step": 2708 }, { "epoch": 14.559139784946236, "loss": 0.8105687499046326, "loss_ce": 2.1865647795493715e-05, "loss_iou": 0.376953125, "loss_num": 0.0115966796875, "loss_xval": 0.8125, "num_input_tokens_seen": 313065676, "step": 2708 }, { "epoch": 14.564516129032258, "grad_norm": 26.029422760009766, "learning_rate": 5e-07, "loss": 0.5105, "num_input_tokens_seen": 313181820, "step": 2709 }, { "epoch": 14.564516129032258, "loss": 0.5483675003051758, "loss_ce": 2.7620011678664014e-05, "loss_iou": 0.2392578125, "loss_num": 0.0137939453125, "loss_xval": 0.546875, "num_input_tokens_seen": 313181820, "step": 2709 }, { "epoch": 14.56989247311828, "grad_norm": 43.728790283203125, "learning_rate": 5e-07, "loss": 0.6095, "num_input_tokens_seen": 313296396, "step": 2710 }, { "epoch": 14.56989247311828, "loss": 0.7076989412307739, "loss_ce": 5.737643004977144e-05, "loss_iou": 0.3203125, "loss_num": 0.01385498046875, "loss_xval": 0.70703125, "num_input_tokens_seen": 313296396, "step": 2710 }, { "epoch": 14.575268817204302, "grad_norm": 29.87234115600586, "learning_rate": 5e-07, "loss": 0.7706, "num_input_tokens_seen": 313415624, "step": 2711 }, { "epoch": 14.575268817204302, "loss": 0.7349917888641357, "loss_ce": 0.00012854258238803595, "loss_iou": 0.326171875, "loss_num": 0.0166015625, "loss_xval": 0.734375, "num_input_tokens_seen": 313415624, "step": 2711 }, { "epoch": 14.580645161290322, "grad_norm": 16.491426467895508, "learning_rate": 5e-07, "loss": 0.6562, "num_input_tokens_seen": 313532404, "step": 2712 }, { "epoch": 14.580645161290322, "loss": 0.6457786560058594, "loss_ce": 2.6675419576349668e-05, "loss_iou": 0.29296875, "loss_num": 0.01177978515625, "loss_xval": 0.64453125, "num_input_tokens_seen": 313532404, "step": 2712 }, { "epoch": 14.586021505376344, "grad_norm": 26.110395431518555, "learning_rate": 5e-07, "loss": 0.6619, "num_input_tokens_seen": 313645924, "step": 2713 }, { "epoch": 14.586021505376344, "loss": 0.6434524059295654, "loss_ce": 1.9851187971653417e-05, "loss_iou": 0.2734375, "loss_num": 0.01953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 313645924, "step": 2713 }, { "epoch": 14.591397849462366, "grad_norm": 22.22669792175293, "learning_rate": 5e-07, "loss": 0.5586, "num_input_tokens_seen": 313762256, "step": 2714 }, { "epoch": 14.591397849462366, "loss": 0.5073543787002563, "loss_ce": 3.01102118100971e-05, "loss_iou": 0.2294921875, "loss_num": 0.00946044921875, "loss_xval": 0.5078125, "num_input_tokens_seen": 313762256, "step": 2714 }, { "epoch": 14.596774193548388, "grad_norm": 26.000150680541992, "learning_rate": 5e-07, "loss": 0.5791, "num_input_tokens_seen": 313878064, "step": 2715 }, { "epoch": 14.596774193548388, "loss": 0.862695574760437, "loss_ce": 2.4663464500918053e-05, "loss_iou": 0.373046875, "loss_num": 0.0233154296875, "loss_xval": 0.86328125, "num_input_tokens_seen": 313878064, "step": 2715 }, { "epoch": 14.602150537634408, "grad_norm": 17.77315330505371, "learning_rate": 5e-07, "loss": 0.6474, "num_input_tokens_seen": 313995744, "step": 2716 }, { "epoch": 14.602150537634408, "loss": 0.6715593338012695, "loss_ce": 5.051010157330893e-05, "loss_iou": 0.310546875, "loss_num": 0.00994873046875, "loss_xval": 0.671875, "num_input_tokens_seen": 313995744, "step": 2716 }, { "epoch": 14.60752688172043, "grad_norm": 24.373682022094727, "learning_rate": 5e-07, "loss": 0.5287, "num_input_tokens_seen": 314112484, "step": 2717 }, { "epoch": 14.60752688172043, "loss": 0.46986034512519836, "loss_ce": 0.00013377871073316783, "loss_iou": 0.20703125, "loss_num": 0.0111083984375, "loss_xval": 0.46875, "num_input_tokens_seen": 314112484, "step": 2717 }, { "epoch": 14.612903225806452, "grad_norm": 30.470279693603516, "learning_rate": 5e-07, "loss": 0.5584, "num_input_tokens_seen": 314226824, "step": 2718 }, { "epoch": 14.612903225806452, "loss": 0.5442163348197937, "loss_ce": 2.6895593691733666e-05, "loss_iou": 0.22265625, "loss_num": 0.0196533203125, "loss_xval": 0.54296875, "num_input_tokens_seen": 314226824, "step": 2718 }, { "epoch": 14.618279569892474, "grad_norm": 26.005413055419922, "learning_rate": 5e-07, "loss": 0.5939, "num_input_tokens_seen": 314346464, "step": 2719 }, { "epoch": 14.618279569892474, "loss": 0.6306508779525757, "loss_ce": 3.561971243470907e-05, "loss_iou": 0.265625, "loss_num": 0.02001953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 314346464, "step": 2719 }, { "epoch": 14.623655913978494, "grad_norm": 18.507055282592773, "learning_rate": 5e-07, "loss": 0.5285, "num_input_tokens_seen": 314462840, "step": 2720 }, { "epoch": 14.623655913978494, "loss": 0.5175356864929199, "loss_ce": 0.0002017054648604244, "loss_iou": 0.23828125, "loss_num": 0.00830078125, "loss_xval": 0.515625, "num_input_tokens_seen": 314462840, "step": 2720 }, { "epoch": 14.629032258064516, "grad_norm": 19.636449813842773, "learning_rate": 5e-07, "loss": 0.4968, "num_input_tokens_seen": 314578716, "step": 2721 }, { "epoch": 14.629032258064516, "loss": 0.43537330627441406, "loss_ce": 5.530638009076938e-05, "loss_iou": 0.18359375, "loss_num": 0.0135498046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 314578716, "step": 2721 }, { "epoch": 14.634408602150538, "grad_norm": 23.38027572631836, "learning_rate": 5e-07, "loss": 0.5576, "num_input_tokens_seen": 314693252, "step": 2722 }, { "epoch": 14.634408602150538, "loss": 0.4972461462020874, "loss_ce": 5.3791030950378627e-05, "loss_iou": 0.21484375, "loss_num": 0.0135498046875, "loss_xval": 0.498046875, "num_input_tokens_seen": 314693252, "step": 2722 }, { "epoch": 14.63978494623656, "grad_norm": 19.395954132080078, "learning_rate": 5e-07, "loss": 0.5553, "num_input_tokens_seen": 314809152, "step": 2723 }, { "epoch": 14.63978494623656, "loss": 0.45779454708099365, "loss_ce": 3.087209915975109e-05, "loss_iou": 0.1796875, "loss_num": 0.019775390625, "loss_xval": 0.45703125, "num_input_tokens_seen": 314809152, "step": 2723 }, { "epoch": 14.64516129032258, "grad_norm": 23.307409286499023, "learning_rate": 5e-07, "loss": 0.6011, "num_input_tokens_seen": 314924328, "step": 2724 }, { "epoch": 14.64516129032258, "loss": 0.6533474922180176, "loss_ce": 2.7124278858536854e-05, "loss_iou": 0.306640625, "loss_num": 0.008056640625, "loss_xval": 0.65234375, "num_input_tokens_seen": 314924328, "step": 2724 }, { "epoch": 14.650537634408602, "grad_norm": 38.55192184448242, "learning_rate": 5e-07, "loss": 0.6228, "num_input_tokens_seen": 315037884, "step": 2725 }, { "epoch": 14.650537634408602, "loss": 0.7849398255348206, "loss_ce": 2.767580190266017e-05, "loss_iou": 0.322265625, "loss_num": 0.02783203125, "loss_xval": 0.78515625, "num_input_tokens_seen": 315037884, "step": 2725 }, { "epoch": 14.655913978494624, "grad_norm": 21.129064559936523, "learning_rate": 5e-07, "loss": 0.5825, "num_input_tokens_seen": 315154220, "step": 2726 }, { "epoch": 14.655913978494624, "loss": 0.4928164482116699, "loss_ce": 1.8596248992253095e-05, "loss_iou": 0.2099609375, "loss_num": 0.014404296875, "loss_xval": 0.4921875, "num_input_tokens_seen": 315154220, "step": 2726 }, { "epoch": 14.661290322580646, "grad_norm": 19.524961471557617, "learning_rate": 5e-07, "loss": 0.6736, "num_input_tokens_seen": 315270228, "step": 2727 }, { "epoch": 14.661290322580646, "loss": 0.8410761952400208, "loss_ce": 1.1718065252352972e-05, "loss_iou": 0.357421875, "loss_num": 0.0252685546875, "loss_xval": 0.83984375, "num_input_tokens_seen": 315270228, "step": 2727 }, { "epoch": 14.666666666666666, "grad_norm": 24.825359344482422, "learning_rate": 5e-07, "loss": 0.8052, "num_input_tokens_seen": 315387988, "step": 2728 }, { "epoch": 14.666666666666666, "loss": 0.7857194542884827, "loss_ce": 7.489983545383438e-05, "loss_iou": 0.33984375, "loss_num": 0.0211181640625, "loss_xval": 0.78515625, "num_input_tokens_seen": 315387988, "step": 2728 }, { "epoch": 14.672043010752688, "grad_norm": 20.947710037231445, "learning_rate": 5e-07, "loss": 0.5701, "num_input_tokens_seen": 315501872, "step": 2729 }, { "epoch": 14.672043010752688, "loss": 0.7149238586425781, "loss_ce": 1.9058021280216053e-05, "loss_iou": 0.28515625, "loss_num": 0.0291748046875, "loss_xval": 0.71484375, "num_input_tokens_seen": 315501872, "step": 2729 }, { "epoch": 14.67741935483871, "grad_norm": 18.512210845947266, "learning_rate": 5e-07, "loss": 0.5363, "num_input_tokens_seen": 315618188, "step": 2730 }, { "epoch": 14.67741935483871, "loss": 0.4957786798477173, "loss_ce": 5.112973303766921e-05, "loss_iou": 0.23046875, "loss_num": 0.007080078125, "loss_xval": 0.49609375, "num_input_tokens_seen": 315618188, "step": 2730 }, { "epoch": 14.682795698924732, "grad_norm": 26.81171417236328, "learning_rate": 5e-07, "loss": 0.6483, "num_input_tokens_seen": 315734316, "step": 2731 }, { "epoch": 14.682795698924732, "loss": 0.6033267974853516, "loss_ce": 5.5269996664719656e-05, "loss_iou": 0.27734375, "loss_num": 0.0101318359375, "loss_xval": 0.6015625, "num_input_tokens_seen": 315734316, "step": 2731 }, { "epoch": 14.688172043010752, "grad_norm": 24.413482666015625, "learning_rate": 5e-07, "loss": 0.5688, "num_input_tokens_seen": 315851536, "step": 2732 }, { "epoch": 14.688172043010752, "loss": 0.6189190149307251, "loss_ce": 2.2561303921975195e-05, "loss_iou": 0.26953125, "loss_num": 0.0157470703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 315851536, "step": 2732 }, { "epoch": 14.693548387096774, "grad_norm": 33.57636642456055, "learning_rate": 5e-07, "loss": 0.5596, "num_input_tokens_seen": 315967104, "step": 2733 }, { "epoch": 14.693548387096774, "loss": 0.6199092864990234, "loss_ce": 3.620026836870238e-05, "loss_iou": 0.263671875, "loss_num": 0.0185546875, "loss_xval": 0.62109375, "num_input_tokens_seen": 315967104, "step": 2733 }, { "epoch": 14.698924731182796, "grad_norm": 26.546192169189453, "learning_rate": 5e-07, "loss": 0.6861, "num_input_tokens_seen": 316085908, "step": 2734 }, { "epoch": 14.698924731182796, "loss": 0.671801745891571, "loss_ce": 4.878977779299021e-05, "loss_iou": 0.2890625, "loss_num": 0.0181884765625, "loss_xval": 0.671875, "num_input_tokens_seen": 316085908, "step": 2734 }, { "epoch": 14.704301075268818, "grad_norm": 23.162710189819336, "learning_rate": 5e-07, "loss": 0.4747, "num_input_tokens_seen": 316200256, "step": 2735 }, { "epoch": 14.704301075268818, "loss": 0.45301640033721924, "loss_ce": 1.3473627404891886e-05, "loss_iou": 0.19921875, "loss_num": 0.010986328125, "loss_xval": 0.453125, "num_input_tokens_seen": 316200256, "step": 2735 }, { "epoch": 14.709677419354838, "grad_norm": 35.577423095703125, "learning_rate": 5e-07, "loss": 0.671, "num_input_tokens_seen": 316316508, "step": 2736 }, { "epoch": 14.709677419354838, "loss": 0.48185133934020996, "loss_ce": 3.9804959669709206e-05, "loss_iou": 0.205078125, "loss_num": 0.01422119140625, "loss_xval": 0.482421875, "num_input_tokens_seen": 316316508, "step": 2736 }, { "epoch": 14.71505376344086, "grad_norm": 30.32946014404297, "learning_rate": 5e-07, "loss": 0.5444, "num_input_tokens_seen": 316429256, "step": 2737 }, { "epoch": 14.71505376344086, "loss": 0.605486273765564, "loss_ce": 1.751893069013022e-05, "loss_iou": 0.2470703125, "loss_num": 0.0224609375, "loss_xval": 0.60546875, "num_input_tokens_seen": 316429256, "step": 2737 }, { "epoch": 14.720430107526882, "grad_norm": 58.71696853637695, "learning_rate": 5e-07, "loss": 0.5081, "num_input_tokens_seen": 316545548, "step": 2738 }, { "epoch": 14.720430107526882, "loss": 0.5332269668579102, "loss_ce": 2.3824435629649088e-05, "loss_iou": 0.2138671875, "loss_num": 0.021240234375, "loss_xval": 0.53125, "num_input_tokens_seen": 316545548, "step": 2738 }, { "epoch": 14.725806451612904, "grad_norm": 17.796545028686523, "learning_rate": 5e-07, "loss": 0.4782, "num_input_tokens_seen": 316662808, "step": 2739 }, { "epoch": 14.725806451612904, "loss": 0.453260213136673, "loss_ce": 1.3131528248777613e-05, "loss_iou": 0.2001953125, "loss_num": 0.0106201171875, "loss_xval": 0.453125, "num_input_tokens_seen": 316662808, "step": 2739 }, { "epoch": 14.731182795698924, "grad_norm": 16.74785804748535, "learning_rate": 5e-07, "loss": 0.6827, "num_input_tokens_seen": 316780380, "step": 2740 }, { "epoch": 14.731182795698924, "loss": 0.6782597303390503, "loss_ce": 3.7072655686642975e-05, "loss_iou": 0.3046875, "loss_num": 0.01336669921875, "loss_xval": 0.6796875, "num_input_tokens_seen": 316780380, "step": 2740 }, { "epoch": 14.736559139784946, "grad_norm": 26.680522918701172, "learning_rate": 5e-07, "loss": 0.558, "num_input_tokens_seen": 316895844, "step": 2741 }, { "epoch": 14.736559139784946, "loss": 0.5422573685646057, "loss_ce": 2.100172969221603e-05, "loss_iou": 0.2294921875, "loss_num": 0.0164794921875, "loss_xval": 0.54296875, "num_input_tokens_seen": 316895844, "step": 2741 }, { "epoch": 14.741935483870968, "grad_norm": 18.20510482788086, "learning_rate": 5e-07, "loss": 0.7563, "num_input_tokens_seen": 317012040, "step": 2742 }, { "epoch": 14.741935483870968, "loss": 0.7547928690910339, "loss_ce": 3.212237061234191e-05, "loss_iou": 0.318359375, "loss_num": 0.0234375, "loss_xval": 0.75390625, "num_input_tokens_seen": 317012040, "step": 2742 }, { "epoch": 14.74731182795699, "grad_norm": 26.167531967163086, "learning_rate": 5e-07, "loss": 0.6946, "num_input_tokens_seen": 317126752, "step": 2743 }, { "epoch": 14.74731182795699, "loss": 0.5991413593292236, "loss_ce": 2.0316196241765283e-05, "loss_iou": 0.244140625, "loss_num": 0.022216796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 317126752, "step": 2743 }, { "epoch": 14.75268817204301, "grad_norm": 25.651092529296875, "learning_rate": 5e-07, "loss": 0.5251, "num_input_tokens_seen": 317242672, "step": 2744 }, { "epoch": 14.75268817204301, "loss": 0.7551641464233398, "loss_ce": 3.7182682717684656e-05, "loss_iou": 0.3046875, "loss_num": 0.02978515625, "loss_xval": 0.75390625, "num_input_tokens_seen": 317242672, "step": 2744 }, { "epoch": 14.758064516129032, "grad_norm": 21.684499740600586, "learning_rate": 5e-07, "loss": 0.5838, "num_input_tokens_seen": 317359756, "step": 2745 }, { "epoch": 14.758064516129032, "loss": 0.5609958171844482, "loss_ce": 8.276078733615577e-05, "loss_iou": 0.2236328125, "loss_num": 0.0225830078125, "loss_xval": 0.5625, "num_input_tokens_seen": 317359756, "step": 2745 }, { "epoch": 14.763440860215054, "grad_norm": 27.757055282592773, "learning_rate": 5e-07, "loss": 0.5437, "num_input_tokens_seen": 317473348, "step": 2746 }, { "epoch": 14.763440860215054, "loss": 0.52545565366745, "loss_ce": 6.50262736598961e-05, "loss_iou": 0.2333984375, "loss_num": 0.0115966796875, "loss_xval": 0.5234375, "num_input_tokens_seen": 317473348, "step": 2746 }, { "epoch": 14.768817204301076, "grad_norm": 31.600025177001953, "learning_rate": 5e-07, "loss": 0.6296, "num_input_tokens_seen": 317586996, "step": 2747 }, { "epoch": 14.768817204301076, "loss": 0.45601794123649597, "loss_ce": 8.530237391823903e-05, "loss_iou": 0.1845703125, "loss_num": 0.017333984375, "loss_xval": 0.455078125, "num_input_tokens_seen": 317586996, "step": 2747 }, { "epoch": 14.774193548387096, "grad_norm": 29.433712005615234, "learning_rate": 5e-07, "loss": 0.452, "num_input_tokens_seen": 317703024, "step": 2748 }, { "epoch": 14.774193548387096, "loss": 0.3963942229747772, "loss_ce": 3.1922696507535875e-05, "loss_iou": 0.1767578125, "loss_num": 0.0086669921875, "loss_xval": 0.396484375, "num_input_tokens_seen": 317703024, "step": 2748 }, { "epoch": 14.779569892473118, "grad_norm": 25.699092864990234, "learning_rate": 5e-07, "loss": 0.4499, "num_input_tokens_seen": 317815748, "step": 2749 }, { "epoch": 14.779569892473118, "loss": 0.4775741398334503, "loss_ce": 3.5063945688307285e-05, "loss_iou": 0.2001953125, "loss_num": 0.01556396484375, "loss_xval": 0.4765625, "num_input_tokens_seen": 317815748, "step": 2749 }, { "epoch": 14.78494623655914, "grad_norm": 21.37201690673828, "learning_rate": 5e-07, "loss": 0.6357, "num_input_tokens_seen": 317931116, "step": 2750 }, { "epoch": 14.78494623655914, "eval_icons_CIoU": 0.13188185170292854, "eval_icons_GIoU": 0.09493804723024368, "eval_icons_IoU": 0.29334819316864014, "eval_icons_MAE_all": 0.03200614359229803, "eval_icons_MAE_h": 0.0354624018073082, "eval_icons_MAE_w": 0.05565761588513851, "eval_icons_MAE_x_boxes": 0.05283237807452679, "eval_icons_MAE_y_boxes": 0.034401487559080124, "eval_icons_NUM_probability": 0.9992242753505707, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.9439681768417358, "eval_icons_loss_ce": 0.0001905981480376795, "eval_icons_loss_iou": 0.88427734375, "eval_icons_loss_num": 0.03272247314453125, "eval_icons_loss_xval": 1.931640625, "eval_icons_runtime": 45.3935, "eval_icons_samples_per_second": 1.101, "eval_icons_steps_per_second": 0.044, "num_input_tokens_seen": 317931116, "step": 2750 }, { "epoch": 14.78494623655914, "eval_screenspot_CIoU": 0.34085364639759064, "eval_screenspot_GIoU": 0.3338239888350169, "eval_screenspot_IoU": 0.4376797080039978, "eval_screenspot_MAE_all": 0.05378408854206403, "eval_screenspot_MAE_h": 0.04895661833385626, "eval_screenspot_MAE_w": 0.07401567821701367, "eval_screenspot_MAE_x_boxes": 0.07484844078620274, "eval_screenspot_MAE_y_boxes": 0.03783431835472584, "eval_screenspot_NUM_probability": 0.9998782277107239, "eval_screenspot_inside_bbox": 0.7516666650772095, "eval_screenspot_loss": 1.648237943649292, "eval_screenspot_loss_ce": 6.407649925677106e-05, "eval_screenspot_loss_iou": 0.7127278645833334, "eval_screenspot_loss_num": 0.060956319173177086, "eval_screenspot_loss_xval": 1.7301432291666667, "eval_screenspot_runtime": 73.7313, "eval_screenspot_samples_per_second": 1.207, "eval_screenspot_steps_per_second": 0.041, "num_input_tokens_seen": 317931116, "step": 2750 } ], "logging_steps": 1.0, "max_steps": 9300, "num_input_tokens_seen": 317931116, "num_train_epochs": 50, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4790757454996046e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }